diff --git a/Cargo.lock b/Cargo.lock index 98523e349fea..91a184a9551a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "arrayvec" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" dependencies = [ "serde", ] @@ -572,6 +572,24 @@ dependencies = [ "xpcom", ] +[[package]] +name = "breakpad-symbols" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e1ad3f5e2e5c8a42fccedd6792cc05968b39b69c3fe7b5544072ac052f3fe85" +dependencies = [ + "async-trait", + "cachemap2", + "circular", + "debugid", + "futures-util", + "minidump-common", + "nom", + "range-map", + "thiserror", + "tracing", +] + [[package]] name = "build-parallel" version = "0.1.2" @@ -631,6 +649,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c" +[[package]] +name = "cachemap2" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ccbd3153aa153b2f5eff557537ffce81e4dd6c50ae0eddc41dc8d0c388436f" + [[package]] name = "calendrical_calculations" version = "0.1.1" @@ -777,6 +801,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb7bdea464ae038f09197b82430b921c53619fc8d2bcaf7b151013b3ca008017" +[[package]] +name = "circular" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fc239e0f6cb375d2402d48afb92f76f5404fd1df208a41930ec81eda078bea" + [[package]] name = "clang-sys" version = "1.7.0" @@ -2028,6 +2058,20 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "framehop" +version = "0.12.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd28d2036d4fd99e3629487baca659e5af1c5d554e320168613be79028610fc" +dependencies = [ + "arrayvec", + "cfg-if", + "fallible-iterator", + "gimli", + "macho-unwind-info", + "pe-unwind-info", +] + [[package]] name = "freetype" version = "0.7.0" @@ -2083,6 +2127,7 @@ dependencies = [ "futures-core", "futures-task", "futures-util", + "num_cpus", ] [[package]] @@ -2252,6 +2297,16 @@ dependencies = [ "wasi 0.11.0+wasi-snapshot-preview1", ] +[[package]] +name = "gimli" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e1d97fbe9722ba9bbd0c97051c2956e726562b61f86a25a4360398a40edfc9" +dependencies = [ + "fallible-iterator", + "stable_deref_trait", +] + [[package]] name = "gkrust" version = "0.1.0" @@ -3523,6 +3578,17 @@ dependencies = [ "libc", ] +[[package]] +name = "macho-unwind-info" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b6086acc74bc23f56b60e88bb082d505e23849d68d6c0f12bb6a7ad5c60e03e" +dependencies = [ + "thiserror", + "zerocopy", + "zerocopy-derive", +] + [[package]] name = "malloc_buf" version = "0.0.6" @@ -3706,10 +3772,50 @@ dependencies = [ ] [[package]] -name = "minidump-common" -version = "0.21.1" +name = "minidump" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bb6eaf88cc770fa58e6ae721cf2e40c2ca6a4c942ae8c7aa324d680bd3c6717" +checksum = "aefb80650628de087057ed167e3e1ef5bed65dc4b1bd28d47cd707c3848adce2" +dependencies = [ + "debugid", + "encoding_rs", + "memmap2", + "minidump-common", + "num-traits", + "procfs-core", + "range-map", + "scroll", + "thiserror", + "time 0.3.36", + "tracing", + "uuid", +] + +[[package]] +name = "minidump-analyzer" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "breakpad-symbols", + "clap", + "env_logger", + "futures-executor", + "futures-util", + "lazy_static", + "log", + "minidump", + 
"minidump-unwind", + "mozilla-central-workspace-hack", + "serde_json", + "windows-sys", +] + +[[package]] +name = "minidump-common" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a2b640f80e5514f49509ff1f97fb24693f95ef5be5ed810d70df4283a68acc" dependencies = [ "bitflags 2.6.0", "debugid", @@ -3721,10 +3827,28 @@ dependencies = [ ] [[package]] -name = "minidump-writer" -version = "0.8.9" +name = "minidump-unwind" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abcd9c8a1e6e1e9d56ce3627851f39a17ea83e17c96bc510f29d7e43d78a7d" +checksum = "afb5af4cbb631c54fe8c0c058799e9ac95b31c6e282f1afaaaaad10c2c441fcb" +dependencies = [ + "async-trait", + "breakpad-symbols", + "cachemap2", + "framehop", + "memmap2", + "minidump", + "minidump-common", + "object", + "scroll", + "tracing", +] + +[[package]] +name = "minidump-writer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ef25fdfb9560aa90ad9b64e6901a86e63e7b3f125ca3083dd3d3936c12acc6" dependencies = [ "bitflags 2.6.0", "byteorder", @@ -3873,6 +3997,7 @@ dependencies = [ "futures", "futures-channel", "futures-core", + "futures-executor", "futures-sink", "futures-util", "getrandom", @@ -4323,9 +4448,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe" +checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" dependencies = [ "memchr", ] @@ -4468,6 +4593,19 @@ dependencies = [ "serde_json", ] +[[package]] +name = "pe-unwind-info" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ec3b43050c38ffb9de87e17d874e9956e3a9131b343c9b7b7002597727c3891" +dependencies = [ + "arrayvec", + "bitflags 2.6.0", + "thiserror", + "zerocopy", + 
"zerocopy-derive", +] + [[package]] name = "peek-poke" version = "0.3.0" @@ -7105,6 +7243,7 @@ version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ + "byteorder", "zerocopy-derive", ] diff --git a/Cargo.toml b/Cargo.toml index bf413780e6c5..c936b49e4f69 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ members = [ "testing/geckodriver", "toolkit/components/uniffi-bindgen-gecko-js", "toolkit/crashreporter/client/app", + "toolkit/crashreporter/minidump-analyzer", "toolkit/crashreporter/mozwer-rust", "toolkit/crashreporter/rust_minidump_writer_linux", "toolkit/library/gtest/rust", diff --git a/build/workspace-hack/Cargo.toml b/build/workspace-hack/Cargo.toml index f33cb7dcceb4..f0284735ff97 100644 --- a/build/workspace-hack/Cargo.toml +++ b/build/workspace-hack/Cargo.toml @@ -20,7 +20,7 @@ arrayvec = { version = "0.7", features = ["serde"], optional = true } bitflags = { version = "2", features = ["serde", "std"], optional = true } bytes = { version = "1", optional = true } chrono = { version = "0.4", features = ["serde"], optional = true } -clap = { version = "4", default-features = false, features = ["cargo", "derive", "error-context", "help", "string", "usage", "wrap_help"], optional = true } +clap = { version = "4", default-features = false, features = ["cargo", "derive", "error-context", "help", "std", "string", "usage", "wrap_help"], optional = true } crossbeam-utils = { version = "0.8", optional = true } dist-bin = { path = "../rust/dist-bin", optional = true } env_logger = { version = "0.10", default-features = false, features = ["color"], optional = true } @@ -31,6 +31,7 @@ fnv = { version = "1", optional = true } futures = { version = "0.3", features = ["futures-executor"], optional = true } futures-channel = { version = "0.3", features = ["sink"], optional = true } futures-core = { version = "0.3", optional = true } 
+futures-executor = { version = "0.3", features = ["thread-pool"], optional = true } futures-sink = { version = "0.3", optional = true } futures-util = { version = "0.3", features = ["channel", "io", "sink"], optional = true } getrandom = { version = "0.2", default-features = false, features = ["std"], optional = true } @@ -209,6 +210,9 @@ features = [ "Win32_Networking", "Win32_Networking_WinSock", "Win32_Security", + "Win32_Security_Cryptography", + "Win32_Security_Cryptography_Catalog", + "Win32_Security_WinTrust", "Win32_Storage_FileSystem", "Win32_System_Com", "Win32_System_Diagnostics_Debug", @@ -242,6 +246,7 @@ gkrust-gtest = ["gkrust"] http3server = ["dep:arrayvec", "dep:bindgen", "dep:bitflags", "dep:bytes", "dep:chrono", "dep:clap", "dep:dist-bin", "dep:env_logger", "dep:fnv", "dep:futures", "dep:futures-channel", "dep:futures-core", "dep:futures-sink", "dep:futures-util", "dep:getrandom", "dep:hashbrown", "dep:hex", "dep:hyper", "dep:icu_locid", "dep:icu_properties", "dep:indexmap", "dep:itertools", "dep:log", "dep:memchr", "dep:mio", "dep:nom", "dep:num-integer", "dep:num-traits", "dep:once_cell", "dep:regex", "dep:semver", "dep:serde_json", "dep:smallvec", "dep:stable_deref_trait", "dep:strsim", "dep:time", "dep:time-macros", "dep:tinystr", "dep:tokio", "dep:tokio-util", "dep:toml", "dep:tracing", "dep:unicode-bidi", "dep:url", "dep:windows-sys", "dep:yoke", "dep:zerofrom", "dep:zerovec"] ipcclientcerts-static = ["dep:bindgen", "dep:bitflags", "dep:itertools", "dep:memchr", "dep:nom", "dep:regex"] jsrust = ["dep:arrayvec", "dep:cc", "dep:env_logger", "dep:getrandom", "dep:hashbrown", "dep:icu_locid", "dep:icu_properties", "dep:indexmap", "dep:log", "dep:memchr", "dep:num-traits", "dep:once_cell", "dep:semver", "dep:smallvec", "dep:stable_deref_trait", "dep:tinystr", "dep:unicode-bidi", "dep:url", "dep:yoke", "dep:zerofrom", "dep:zerovec"] +minidump-analyzer = ["dep:clap", "dep:env_logger", "dep:futures-executor", "dep:futures-util", "dep:log", 
"dep:serde_json", "dep:windows-sys"] mozwer_s = ["dep:getrandom", "dep:hashbrown", "dep:indexmap", "dep:log", "dep:once_cell", "dep:scroll", "dep:serde_json", "dep:uuid", "dep:windows-sys"] nmhproxy = ["dep:bitflags", "dep:hashbrown", "dep:icu_locid", "dep:icu_properties", "dep:indexmap", "dep:once_cell", "dep:serde_json", "dep:smallvec", "dep:stable_deref_trait", "dep:tinystr", "dep:unicode-bidi", "dep:url", "dep:windows-sys", "dep:yoke", "dep:zerofrom", "dep:zerovec"] osclientcerts-static = ["dep:bindgen", "dep:bitflags", "dep:core-foundation-sys", "dep:env_logger", "dep:itertools", "dep:log", "dep:memchr", "dep:nom", "dep:regex"] diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index a95b9f953d7c..8ffefd7c757c 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -28,6 +28,22 @@ start = "2022-08-04" end = "2024-03-09" notes = "Though the code is safe to run and deploy, the code for processing HTTP/1.1 messages (the `read-http` feature, specifically) is not suited for deployment in real applications, either clients or servers. Some features necessary for live deployment are not implemented, such as the proper handling of some types of response (e.g., a response to a HEAD request). Software that processes HTTP/1.1 messages requires a large number of compatibility tweaks if it is to be deployed interoperably. This feature only exists to support basic validation tools and is unlikely to be widely compatible." +[[wildcard-audits.breakpad-symbols]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +user-id = 72814 # Gabriele Svelto (gabrielesvelto) +start = "2022-11-30" +end = "2025-02-28" +notes = "This crate is written and maintained by mozilla employees." + +[[wildcard-audits.cachemap2]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +user-id = 106639 # Alex Franchuk (afranchuk) +start = "2023-03-21" +end = "2025-02-28" +notes = "This crate is written and solely maintained by a mozilla employee." 
+ [[wildcard-audits.cexpr]] who = "Emilio Cobos Álvarez " criteria = "safe-to-deploy" @@ -185,6 +201,14 @@ start = "2019-03-14" end = "2025-04-25" notes = "I wrote most of the commits in the euclid reprository and review every change that is not produced by me." +[[wildcard-audits.framehop]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +user-id = 20227 # Markus Stange (mstange) +start = "2022-03-12" +end = "2025-02-28" +notes = "This crate is written and solely maintained by a mozilla employee." + [[wildcard-audits.freetype]] who = "Bobby Holley " criteria = "safe-to-deploy" @@ -267,6 +291,14 @@ end = "2023-05-04" renew = false notes = "I've reviewed every source contribution that was neither authored nor reviewed by Mozilla." +[[wildcard-audits.macho-unwind-info]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +user-id = 20227 # Markus Stange (mstange) +start = "2022-01-31" +end = "2025-02-28" +notes = "This crate is written and solely maintained by a mozilla employee." + [[wildcard-audits.marionette]] who = "Henrik Skupin " criteria = "safe-to-run" @@ -275,6 +307,14 @@ start = "2020-11-03" end = "2025-01-31" notes = "Maintained by the DevTools team at Mozilla and has no unsafe code." +[[wildcard-audits.minidump]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +user-id = 72814 # Gabriele Svelto (gabrielesvelto) +start = "2022-11-30" +end = "2025-02-28" +notes = "This crate is written and maintained by mozilla employees." + [[wildcard-audits.minidump-common]] who = "Alex Franchuk " criteria = "safe-to-deploy" @@ -283,6 +323,14 @@ start = "2022-11-30" end = "2025-02-28" notes = "This crate is written and maintained by mozilla employees." +[[wildcard-audits.minidump-unwind]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +user-id = 72814 # Gabriele Svelto (gabrielesvelto) +start = "2023-05-17" +end = "2025-02-28" +notes = "This crate is written and maintained by mozilla employees." 
+ [[wildcard-audits.mozdevice]] who = "Henrik Skupin " criteria = "safe-to-run" @@ -331,6 +379,14 @@ start = "2022-08-04" end = "2024-03-09" notes = "This code contains two cryptographic back ends. No unsafe code is contained if the Rust `hpke` crate is used (the `rust-hpke` feature). Using NSS (the `nss` feature) involves extensive use of bindings to the native code provided by NSS. This interface uses wrappers that attempt to add safety to a fundamentally very dangerous library, but those wrappers have only been validated for use following the needs of this crate." +[[wildcard-audits.pe-unwind-info]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +user-id = 106639 # Alex Franchuk (afranchuk) +start = "2023-07-25" +end = "2025-02-28" +notes = "This crate is written and solely maintained by a mozilla employee." + [[wildcard-audits.qcms]] who = "Jeff Muizelaar " criteria = "safe-to-deploy" @@ -685,6 +741,12 @@ who = "Henri Sivonen " criteria = "safe-to-deploy" version = "0.3.0" +[[audits.arrayvec]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +delta = "0.7.2 -> 0.7.4" +notes = "Manually verified new unsafe pointer arithmetic." + [[audits.ash]] who = "Jim Blandy " criteria = "safe-to-deploy" @@ -1001,6 +1063,12 @@ who = "Bobby Holley " criteria = "safe-to-deploy" delta = "0.1.2 -> 0.1.2@git:ed8a4c6f900a90d4dbc1d64b856e61490a1c3570" +[[audits.circular]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +version = "0.3.0" +notes = "No dependencies. Unsafe code is necessary to provide functionality and was manually verified to be correct." + [[audits.clang-sys]] who = "Mike Hommey " criteria = "safe-to-deploy" @@ -2202,6 +2270,15 @@ who = "Simon Friedberger " criteria = "safe-to-deploy" delta = "0.2.10 -> 0.2.11" +[[audits.gimli]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +version = "0.30.0" +notes = """ +Unsafe code blocks are sound. Minimal dependencies used. No use of +side-effectful std functions. 
+""" + [[audits.gleam]] who = "Jamie Nicol " criteria = "safe-to-deploy" @@ -3000,6 +3077,12 @@ criteria = "safe-to-deploy" delta = "0.8.3 -> 0.8.9" notes = "Mainly dependency updates and a few small features (in support of mozilla bugs)." +[[audits.minidump-writer]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +delta = "0.8.9 -> 0.9.0" +notes = "Minor changes and refactors (some of which were done by Mozilla employees)." + [[audits.miniz_oxide]] who = "Mike Hommey " criteria = "safe-to-deploy" @@ -3249,6 +3332,12 @@ who = "Mike Hommey " criteria = "safe-to-deploy" delta = "0.30.0 -> 0.30.3" +[[audits.object]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +delta = "0.33.0 -> 0.36.1" +notes = "Hardly any new unsafe code, no new dependencies nor side-effectful std functions. Plenty of new tests." + [[audits.once_cell]] who = "Mike Hommey " criteria = "safe-to-deploy" @@ -4440,6 +4529,16 @@ who = "Mike Hommey " criteria = "safe-to-run" delta = "0.3.1 -> 0.3.2" +[[audits.tracing]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +version = "0.1.37" +notes = """ +There's only one unsafe impl, and its purpose is to ensure correct behavior by +creating a non-Send marker type (it has nothing to do with soundness). All +dependencies make sense, and no side-effectful std functions are used. +""" + [[audits.tracing]] who = "Mike Hommey " criteria = "safe-to-run" @@ -4450,6 +4549,12 @@ who = "Mike Hommey " criteria = "safe-to-run" delta = "0.1.36 -> 0.1.37" +[[audits.tracing-attributes]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +version = "0.1.24" +notes = "No unsafe code, macros extensively tested and produce reasonable code." 
+ [[audits.tracing-attributes]] who = "Mike Hommey " criteria = "safe-to-run" @@ -4465,6 +4570,16 @@ who = "Mike Hommey " criteria = "safe-to-run" delta = "0.1.23 -> 0.1.24" +[[audits.tracing-core]] +who = "Alex Franchuk " +criteria = "safe-to-deploy" +version = "0.1.30" +notes = """ +Most unsafe code is in implementing non-std sync primitives. Unsafe impls are +logically correct and justified in comments, and unsafe code is sound and +justified in comments. +""" + [[audits.tracing-core]] who = "Mike Hommey " criteria = "safe-to-run" diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index 04394d3da136..27686c91f1ce 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -57,6 +57,27 @@ user-id = 128763 user-login = "martinthomson" user-name = "Martin Thomson" +[[publisher.breakpad-symbols]] +version = "0.21.1" +when = "2024-03-01" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.breakpad-symbols]] +version = "0.21.2" +when = "2024-06-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.breakpad-symbols]] +version = "0.22.0" +when = "2024-07-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + [[publisher.bumpalo]] version = "3.15.4" when = "2024-03-07" @@ -78,6 +99,13 @@ user-id = 6741 user-login = "Darksonn" user-name = "Alice Ryhl" +[[publisher.cachemap2]] +version = "0.3.0" +when = "2024-01-19" +user-id = 106639 +user-login = "afranchuk" +user-name = "Alex Franchuk" + [[publisher.cc]] version = "1.0.89" when = "2024-03-04" @@ -211,6 +239,27 @@ user-id = 4333 user-login = "joshtriplett" user-name = "Josh Triplett" +[[publisher.framehop]] +version = "0.9.0" +when = "2024-02-07" +user-id = 20227 +user-login = "mstange" +user-name = "Markus Stange" + +[[publisher.framehop]] +version = "0.11.2" +when = "2024-05-06" +user-id = 20227 +user-login = "mstange" +user-name = "Markus Stange" + +[[publisher.framehop]] 
+version = "0.12.1" +when = "2024-06-04" +user-id = 20227 +user-login = "mstange" +user-name = "Markus Stange" + [[publisher.freetype]] version = "0.7.0" when = "2020-07-14" @@ -330,6 +379,13 @@ user-id = 2915 user-login = "Amanieu" user-name = "Amanieu d'Antras" +[[publisher.macho-unwind-info]] +version = "0.4.0" +when = "2024-01-17" +user-id = 20227 +user-login = "mstange" +user-name = "Markus Stange" + [[publisher.memchr]] version = "2.7.4" when = "2024-06-14" @@ -344,6 +400,27 @@ user-id = 359 user-login = "seanmonstar" user-name = "Sean McArthur" +[[publisher.minidump]] +version = "0.21.1" +when = "2024-03-01" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.minidump]] +version = "0.21.2" +when = "2024-06-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.minidump]] +version = "0.22.0" +when = "2024-07-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + [[publisher.minidump-common]] version = "0.21.1" when = "2024-03-01" @@ -351,6 +428,48 @@ user-id = 72814 user-login = "gabrielesvelto" user-name = "Gabriele Svelto" +[[publisher.minidump-common]] +version = "0.21.2" +when = "2024-06-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.minidump-common]] +version = "0.22.0" +when = "2024-07-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.minidump-unwind]] +version = "0.21.1" +when = "2024-03-01" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.minidump-unwind]] +version = "0.21.2" +when = "2024-06-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.minidump-unwind]] +version = "0.22.0" +when = "2024-07-03" +user-id = 72814 +user-login = "gabrielesvelto" +user-name = "Gabriele Svelto" + +[[publisher.mio]] +version = "0.6.21" +when = "2019-11-27" +user-id = 
10 +user-login = "carllerche" +user-name = "Carl Lerche" + [[publisher.nss-gk-api]] version = "0.3.0" when = "2023-06-14" @@ -414,6 +533,13 @@ user-id = 3618 user-login = "dtolnay" user-name = "David Tolnay" +[[publisher.pe-unwind-info]] +version = "0.2.3" +when = "2024-03-04" +user-id = 106639 +user-login = "afranchuk" +user-name = "Alex Franchuk" + [[publisher.presser]] version = "0.3.1" when = "2022-10-16" @@ -426,6 +552,13 @@ when = "2024-03-19" user-id = 213776 user-login = "divviup-github-automation" +[[publisher.proc-macro2]] +version = "1.0.74" +when = "2024-01-02" +user-id = 3618 +user-login = "dtolnay" +user-name = "David Tolnay" + [[publisher.proc-macro2]] version = "1.0.84" when = "2024-05-25" @@ -559,6 +692,13 @@ user-id = 2017 user-login = "mbrubeck" user-name = "Matt Brubeck" +[[publisher.syn]] +version = "2.0.46" +when = "2024-01-02" +user-id = 3618 +user-login = "dtolnay" +user-name = "David Tolnay" + [[publisher.syn]] version = "2.0.68" when = "2024-06-23" @@ -814,7 +954,7 @@ who = "Nick Fitzgerald " criteria = "safe-to-deploy" user-id = 696 # Nick Fitzgerald (fitzgen) start = "2020-01-14" -end = "2024-04-21" +end = "2025-07-30" notes = "I am an author of this crate." [[audits.bytecode-alliance.wildcard-audits.bumpalo]] @@ -822,14 +962,14 @@ who = "Nick Fitzgerald " criteria = "safe-to-deploy" user-id = 696 # Nick Fitzgerald (fitzgen) start = "2019-03-16" -end = "2024-03-10" +end = "2025-07-30" [[audits.bytecode-alliance.wildcard-audits.derive_arbitrary]] who = "Nick Fitzgerald " criteria = "safe-to-deploy" user-id = 696 # Nick Fitzgerald (fitzgen) start = "2020-01-14" -end = "2024-04-27" +end = "2025-07-30" notes = "I am an author of this crate" [[audits.bytecode-alliance.wildcard-audits.wasm-encoder]] @@ -1083,6 +1223,15 @@ criteria = "safe-to-deploy" delta = "0.31.1 -> 0.32.0" notes = "Various new features and refactorings as one would expect from an object parsing crate, all looks good." 
+[[audits.bytecode-alliance.audits.object]] +who = "Alex Crichton " +criteria = "safe-to-deploy" +delta = "0.32.0 -> 0.33.0" +notes = """ +No `unsafe` code in this update. Lots of changes but all +object-file-format-related, everything looks good. +""" + [[audits.bytecode-alliance.audits.percent-encoding]] who = "Alex Crichton " criteria = "safe-to-deploy" @@ -1424,24 +1573,6 @@ criteria = "safe-to-run" version = "0.3.2" aggregated-from = "https://chromium.googlesource.com/chromiumos/third_party/rust_crates/+/refs/heads/main/cargo-vet/audits.toml?format=TEXT" -[[audits.google.audits.tracing]] -who = "ChromeOS" -criteria = "safe-to-run" -version = "0.1.35" -aggregated-from = "https://chromium.googlesource.com/chromiumos/third_party/rust_crates/+/refs/heads/main/cargo-vet/audits.toml?format=TEXT" - -[[audits.google.audits.tracing-attributes]] -who = "ChromeOS" -criteria = "safe-to-run" -version = "0.1.22" -aggregated-from = "https://chromium.googlesource.com/chromiumos/third_party/rust_crates/+/refs/heads/main/cargo-vet/audits.toml?format=TEXT" - -[[audits.google.audits.tracing-core]] -who = "ChromeOS" -criteria = "safe-to-run" -version = "0.1.29" -aggregated-from = "https://chromium.googlesource.com/chromiumos/third_party/rust_crates/+/refs/heads/main/cargo-vet/audits.toml?format=TEXT" - [[audits.google.audits.try-lock]] who = "ChromeOS" criteria = "safe-to-run" diff --git a/third_party/rust/arrayvec/.cargo-checksum.json b/third_party/rust/arrayvec/.cargo-checksum.json index d0de9348ca18..f7b6795ac4db 100644 --- a/third_party/rust/arrayvec/.cargo-checksum.json +++ b/third_party/rust/arrayvec/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"CHANGELOG.md":"f684ba712e1546b4cc7de9637f484598cd3fa49b7e7b32c2d98562a8f78ce98c","Cargo.toml":"94a588809d4be252f0146b9e193abc1b22d8afcce0265af19f12905a3db37998","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0245ee104228a100ce5fceecf43e25faae450494d9173f43fd94c27d69fdac13","README.md":"2264c34c62ea4c617d72047b00749b4786dfb9dff2fac24e0320170ee0cd19c8","benches/arraystring.rs":"fad1cecef71c290375befc77c75a868988b8d74135e8f8732bc5b58c85a8ab46","benches/extend.rs":"c38ecedbc88217a7e9fe1a73f916b168a96e48010a7ccd3dba5c3f8dea030d5d","ci/miri.sh":"6bad1d135e1bdd67a6b91c870a7cf5ee09a85f9515633592a6abfbba95fdaf52","src/array_string.rs":"4f0c2dab882e6df7d10a0b043220587626e64ff94dd53a80949667ed861490de","src/arrayvec.rs":"61fba79217f564e54761c25651c06ec3f6d23b9c6af4bfd621992ef2bb95a74b","src/arrayvec_impl.rs":"a5e3391dc350041651f0ba3816c863ff7f552ff553e4a88f801481dfad7e7613","src/char.rs":"1de50e1d6045af2b3496426492315ba774986f9bc8301ffa391de861a08cc9cb","src/errors.rs":"7fa2ff2350f811d52a210a7346c526d6715cacefd38a46e2d3b57ab7dc62b1ab","src/lib.rs":"29a4123616c0912ccae5d931d45f0ccc3746647da1ba077c34538824910dd0ca","src/utils.rs":"d1cdc508dfca385e63f1f57bc8b53ed4a7f515e4ac1ebaa97b1d543fc8369432","tests/serde.rs":"117eb2961b5954d13c577edf60bbb07cb7481685cc9d6c49760a981d71465849","tests/tests.rs":"f8a18ff5deadb167832964ca0fff4f280129dd4a1de024e9cc76ffb7efe1c12c"},"package":"8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"} \ No newline at end of file 
+{"files":{"CHANGELOG.md":"fc3afac61cfb6ed0370d35d8b7a01a9a4c43f07080ca6d7f4ed0ec5ffa1938a5","Cargo.toml":"bc1eb1e01f42009ada105cdcf8210471c9f988274a8e3874a362d169e90673dd","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"4da95ec4ecb65b738d470b7d762894ad9c97da93e6cbfb18b570fc2c96f4b871","README.md":"2264c34c62ea4c617d72047b00749b4786dfb9dff2fac24e0320170ee0cd19c8","benches/arraystring.rs":"fad1cecef71c290375befc77c75a868988b8d74135e8f8732bc5b58c85a8ab46","benches/extend.rs":"c38ecedbc88217a7e9fe1a73f916b168a96e48010a7ccd3dba5c3f8dea030d5d","src/array_string.rs":"39d9c33cdd3ec76e3dfd81996872c58d720877ef93fa7e8b23985d266230a2d6","src/arrayvec.rs":"50e2097cdfa4f1477c925daaf53cc7d59b31c66d11c0766cc1cbf14dad9dbc98","src/arrayvec_impl.rs":"e2642ae566c83ef37ad9aec6af7e3c50af310ba304553f38b2a787666b507580","src/char.rs":"1de50e1d6045af2b3496426492315ba774986f9bc8301ffa391de861a08cc9cb","src/errors.rs":"7fa2ff2350f811d52a210a7346c526d6715cacefd38a46e2d3b57ab7dc62b1ab","src/lib.rs":"8919a7e0c20890b1f094996147a1486d20578579aef03692315cd509e1745222","src/utils.rs":"d1cdc508dfca385e63f1f57bc8b53ed4a7f515e4ac1ebaa97b1d543fc8369432","tests/serde.rs":"117eb2961b5954d13c577edf60bbb07cb7481685cc9d6c49760a981d71465849","tests/tests.rs":"f8a18ff5deadb167832964ca0fff4f280129dd4a1de024e9cc76ffb7efe1c12c"},"package":"96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"} \ No newline at end of file diff --git a/third_party/rust/arrayvec/CHANGELOG.md b/third_party/rust/arrayvec/CHANGELOG.md index 903ef582f7ef..40a646a09595 100644 --- a/third_party/rust/arrayvec/CHANGELOG.md +++ b/third_party/rust/arrayvec/CHANGELOG.md @@ -1,6 +1,18 @@ Recent Changes (arrayvec) ========================= +## 0.7.4 + +- Add feature zeroize to support the `Zeroize` trait by @elichai + +## 0.7.3 + +- Use track_caller on multiple methods like push and similar, for capacity + overflows by @kornelski +- impl BorrowMut for ArrayString by @msrd0 +- Fix 
stacked borrows violations by @clubby789 +- Update Miri CI by @RalfJung + ## 0.7.2 - Add `.as_mut_str()` to `ArrayString` by @clarfonthey diff --git a/third_party/rust/arrayvec/Cargo.toml b/third_party/rust/arrayvec/Cargo.toml index 54b00740c162..308c6b2dfa43 100644 --- a/third_party/rust/arrayvec/Cargo.toml +++ b/third_party/rust/arrayvec/Cargo.toml @@ -12,25 +12,40 @@ [package] edition = "2018" name = "arrayvec" -version = "0.7.2" +version = "0.7.4" authors = ["bluss"] description = "A vector with fixed capacity, backed by an array (it can be stored on the stack too). Implements fixed capacity ArrayVec and ArrayString." documentation = "https://docs.rs/arrayvec/" -keywords = ["stack", "vector", "array", "data-structure", "no_std"] -categories = ["data-structures", "no-std"] +readme = "README.md" +keywords = [ + "stack", + "vector", + "array", + "data-structure", + "no_std", +] +categories = [ + "data-structures", + "no-std", +] license = "MIT OR Apache-2.0" repository = "https://github.com/bluss/arrayvec" + [package.metadata.docs.rs] -features = ["serde"] +features = [ + "serde", + "zeroize", +] [package.metadata.release] no-dev-version = true tag-name = "{{version}}" + [profile.bench] -debug = true +debug = 2 [profile.release] -debug = true +debug = 2 [[bench]] name = "extend" @@ -39,10 +54,17 @@ harness = false [[bench]] name = "arraystring" harness = false + [dependencies.serde] version = "1.0" optional = true default-features = false + +[dependencies.zeroize] +version = "1.4" +optional = true +default-features = false + [dev-dependencies.bencher] version = "0.1.4" diff --git a/third_party/rust/arrayvec/LICENSE-MIT b/third_party/rust/arrayvec/LICENSE-MIT index 2c8f27dba133..db4c067346eb 100644 --- a/third_party/rust/arrayvec/LICENSE-MIT +++ b/third_party/rust/arrayvec/LICENSE-MIT @@ -1,4 +1,4 @@ -Copyright (c) Ulrik Sverdrup "bluss" 2015-2017 +Copyright (c) Ulrik Sverdrup "bluss" 2015-2023 Permission is hereby granted, free of charge, to any person obtaining a 
copy of this software and associated diff --git a/third_party/rust/arrayvec/ci/miri.sh b/third_party/rust/arrayvec/ci/miri.sh deleted file mode 100755 index 272995ca8640..000000000000 --- a/third_party/rust/arrayvec/ci/miri.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -set -ex - -export CARGO_NET_RETRY=5 -export CARGO_NET_TIMEOUT=10 - -MIRI_NIGHTLY=nightly-$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/miri) -echo "Installing latest nightly with Miri: $MIRI_NIGHTLY" -rustup default "$MIRI_NIGHTLY" - -rustup component add miri -cargo miri setup - -cargo miri test diff --git a/third_party/rust/arrayvec/src/array_string.rs b/third_party/rust/arrayvec/src/array_string.rs index c4712a0cbecf..90cfc09014ba 100644 --- a/third_party/rust/arrayvec/src/array_string.rs +++ b/third_party/rust/arrayvec/src/array_string.rs @@ -1,4 +1,4 @@ -use std::borrow::Borrow; +use std::borrow::{Borrow, BorrowMut}; use std::cmp; use std::convert::TryFrom; use std::fmt; @@ -201,6 +201,7 @@ impl ArrayString /// /// assert_eq!(&string[..], "ab"); /// ``` + #[track_caller] pub fn push(&mut self, c: char) { self.try_push(c).unwrap(); } @@ -252,6 +253,7 @@ impl ArrayString /// /// assert_eq!(&string[..], "ad"); /// ``` + #[track_caller] pub fn push_str(&mut self, s: &str) { self.try_push_str(s).unwrap() } @@ -371,10 +373,12 @@ impl ArrayString let next = idx + ch.len_utf8(); let len = self.len(); + let ptr = self.as_mut_ptr(); unsafe { - ptr::copy(self.as_ptr().add(next), - self.as_mut_ptr().add(idx), - len - next); + ptr::copy( + ptr.add(next), + ptr.add(idx), + len - next); self.set_len(len - (next - idx)); } ch @@ -479,6 +483,11 @@ impl Borrow for ArrayString fn borrow(&self) -> &str { self } } +impl BorrowMut for ArrayString +{ + fn borrow_mut(&mut self) -> &mut str { self } +} + impl AsRef for ArrayString { fn as_ref(&self) -> &str { self } @@ -638,3 +647,27 @@ impl<'a, const CAP: usize> TryFrom> for ArrayString Ok(v) } } + +#[cfg(feature = 
"zeroize")] +/// "Best efforts" zeroing of the `ArrayString`'s buffer when the `zeroize` feature is enabled. +/// +/// The length is set to 0, and the buffer is dropped and zeroized. +/// Cannot ensure that previous moves of the `ArrayString` did not leave values on the stack. +/// +/// ``` +/// use arrayvec::ArrayString; +/// use zeroize::Zeroize; +/// let mut string = ArrayString::<6>::from("foobar").unwrap(); +/// string.zeroize(); +/// assert_eq!(string.len(), 0); +/// unsafe { string.set_len(string.capacity()) }; +/// assert_eq!(&*string, "\0\0\0\0\0\0"); +/// ``` +impl zeroize::Zeroize for ArrayString { + fn zeroize(&mut self) { + // There are no elements to drop + self.clear(); + // Zeroize the backing array. + self.xs.zeroize(); + } +} diff --git a/third_party/rust/arrayvec/src/arrayvec.rs b/third_party/rust/arrayvec/src/arrayvec.rs index e69e60c180df..37e151a6bb2f 100644 --- a/third_party/rust/arrayvec/src/arrayvec.rs +++ b/third_party/rust/arrayvec/src/arrayvec.rs @@ -77,6 +77,8 @@ impl ArrayVec { /// assert_eq!(&array[..], &[1, 2]); /// assert_eq!(array.capacity(), 16); /// ``` + #[inline] + #[track_caller] pub fn new() -> ArrayVec { assert_capacity_limit!(CAP); unsafe { @@ -172,6 +174,7 @@ impl ArrayVec { /// /// assert_eq!(&array[..], &[1, 2]); /// ``` + #[track_caller] pub fn push(&mut self, element: T) { ArrayVecImpl::push(self, element) } @@ -277,6 +280,7 @@ impl ArrayVec { /// assert_eq!(&array[..], &["y", "x"]); /// /// ``` + #[track_caller] pub fn insert(&mut self, index: usize, element: T) { self.try_insert(index, element).unwrap() } @@ -507,7 +511,7 @@ impl ArrayVec { } if DELETED { unsafe { - let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); + let hole_slot = cur.sub(g.deleted_cnt); ptr::copy_nonoverlapping(cur, hole_slot, 1); } } @@ -748,6 +752,7 @@ impl DerefMut for ArrayVec { /// assert_eq!(array.capacity(), 3); /// ``` impl From<[T; CAP]> for ArrayVec { + #[track_caller] fn from(array: [T; CAP]) -> Self { let array = 
ManuallyDrop::new(array); let mut vec = >::new(); @@ -843,6 +848,32 @@ impl IntoIterator for ArrayVec { } +#[cfg(feature = "zeroize")] +/// "Best efforts" zeroing of the `ArrayVec`'s buffer when the `zeroize` feature is enabled. +/// +/// The length is set to 0, and the buffer is dropped and zeroized. +/// Cannot ensure that previous moves of the `ArrayVec` did not leave values on the stack. +/// +/// ``` +/// use arrayvec::ArrayVec; +/// use zeroize::Zeroize; +/// let mut array = ArrayVec::from([1, 2, 3]); +/// array.zeroize(); +/// assert_eq!(array.len(), 0); +/// let data = unsafe { core::slice::from_raw_parts(array.as_ptr(), array.capacity()) }; +/// assert_eq!(data, [0, 0, 0]); +/// ``` +impl zeroize::Zeroize for ArrayVec { + fn zeroize(&mut self) { + // Zeroize all the contained elements. + self.iter_mut().zeroize(); + // Drop all the elements and set the length to 0. + self.clear(); + // Zeroize the backing array. + self.xs.zeroize(); + } +} + /// By-value iterator for `ArrayVec`. pub struct IntoIter { index: usize, @@ -978,9 +1009,8 @@ impl<'a, T: 'a, const CAP: usize> Drop for Drain<'a, T, CAP> { // memmove back untouched tail, update to new length let start = source_vec.len(); let tail = self.tail_start; - let src = source_vec.as_ptr().add(tail); - let dst = source_vec.as_mut_ptr().add(start); - ptr::copy(src, dst, self.tail_len); + let ptr = source_vec.as_mut_ptr(); + ptr::copy(ptr.add(tail), ptr.add(start), self.tail_len); source_vec.set_len(start + self.tail_len); } } @@ -1012,6 +1042,7 @@ impl Extend for ArrayVec { /// Extend the `ArrayVec` with an iterator. /// /// ***Panics*** if extending the vector exceeds its capacity. 
+ #[track_caller] fn extend>(&mut self, iter: I) { unsafe { self.extend_from_iter::<_, true>(iter) @@ -1021,6 +1052,7 @@ impl Extend for ArrayVec { #[inline(never)] #[cold] +#[track_caller] fn extend_panic() { panic!("ArrayVec: capacity exceeded in extend/from_iter"); } @@ -1032,6 +1064,7 @@ impl ArrayVec { /// /// Unsafe because if CHECK is false, the length of the input is not checked. /// The caller must ensure the length of the input fits in the capacity. + #[track_caller] pub(crate) unsafe fn extend_from_iter(&mut self, iterable: I) where I: IntoIterator { @@ -1082,7 +1115,7 @@ impl ArrayVec { unsafe fn raw_ptr_add(ptr: *mut T, offset: usize) -> *mut T { if mem::size_of::() == 0 { // Special case for ZST - (ptr as usize).wrapping_add(offset) as _ + ptr.cast::().wrapping_add(offset).cast() } else { ptr.add(offset) } diff --git a/third_party/rust/arrayvec/src/arrayvec_impl.rs b/third_party/rust/arrayvec/src/arrayvec_impl.rs index 6c09834add0e..c5ebe7b8998f 100644 --- a/third_party/rust/arrayvec/src/arrayvec_impl.rs +++ b/third_party/rust/arrayvec/src/arrayvec_impl.rs @@ -35,6 +35,7 @@ pub(crate) trait ArrayVecImpl { /// Return a raw mutable pointer to the vector's buffer. fn as_mut_ptr(&mut self) -> *mut Self::Item; + #[track_caller] fn push(&mut self, element: Self::Item) { self.try_push(element).unwrap() } diff --git a/third_party/rust/arrayvec/src/lib.rs b/third_party/rust/arrayvec/src/lib.rs index 5dc0273a70b4..f9a2fe687981 100644 --- a/third_party/rust/arrayvec/src/lib.rs +++ b/third_party/rust/arrayvec/src/lib.rs @@ -11,6 +11,10 @@ //! - Optional //! - Enable serialization for ArrayVec and ArrayString using serde 1.x //! +//! - `zeroize` +//! - Optional +//! - Implement `Zeroize` for ArrayVec and ArrayString +//! //! ## Rust Version //! //! This version of arrayvec requires Rust 1.51 or later. 
diff --git a/third_party/rust/breakpad-symbols/.cargo-checksum.json b/third_party/rust/breakpad-symbols/.cargo-checksum.json new file mode 100644 index 000000000000..6e20c55646b3 --- /dev/null +++ b/third_party/rust/breakpad-symbols/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"1927847c5674bbc606d47a8a15ca74627a70916ea6f73d33a54416d8057e9843","LICENSE":"06de63df29199a394442b57a28e886059ddc940973e10646877a0793fd53e2c9","README.md":"b0b97fcaf1d9eb5a3f3ca1fc0b0b1f593f7a116465ddcb8158541a40ff98660a","src/http.rs":"025a542391b2464fb6bdc769786b7c3d7ab697d932ee198360bc926e5e2b5cb6","src/lib.rs":"9e98e6bc83535d1e34afc136bda13528cfd542eb4b0a06fc6655e9136f9b24df","src/sym_file/mod.rs":"bb1c42d9b8823eabca753a7eff11533fdf403bcb0e0c91b298fdf07bcfde023e","src/sym_file/parser.rs":"6fbfd6805e8ef2cdadfd6c171d6ad40647a481760e7296f0ac093cb767fdf8dc","src/sym_file/types.rs":"c23a928bf092cbc9302316777ea00e416706bda6879ce7866a118ba18dbb718c","src/sym_file/walker.rs":"05f31914eb04186cdb292d68eb2f5bc5f2be9112e853867e49cc26eee1518a0a"},"package":"7e1ad3f5e2e5c8a42fccedd6792cc05968b39b69c3fe7b5544072ac052f3fe85"} \ No newline at end of file diff --git a/third_party/rust/breakpad-symbols/Cargo.toml b/third_party/rust/breakpad-symbols/Cargo.toml new file mode 100644 index 000000000000..614d4d3a9560 --- /dev/null +++ b/third_party/rust/breakpad-symbols/Cargo.toml @@ -0,0 +1,92 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2018" +name = "breakpad-symbols" +version = "0.22.0" +authors = ["Ted Mielczarek "] +exclude = ["testdata/*"] +description = "A library for working with Google Breakpad's text-format symbol files." +homepage = "https://github.com/rust-minidump/rust-minidump" +readme = "README.md" +license = "MIT" +repository = "https://github.com/rust-minidump/rust-minidump" +resolver = "2" + +[dependencies.async-trait] +version = "0.1.52" + +[dependencies.cab] +version = "0.5.0" +optional = true + +[dependencies.cachemap2] +version = "0.3.0" + +[dependencies.circular] +version = "0.3.0" + +[dependencies.debugid] +version = "0.8.0" + +[dependencies.futures-util] +version = "0.3" + +[dependencies.minidump-common] +version = "0.22.0" + +[dependencies.nom] +version = "7" + +[dependencies.range-map] +version = "0.2" + +[dependencies.reqwest] +version = "0.11.6" +features = [ + "gzip", + "rustls-tls", +] +optional = true +default-features = false + +[dependencies.tempfile] +version = "3.3.0" +optional = true + +[dependencies.thiserror] +version = "1.0.37" + +[dependencies.tracing] +version = "0.1.34" +features = ["log"] + +[dev-dependencies.tempfile] +version = "3.3.0" + +[dev-dependencies.tokio] +version = "1.12.0" +features = ["full"] + +[features] +fuzz = [] +http = [ + "reqwest", + "tempfile", +] +mozilla_cab_symbols = [ + "http", + "cab", +] + +[badges.travis-ci] +repository = "rust-minidump/rust-minidump" diff --git a/third_party/rust/breakpad-symbols/LICENSE b/third_party/rust/breakpad-symbols/LICENSE new file mode 100644 index 000000000000..3af7a472f939 --- /dev/null +++ b/third_party/rust/breakpad-symbols/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2015-2023 rust-minidump contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/breakpad-symbols/README.md b/third_party/rust/breakpad-symbols/README.md new file mode 100644 index 000000000000..382df1e24f8b --- /dev/null +++ b/third_party/rust/breakpad-symbols/README.md @@ -0,0 +1,13 @@ +[![crates.io](https://img.shields.io/crates/v/breakpad-symbols.svg)](https://crates.io/crates/breakpad-symbols) [![](https://docs.rs/breakpad-symbols/badge.svg)](https://docs.rs/breakpad-symbols) + +Fetching, parsing, and evaluation of Breakpad's [text format .sym files](https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md). + +Fetches breakpad symbol files from disk or [a server the conforms the the Tecken protocol](https://tecken.readthedocs.io/en/latest/download.html), and provides an on-disk temp symbol file cache. + +Permissively parses breakpad symbol files to smooth over the unfortunately-very-common situation of corrupt debuginfo. Will generally try to recover the parse by discarding corrupt lines or arbitrarily picking one value when conflicts are found. + +Provides an API for resolving functions and source line info by address from symbol files. + +Provides an API for evaluating breakpad CFI (and WIN) expressions. 
+ +This is primarily designed for use by [minidump-processor](https://crates.io/crates/minidump-processor). diff --git a/third_party/rust/breakpad-symbols/src/http.rs b/third_party/rust/breakpad-symbols/src/http.rs new file mode 100644 index 000000000000..ae0937288844 --- /dev/null +++ b/third_party/rust/breakpad-symbols/src/http.rs @@ -0,0 +1,542 @@ +//! Contains HTTP symbol retrieval specific functionality + +use crate::*; +use reqwest::{redirect, Client, Url}; +use std::io::{self, Write}; +use std::path::Path; +use std::str::FromStr; +use std::time::Duration; +use tempfile::NamedTempFile; +use tracing::{debug, trace, warn}; + +/// A key that uniquely identifies a File associated with a module +type FileKey = (ModuleKey, FileKind); + +/// An implementation of `SymbolSupplier` that loads Breakpad text-format +/// symbols from HTTP URLs. +/// +/// See [`crate::breakpad_sym_lookup`] for details on how paths are searched. +pub struct HttpSymbolSupplier { + /// File paths that are known to be in the cache + #[allow(clippy::type_complexity)] + cached_file_paths: CacheMap), FileError>>, + /// HTTP Client to use for fetching symbols. + client: Client, + /// URLs to search for symbols. + urls: Vec, + /// A `SimpleSymbolSupplier` to use for local symbol paths. + local: SimpleSymbolSupplier, + /// A path at which to cache downloaded symbols. + /// + /// We recommend using a subdirectory of `std::env::temp_dir()`, as this + /// will be your OS's intended location for tempory files. This should + /// give you free garbage collection of the cache while still allowing it + /// to function between runs. + cache: PathBuf, + /// A path to a temporary location where downloaded symbols can be written + /// before being atomically swapped into the cache. + /// + /// We recommend using `std::env::temp_dir()`, as this will be your OS's + /// intended location for temporary files. + tmp: PathBuf, +} + +impl HttpSymbolSupplier { + /// Create a new `HttpSymbolSupplier`. 
+ /// + /// Symbols will be searched for in each of `local_paths` and `cache` first, + /// then via HTTP at each of `urls`. If a symbol file is found via HTTP it + /// will be saved under `cache`. + pub fn new( + urls: Vec, + cache: PathBuf, + tmp: PathBuf, + mut local_paths: Vec, + timeout: Duration, + ) -> HttpSymbolSupplier { + let client = Client::builder().timeout(timeout).build().unwrap(); + let urls = urls + .into_iter() + .filter_map(|mut u| { + if !u.ends_with('/') { + u.push('/'); + } + Url::parse(&u).ok() + }) + .collect(); + local_paths.push(cache.clone()); + let local = SimpleSymbolSupplier::new(local_paths); + let cached_file_paths = Default::default(); + HttpSymbolSupplier { + client, + cached_file_paths, + urls, + local, + cache, + tmp, + } + } + + #[tracing::instrument(level = "trace", skip(self, module), fields(module = crate::basename(&module.code_file())))] + pub async fn locate_file_internal( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result<(PathBuf, Option), FileError> { + self.cached_file_paths + .cache_default(file_key(module, file_kind)) + .get(|| async { + // First look for the file in the cache + if let Ok(path) = self.local.locate_file(module, file_kind).await { + return Ok((path, None)); + } + + // Then try to download the file + // FIXME: if we try to parallelize this with `join` then if we have multiple hits + // we'll end up downloading all of them at once and having them race to write into + // the cache... is that ok? Maybe? Since only one will ever win the swap, and it's + // unlikely to get multiple hits... this might actually be ok! 
+ if let Some(lookup) = lookup(module, file_kind) { + for url in &self.urls { + let fetch = + fetch_lookup(&self.client, url, &lookup, &self.cache, &self.tmp).await; + + if let Ok((path, url)) = fetch { + return Ok((path, url)); + } + } + + // If we're allowed to look for mozilla's special CAB paths, do that + if cfg!(feature = "mozilla_cab_symbols") { + for url in &self.urls { + let fetch = fetch_cab_lookup( + &self.client, + url, + &lookup, + &self.cache, + &self.tmp, + ) + .await; + + if let Ok((path, url)) = fetch { + return Ok((path, url)); + } + } + } + } + Err(FileError::NotFound) + }) + .await + .as_ref() + .clone() + } +} + +fn file_key(module: &(dyn Module + Sync), file_kind: FileKind) -> FileKey { + (module_key(module), file_kind) +} + +fn create_cache_file(tmp_path: &Path, final_path: &Path) -> io::Result { + // Use tempfile to save things to our cache to ensure proper + // atomicity of writes. We may want multiple instances of rust-minidump + // to be sharing a cache, and we don't want one instance to see another + // instance's partially written results. + // + // tempfile is designed explicitly for this purpose, and will handle all + // the platform-specific details and do its best to cleanup if things + // crash. + + // First ensure that the target directory in the cache exists + let base = final_path.parent().ok_or_else(|| { + io::Error::new( + io::ErrorKind::Other, + format!("Bad cache path: {final_path:?}"), + ) + })?; + fs::create_dir_all(base)?; + + NamedTempFile::new_in(tmp_path) +} + +fn commit_cache_file(mut temp: NamedTempFile, final_path: &Path, url: &Url) -> io::Result<()> { + // Append any extra metadata we also want to be cached as "INFO" lines, + // because this is an established format that parsers will ignore the + // contents of by default. + + // INFO URL allows us to properly report the url we retrieved a symbol file + // from, even when the file is loaded from our on-disk cache. 
+ let cache_metadata = format!("INFO URL {url}\n"); + temp.write_all(cache_metadata.as_bytes())?; + + // TODO: don't do this + if final_path.exists() { + fs::remove_file(final_path)?; + } + + // If another process already wrote this entry, prefer their value to + // avoid needless file system churn. + temp.persist_noclobber(final_path)?; + + Ok(()) +} + +/// Perform a code_file/code_identifier lookup for a specific symbol server. +async fn individual_lookup_debug_info_by_code_info( + base_url: &Url, + lookup_path: &str, +) -> Option { + let url = base_url.join(lookup_path).ok()?; + + debug!("Trying code file / code identifier lookup: {}", url); + + // This should not follow redirects--we want the next url if there is one + let no_redirects_client = Client::builder() + .redirect(redirect::Policy::none()) + .build() + .ok()?; + + let response = no_redirects_client.get(url.clone()).send().await; + if let Ok(res) = response { + let res_status = res.status(); + if res_status == reqwest::StatusCode::FOUND + || res_status == reqwest::StatusCode::MOVED_PERMANENTLY + { + let location_header = res.headers().get("Location")?; + let mut new_url = location_header.to_str().ok()?; + if new_url.starts_with('/') { + new_url = new_url.strip_prefix('/').unwrap_or(new_url); + } + + // new_url looks like some/path/stuff/xul.pdb/somedebugid/xul.sym and we want the debug + // file and debug id portions which are at fixed indexes from the end + let mut parts = new_url.rsplit('/'); + let debug_identifier_part = parts.nth(1)?; + let debug_identifier = DebugId::from_str(debug_identifier_part).ok()?; + let debug_file_part = parts.next()?; + let debug_file = String::from(debug_file_part); + + debug!("Found debug info {} {}", debug_file, debug_identifier); + return Some(DebugInfoResult { + debug_file, + debug_identifier, + }); + } + } + + None +} + +/// Given a vector of symbol urls and a module with a code_file and code_identifier, +/// this tries to request a symbol file using the code file 
and code identifier. +/// +/// `//.sym` +/// +/// If the symbol server returns an HTTP 302 redirect, the Location header will +/// have the correct download API url with the debug file and debug identifier. +/// +/// This is supported by tecken +/// +/// This returns a DebugInfoResult with the new debug file and debug identifier +/// or None. +async fn lookup_debug_info_by_code_info( + symbol_urls: &Vec, + module: &(dyn Module + Sync), +) -> Option { + let lookup_path = code_info_breakpad_sym_lookup(module)?; + + for base_url in symbol_urls { + if let Some(result) = + individual_lookup_debug_info_by_code_info(base_url, &lookup_path).await + { + return Some(result); + } + } + + debug!( + "No debug file / debug id found with lookup path {}.", + lookup_path + ); + + None +} + +/// Fetch a symbol file from the URL made by combining `base_url` and `rel_path` using `client`, +/// save the file contents under `cache` + `rel_path` and also return them. +async fn fetch_symbol_file( + client: &Client, + base_url: &Url, + module: &(dyn Module + Sync), + cache: &Path, + tmp: &Path, +) -> Result { + trace!("HttpSymbolSupplier trying symbol server {}", base_url); + // This function is a bit of a complicated mess because we want to write + // the input to our symbol cache, but we're a streaming parser. So we + // use the bare SymbolFile::parse to get access to the contents of + // the input stream as it's downloaded+parsed to write it to disk. + // + // Note that caching is strictly "optional" because it's more important + // to parse the symbols. So if at any point the caching i/o fails, we just + // give up on caching but let the parse+download continue. 
+ + // First try to GET the file from a server + let sym_lookup = breakpad_sym_lookup(module).ok_or(SymbolError::MissingDebugFileOrId)?; + let mut url = base_url + .join(&sym_lookup.server_rel) + .map_err(|_| SymbolError::NotFound)?; + let code_id = module.code_identifier().unwrap_or_default(); + url.query_pairs_mut() + .append_pair("code_file", crate::basename(&module.code_file())) + .append_pair("code_id", code_id.as_str()); + debug!("Trying {}", url); + let res = client + .get(url.clone()) + .send() + .await + .and_then(|res| res.error_for_status()) + .map_err(|_| SymbolError::NotFound)?; + + // Now try to create the temp cache file (not yet in the cache) + let final_cache_path = cache.join(sym_lookup.cache_rel); + let mut temp = create_cache_file(tmp, &final_cache_path) + .map_err(|e| { + warn!("Failed to save symbol file in local disk cache: {}", e); + }) + .ok(); + + // Now stream parse the file as it downloads. + let mut symbol_file = SymbolFile::parse_async(res, |data| { + // While we're downloading+parsing, save this data to the the disk cache too + if let Some(file) = temp.as_mut() { + if let Err(e) = file.write_all(data) { + // Give up on caching this. + warn!("Failed to save symbol file in local disk cache: {}", e); + temp = None; + } + } + }) + .await?; + // Make note of what URL this symbol file was downloaded from. + symbol_file.url = Some(url.to_string()); + + // Try to finish the cache file and atomically swap it into the cache. + if let Some(temp) = temp { + let _ = commit_cache_file(temp, &final_cache_path, &url).map_err(|e| { + warn!("Failed to save symbol file in local disk cache: {}", e); + }); + } + + Ok(symbol_file) +} + +/// Like fetch_symbol_file but instead of parsing the file live, we just download it opaquely based +/// on the given Lookup. +/// +/// The returned value is the path to the downloaded file and the url it was downloaded from. 
+async fn fetch_lookup( + client: &Client, + base_url: &Url, + lookup: &FileLookup, + cache: &Path, + tmp: &Path, +) -> Result<(PathBuf, Option), SymbolError> { + // First try to GET the file from a server + let url = base_url + .join(&lookup.server_rel) + .map_err(|_| SymbolError::NotFound)?; + debug!("Trying {}", url); + let mut res = client + .get(url.clone()) + .send() + .await + .and_then(|res| res.error_for_status()) + .map_err(|_| SymbolError::NotFound)?; + + // Now try to create the temp cache file (not yet in the cache) + let final_cache_path = cache.join(&lookup.cache_rel); + let mut temp = create_cache_file(tmp, &final_cache_path)?; + + // Now stream the contents to our file + while let Some(chunk) = res + .chunk() + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))? + { + temp.write_all(&chunk[..])?; + } + + // And swap it into the cache + temp.persist_noclobber(&final_cache_path) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + + trace!("symbols: fetched native binary: {}", lookup.cache_rel); + + Ok((final_cache_path, Some(url))) +} + +#[cfg(feature = "mozilla_cab_symbols")] +async fn fetch_cab_lookup( + client: &Client, + base_url: &Url, + lookup: &FileLookup, + cache: &Path, + tmp: &Path, +) -> Result<(PathBuf, Option), FileError> { + let cab_lookup = moz_lookup(lookup.clone()); + // First try to GET the file from a server + let url = base_url + .join(&cab_lookup.server_rel) + .map_err(|_| FileError::NotFound)?; + debug!("Trying {}", url); + let res = client + .get(url.clone()) + .send() + .await + .and_then(|res| res.error_for_status()) + .map_err(|_| FileError::NotFound)?; + + let cab_bytes = res.bytes().await.map_err(|_| FileError::NotFound)?; + let final_cache_path = + unpack_cabinet_file(&cab_bytes, lookup, cache, tmp).map_err(|_| FileError::NotFound)?; + + trace!("symbols: fetched native binary: {}", lookup.cache_rel); + + Ok((final_cache_path, Some(url))) +} + +#[cfg(not(feature = 
"mozilla_cab_symbols"))] +async fn fetch_cab_lookup( + _client: &Client, + _base_url: &Url, + _lookup: &FileLookup, + _cache: &Path, + _tmp: &Path, +) -> Result<(PathBuf, Option), FileError> { + Err(FileError::NotFound) +} + +#[cfg(feature = "mozilla_cab_symbols")] +pub fn unpack_cabinet_file( + buf: &[u8], + lookup: &FileLookup, + cache: &Path, + tmp: &Path, +) -> Result { + trace!("symbols: unpacking CAB file: {}", lookup.cache_rel); + // try to find a file in a cabinet archive and unpack it to the destination + use cab::Cabinet; + use std::io::Cursor; + fn get_cabinet_file( + cab: &Cabinet>, + file_name: &str, + ) -> Result { + for folder in cab.folder_entries() { + for file in folder.file_entries() { + let cab_file_name = file.name(); + if cab_file_name.ends_with(file_name) { + return Ok(cab_file_name.to_string()); + } + } + } + Err(std::io::Error::from(std::io::ErrorKind::NotFound)) + } + let final_cache_path = cache.join(&lookup.cache_rel); + + let cursor = Cursor::new(buf); + let mut cab = Cabinet::new(cursor)?; + let file_name = final_cache_path.file_name().unwrap().to_string_lossy(); + let cab_file = get_cabinet_file(&cab, &file_name)?; + let mut reader = cab.read_file(&cab_file)?; + + // Now try to create the temp cache file (not yet in the cache) + let mut temp = create_cache_file(tmp, &final_cache_path)?; + std::io::copy(&mut reader, &mut temp)?; + + // And swap it into the cache + temp.persist_noclobber(&final_cache_path) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + + Ok(final_cache_path) +} + +/// Try to lookup native binaries in the cache and by querying the symbol server + +#[async_trait] +impl SymbolSupplier for HttpSymbolSupplier { + #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(file = crate::basename(&module.code_file())))] + async fn locate_symbols( + &self, + module: &(dyn Module + Sync), + ) -> Result { + // If we don't have a debug_file or debug_identifier, then try to get it + // from a 
symbol server. + let mut debug_file = module.debug_file().map(|name| name.into_owned()); + let mut debug_id = module.debug_identifier(); + let missing_debug_info = debug_file.is_none() || debug_id.is_none(); + + let extra_debug_info; + + if missing_debug_info { + debug!("Missing debug file or debug identifier--trying lookup with code info"); + extra_debug_info = lookup_debug_info_by_code_info(&self.urls, module).await; + if let Some(debug_info_result) = &extra_debug_info { + debug_file = Some(debug_info_result.debug_file.clone()); + debug_id = Some(debug_info_result.debug_identifier); + } + } else { + extra_debug_info = None; + } + + // Build a minimal module for lookups with the debug file and debug + // identifier we need to use + let lookup_module = SimpleModule::from_basic_info( + debug_file, + debug_id, + Some(module.code_file().into_owned()), + module.code_identifier(), + ); + + // First: try local paths for sym files + let local_result = self.local.locate_symbols(&lookup_module).await; + if !matches!(local_result, Err(SymbolError::NotFound)) { + // Everything but NotFound prevents cascading + return local_result.map(|r| LocateSymbolsResult { + symbols: r.symbols, + extra_debug_info: r.extra_debug_info.or(extra_debug_info), + }); + } + trace!("HttpSymbolSupplier search (SimpleSymbolSupplier found nothing)"); + + // Second: try to directly download sym files + for url in &self.urls { + // First, try to get a breakpad .sym file from the symbol server + let sym = + fetch_symbol_file(&self.client, url, &lookup_module, &self.cache, &self.tmp).await; + match sym { + Ok(symbols) => { + trace!("HttpSymbolSupplier parsed file!"); + return Ok(LocateSymbolsResult { + symbols, + extra_debug_info, + }); + } + Err(e) => { + trace!("HttpSymbolSupplier failed: {}", e); + } + } + } + + // If we get this far, we have failed to find anything + Err(SymbolError::NotFound) + } + + async fn locate_file( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result { 
+ self.locate_file_internal(module, file_kind) + .await + .map(|(path, _url)| path) + } +} diff --git a/third_party/rust/breakpad-symbols/src/lib.rs b/third_party/rust/breakpad-symbols/src/lib.rs new file mode 100644 index 000000000000..11638f36a25c --- /dev/null +++ b/third_party/rust/breakpad-symbols/src/lib.rs @@ -0,0 +1,1240 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +//! A library for working with [Google Breakpad][breakpad]'s +//! text-format [symbol files][symbolfiles]. +//! +//! See the [walker][] module for documentation on CFI evaluation. +//! +//! The highest-level API provided by this crate is to use the +//! [`Symbolizer`][symbolizer] struct. +//! +//! [breakpad]: https://chromium.googlesource.com/breakpad/breakpad/+/master/ +//! [symbolfiles]: https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md +//! [symbolizer]: struct.Symbolizer.html +//! +//! # Examples +//! +//! ``` +//! # std::env::set_current_dir(env!("CARGO_MANIFEST_DIR")); +//! use breakpad_symbols::{SimpleSymbolSupplier, Symbolizer, SimpleFrame, SimpleModule}; +//! use debugid::DebugId; +//! use std::path::PathBuf; +//! use std::str::FromStr; +//! +//! #[tokio::main] +//! async fn main() { +//! let paths = vec!(PathBuf::from("../testdata/symbols/")); +//! let supplier = SimpleSymbolSupplier::new(paths); +//! let symbolizer = Symbolizer::new(supplier); +//! +//! // Simple function name lookup with debug file, debug id, address. +//! let debug_id = DebugId::from_str("5A9832E5287241C1838ED98914E9B7FF1").unwrap(); +//! assert_eq!(symbolizer.get_symbol_at_address("test_app.pdb", debug_id, 0x1010) +//! .await +//! .unwrap(), +//! "vswprintf"); +//! } +//! 
``` + +use async_trait::async_trait; +use cachemap2::CacheMap; +use debugid::{CodeId, DebugId}; +use futures_util::lock::Mutex as FutMutex; +use tracing::trace; + +use std::collections::HashMap; +use std::fs; +use std::path::PathBuf; +use std::sync::Mutex; +use std::{borrow::Cow, sync::Arc}; + +pub use minidump_common::{traits::Module, utils::basename}; +pub use sym_file::walker; + +pub use crate::sym_file::{CfiRules, SymbolFile}; + +#[cfg(feature = "http")] +pub mod http; +mod sym_file; + +#[cfg(feature = "http")] +pub use http::*; + +// Re-exports for the purposes of the cfi_eval fuzzer. Not public API. +#[doc(hidden)] +#[cfg(feature = "fuzz")] +pub mod fuzzing_private_exports { + pub use crate::sym_file::walker::{eval_win_expr_for_fuzzer, walk_with_stack_cfi}; + pub use crate::sym_file::{StackInfoWin, WinStackThing}; +} + +/// Statistics on the symbols of a module. +#[derive(Default, Debug, Clone)] +pub struct SymbolStats { + /// If the module's symbols were downloaded, this is the url used. + pub symbol_url: Option, + /// If the symbols were found and loaded into memory. + pub loaded_symbols: bool, + /// If we tried to parse the symbols, but failed. + pub corrupt_symbols: bool, + /// If the module's debug info had to be looked up, this is the debug info used. + pub extra_debug_info: Option, +} + +/// Statistics on pending symbols. +/// +/// Fetched with [`Symbolizer::pending_stats`]. +#[derive(Default, Debug, Clone)] +pub struct PendingSymbolStats { + /// The number of symbols we have finished processing + /// (could be either successful or not, either way is fine). + pub symbols_processed: u64, + /// The number of symbols we have been asked to process. + pub symbols_requested: u64, +} + +/// A `Module` implementation that holds arbitrary data. +/// +/// This can be useful for getting symbols for a module when you +/// have a debug id and filename but not an actual minidump. If you have a +/// minidump, you should be using [`MinidumpModule`][minidumpmodule]. 
+/// +/// [minidumpmodule]: ../minidump/struct.MinidumpModule.html +#[derive(Default)] +pub struct SimpleModule { + pub base_address: Option, + pub size: Option, + pub code_file: Option, + pub code_identifier: Option, + pub debug_file: Option, + pub debug_id: Option, + pub version: Option, +} + +impl SimpleModule { + /// Create a `SimpleModule` with the given `debug_file` and `debug_id`. + /// + /// Uses `default` for the remaining fields. + pub fn new(debug_file: &str, debug_id: DebugId) -> SimpleModule { + SimpleModule { + debug_file: Some(String::from(debug_file)), + debug_id: Some(debug_id), + ..SimpleModule::default() + } + } + + /// Create a `SimpleModule` with `debug_file`, `debug_id`, `code_file`, and `code_identifier`. + /// + /// Uses `default` for the remaining fields. + pub fn from_basic_info( + debug_file: Option, + debug_id: Option, + code_file: Option, + code_identifier: Option, + ) -> SimpleModule { + SimpleModule { + debug_file, + debug_id, + code_file, + code_identifier, + ..SimpleModule::default() + } + } +} + +impl Module for SimpleModule { + fn base_address(&self) -> u64 { + self.base_address.unwrap_or(0) + } + fn size(&self) -> u64 { + self.size.unwrap_or(0) + } + fn code_file(&self) -> Cow { + self.code_file + .as_ref() + .map_or(Cow::from(""), |s| Cow::Borrowed(&s[..])) + } + fn code_identifier(&self) -> Option { + self.code_identifier.as_ref().cloned() + } + fn debug_file(&self) -> Option> { + self.debug_file.as_ref().map(|s| Cow::Borrowed(&s[..])) + } + fn debug_identifier(&self) -> Option { + self.debug_id + } + fn version(&self) -> Option> { + self.version.as_ref().map(|s| Cow::Borrowed(&s[..])) + } +} + +/// Like `PathBuf::file_name`, but try to work on Windows or POSIX-style paths. +fn leafname(path: &str) -> &str { + path.rsplit(|c| c == '/' || c == '\\') + .next() + .unwrap_or(path) +} + +/// If `filename` ends with `match_extension`, remove it. Append `new_extension` to the result. 
+fn replace_or_add_extension(filename: &str, match_extension: &str, new_extension: &str) -> String { + let mut bits = filename.split('.').collect::>(); + if bits.len() > 1 + && bits + .last() + .map_or(false, |e| e.to_lowercase() == match_extension) + { + bits.pop(); + } + bits.push(new_extension); + bits.join(".") +} + +/// A lookup we would like to perform for some file (sym, exe, pdb, dll, ...) +#[derive(Debug, Clone)] +pub struct FileLookup { + pub debug_id: String, + pub debug_file: String, + pub cache_rel: String, + pub server_rel: String, +} + +/// Get a relative symbol path at which to locate symbols for `module`. +/// +/// Symbols are generally stored in the layout used by Microsoft's symbol +/// server and associated tools: +/// `//.sym`. If +/// `debug filename` ends with *.pdb* the leaf filename will have that +/// removed. +/// `extension` is the expected extension for the symbol filename, generally +/// *sym* if Breakpad text format symbols are expected. +/// +/// The debug filename and debug identifier can be found in the +/// [first line][module_line] of the symbol file output by the dump_syms tool. +/// You can use [this script][packagesymbols] to run dump_syms and put the +/// resulting symbol files in the proper directory structure. 
+/// +/// [module_line]: https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#MODULE-records +/// [packagesymbols]: https://gist.github.com/luser/2ad32d290f224782fcfc#file-packagesymbols-py +pub fn breakpad_sym_lookup(module: &(dyn Module + Sync)) -> Option { + let debug_file = module.debug_file()?; + let debug_id = module.debug_identifier()?; + + let leaf = leafname(&debug_file); + let filename = replace_or_add_extension(leaf, "pdb", "sym"); + let rel_path = [leaf, &debug_id.breakpad().to_string(), &filename[..]].join("/"); + Some(FileLookup { + cache_rel: rel_path.clone(), + server_rel: rel_path, + debug_id: debug_id.breakpad().to_string(), + debug_file: filename, + }) +} + +/// Get a relative symbol path at which to locate symbols for `module` using +/// the code file and code identifier. This is helpful for Microsoft modules +/// where we don't have a valid debug filename and debug id to retrieve the +/// symbol file with and the symbol server supports looking up debug filename +/// and debug id using the code file and code id. +/// +/// If `code file` ends with *.dll* the leaf filename will have that removed. +/// `extension` is the expected extension for the symbol filename, generally +/// *sym* if Breakpad text format symbols are expected. 
+/// +/// `//.sym` +pub fn code_info_breakpad_sym_lookup(module: &(dyn Module + Sync)) -> Option { + let code_file = module.code_file(); + let code_identifier = module.code_identifier()?; + + if code_file.is_empty() { + return None; + } + let leaf = leafname(&code_file); + let filename = replace_or_add_extension(leaf, "dll", "sym"); + let rel_path = [ + leaf, + &code_identifier.to_string().to_uppercase(), + &filename[..], + ] + .join("/"); + + Some(rel_path) +} + +/// Returns a lookup for this module's extra debuginfo (pdb) +pub fn extra_debuginfo_lookup(module: &(dyn Module + Sync)) -> Option { + let debug_file = module.debug_file()?; + let debug_id = module.debug_identifier()?; + + let leaf = leafname(&debug_file); + let rel_path = [leaf, &debug_id.breakpad().to_string(), leaf].join("/"); + Some(FileLookup { + cache_rel: rel_path.clone(), + server_rel: rel_path, + debug_id: debug_id.to_string(), + debug_file: leaf.to_string(), + }) +} + +/// Returns a lookup for this module's binary (exe, dll, so, dylib, ...) +pub fn binary_lookup(module: &(dyn Module + Sync)) -> Option { + // NOTE: to make dump_syms happy we're currently moving the bin + // to be next to the pdb. This changes where we would naively put it, + // hence the two different paths! + + let code_file = module.code_file(); + let code_id = module.code_identifier()?; + let debug_file = module.debug_file()?; + let debug_id = module.debug_identifier()?; + + let bin_leaf = leafname(&code_file); + let debug_leaf = leafname(&debug_file); + + Some(FileLookup { + cache_rel: [debug_leaf, &debug_id.breakpad().to_string(), bin_leaf].join("/"), + server_rel: [bin_leaf, code_id.as_ref(), bin_leaf].join("/"), + debug_id: debug_id.to_string(), + debug_file: debug_file.to_string(), + }) +} + +/// Mangles a lookup to mozilla's format where the last char is replaced by an underscore +/// (and the file is wrapped in a CAB, but dump_syms handles that transparently). 
+pub fn moz_lookup(mut lookup: FileLookup) -> FileLookup { + lookup.server_rel.pop().unwrap(); + lookup.server_rel.push('_'); + lookup +} + +pub fn lookup(module: &(dyn Module + Sync), file_kind: FileKind) -> Option { + match file_kind { + FileKind::BreakpadSym => breakpad_sym_lookup(module), + FileKind::Binary => binary_lookup(module), + FileKind::ExtraDebugInfo => extra_debuginfo_lookup(module), + } +} + +/// Possible results of locating symbols for a module. +/// +/// Because symbols may be found from different sources, symbol providers +/// are usually configured to "cascade" into the next one whenever they report +/// `NotFound`. +/// +/// Cascading currently assumes that if any provider finds symbols for +/// a module, all other providers will find the same symbols (if any). +/// Therefore cascading will not be applied if a LoadError or ParseError +/// occurs (because presumably, all the other sources will also fail to +/// load/parse.) +/// +/// In theory we could do some interesting things where we attempt to +/// be more robust and actually merge together the symbols from multiple +/// sources, but that would make it difficult to cache symbol files, and +/// would rarely actually improve results. +/// +/// Since symbol files can be on the order of a gigabyte(!) and downloaded +/// from the network, aggressive caching is pretty important. The current +/// approach is a nice balance of simple and effective. +#[derive(Debug, thiserror::Error)] +pub enum SymbolError { + /// Symbol file could not be found. + /// + /// In this case other symbol providers may still be able to find it! + #[error("symbol file not found")] + NotFound, + /// The module was lacking either the debug file or debug id, as such the + /// path of the symbol could not be generated. + #[error("the debug file or id were missing")] + MissingDebugFileOrId, + /// Symbol file could not be loaded into memory. 
+ #[error("couldn't read input stream")] + LoadError(#[from] std::io::Error), + /// Symbol file was too corrupt to be parsed at all. + /// + /// Because symbol files are pretty modular, many corruptions/ambiguities + /// can be either repaired or discarded at a fairly granular level + /// (e.g. a bad STACK WIN line can be discarded without affecting anything + /// else). But sometimes we can't make any sense of the symbol file, and + /// you find yourself here. + #[error("parse error: {0} at line {1}")] + ParseError(&'static str, u64), +} + +#[derive(Clone, Debug, thiserror::Error)] +pub enum FileError { + #[error("file not found")] + NotFound, +} + +/// An error produced by fill_symbol. +#[derive(Debug)] +pub struct FillSymbolError { + // We don't want to yield a full SymbolError for fill_symbol + // as this would involve cloning bulky Error strings every time + // someone requested symbols for a missing module. + // + // As it turns out there's currently no reason to care about *why* + // fill_symbol, so for now this is just a dummy type until we have + // something to put here. + // + // The only reason fill_symbol *can* produce an Err is so that + // the caller can distinguish between "we had symbols, but this address + // didn't map to a function name" and "we had no symbols for that module" + // (this is used as a heuristic for stack scanning). +} + +impl PartialEq for SymbolError { + fn eq(&self, other: &SymbolError) -> bool { + matches!( + (self, other), + (SymbolError::NotFound, SymbolError::NotFound) + | (SymbolError::LoadError(_), SymbolError::LoadError(_)) + | (SymbolError::ParseError(..), SymbolError::ParseError(..)) + ) + } +} + +/// The result of a lookup by code_file/code_identifier against a symbol +/// server. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DebugInfoResult { + pub debug_file: String, + pub debug_identifier: DebugId, +} + +/// The result of locating symbols, with debug info if it had to be looked up. 
+#[derive(Debug, PartialEq, Eq)] +pub struct LocateSymbolsResult { + pub symbols: SymbolFile, + pub extra_debug_info: Option, +} + +/// A trait for things that can locate symbols for a given module. +#[async_trait] +pub trait SymbolSupplier { + /// Locate and load a symbol file for `module`. + /// + /// Implementations may use any strategy for locating and loading + /// symbols. + async fn locate_symbols( + &self, + module: &(dyn Module + Sync), + ) -> Result; + + /// Locate a specific file associated with a `module` + /// + /// Implementations may use any strategy for locating and loading + /// symbols. + async fn locate_file( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result; +} + +/// An implementation of `SymbolSupplier` that loads Breakpad text-format symbols from local disk +/// paths. +/// +/// See [`breakpad_sym_lookup`] for details on how paths are searched. +pub struct SimpleSymbolSupplier { + /// Local disk paths in which to search for symbols. + paths: Vec, +} + +impl SimpleSymbolSupplier { + /// Instantiate a new `SimpleSymbolSupplier` that will search in `paths`. 
+ pub fn new(paths: Vec) -> SimpleSymbolSupplier { + SimpleSymbolSupplier { paths } + } +} + +#[async_trait] +impl SymbolSupplier for SimpleSymbolSupplier { + #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(module = crate::basename(&module.code_file())))] + async fn locate_symbols( + &self, + module: &(dyn Module + Sync), + ) -> Result { + let file_path = self + .locate_file(module, FileKind::BreakpadSym) + .await + .map_err(|_| SymbolError::NotFound)?; + let symbols = SymbolFile::from_file(&file_path).map_err(|e| { + trace!("SimpleSymbolSupplier failed: {}", e); + e + })?; + trace!("SimpleSymbolSupplier parsed file!"); + Ok(LocateSymbolsResult { + symbols, + extra_debug_info: None, + }) + } + + #[tracing::instrument(level = "trace", skip(self, module), fields(module = crate::basename(&module.code_file())))] + async fn locate_file( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result { + trace!("SimpleSymbolSupplier search"); + if let Some(lookup) = lookup(module, file_kind) { + for path in self.paths.iter() { + if path.is_file() && file_kind == FileKind::BreakpadSym { + if let Ok(sf) = SymbolFile::from_file(path) { + if sf.module_id == lookup.debug_id { + trace!("SimpleSymbolSupplier found file {}", path.display()); + return Ok(path.to_path_buf()); + } + } + } else if path.is_dir() { + let test_path = path.join(lookup.cache_rel.clone()); + if fs::metadata(&test_path).ok().map_or(false, |m| m.is_file()) { + trace!("SimpleSymbolSupplier found file {}", test_path.display()); + return Ok(test_path); + } + } + } + } else { + trace!("SimpleSymbolSupplier could not build symbol_path"); + } + Err(FileError::NotFound) + } +} + +/// A SymbolSupplier that maps module names (code_files) to an in-memory string. +/// +/// Intended for mocking symbol files in tests. 
+#[derive(Default, Debug, Clone)] +pub struct StringSymbolSupplier { + modules: HashMap, + code_info_to_debug_info: HashMap, +} + +impl StringSymbolSupplier { + /// Make a new StringSymbolSupplier with no modules. + pub fn new(modules: HashMap) -> Self { + Self { + modules, + code_info_to_debug_info: HashMap::new(), + } + } + + /// Perform a code_file/code_identifier lookup for a specific symbol server. + async fn lookup_debug_info_by_code_info( + &self, + module: &(dyn Module + Sync), + ) -> Option { + let lookup_path = code_info_breakpad_sym_lookup(module)?; + self.code_info_to_debug_info.get(&lookup_path).cloned() + } +} + +#[async_trait] +impl SymbolSupplier for StringSymbolSupplier { + #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(file = crate::basename(&module.code_file())))] + async fn locate_symbols( + &self, + module: &(dyn Module + Sync), + ) -> Result { + trace!("StringSymbolSupplier search"); + if let Some(symbols) = self.modules.get(&*module.code_file()) { + trace!("StringSymbolSupplier found file"); + let file = SymbolFile::from_bytes(symbols.as_bytes())?; + trace!("StringSymbolSupplier parsed file!"); + return Ok(LocateSymbolsResult { + symbols: file, + extra_debug_info: self.lookup_debug_info_by_code_info(module).await, + }); + } + trace!("StringSymbolSupplier could not find file"); + Err(SymbolError::NotFound) + } + + async fn locate_file( + &self, + _module: &(dyn Module + Sync), + _file_kind: FileKind, + ) -> Result { + // StringSymbolSupplier can never find files, is for testing + Err(FileError::NotFound) + } +} + +/// A trait for setting symbol information on something like a stack frame. +pub trait FrameSymbolizer { + /// Get the program counter value for this frame. + fn get_instruction(&self) -> u64; + /// Set the name, base address, and parameter size of the function in + /// which this frame is executing. 
+ fn set_function(&mut self, name: &str, base: u64, parameter_size: u32); + /// Set the source file and (1-based) line number this frame represents. + fn set_source_file(&mut self, file: &str, line: u32, base: u64); + /// Add an inline frame. This method can be called multiple times, in the + /// order "outside to inside". + fn add_inline_frame(&mut self, _name: &str, _file: Option<&str>, _line: Option) {} +} + +pub trait FrameWalker { + /// Get the instruction address that we're trying to unwind from. + fn get_instruction(&self) -> u64; + /// Check whether the callee has a callee of its own. + fn has_grand_callee(&self) -> bool; + /// Get the number of bytes the callee's callee's parameters take up + /// on the stack (or 0 if unknown/invalid). This is needed for + /// STACK WIN unwinding. + fn get_grand_callee_parameter_size(&self) -> u32; + /// Get a register-sized value stored at this address. + fn get_register_at_address(&self, address: u64) -> Option; + /// Get the value of a register from the callee's frame. + fn get_callee_register(&self, name: &str) -> Option; + /// Set the value of a register for the caller's frame. + fn set_caller_register(&mut self, name: &str, val: u64) -> Option<()>; + /// Explicitly mark one of the caller's registers as invalid. + fn clear_caller_register(&mut self, name: &str); + /// Set whatever registers in the caller should be set based on the cfa (e.g. rsp). + fn set_cfa(&mut self, val: u64) -> Option<()>; + /// Set whatever registers in the caller should be set based on the return address (e.g. rip). + fn set_ra(&mut self, val: u64) -> Option<()>; +} + +/// A simple implementation of `FrameSymbolizer` that just holds data. +#[derive(Debug, Default)] +pub struct SimpleFrame { + /// The program counter value for this frame. + pub instruction: u64, + /// The name of the function in which the current instruction is executing. + pub function: Option, + /// The offset of the start of `function` from the module base. 
+ pub function_base: Option, + /// The size, in bytes, that this function's parameters take up on the stack. + pub parameter_size: Option, + /// The name of the source file in which the current instruction is executing. + pub source_file: Option, + /// The 1-based index of the line number in `source_file` in which the current instruction is + /// executing. + pub source_line: Option, + /// The offset of the start of `source_line` from the function base. + pub source_line_base: Option, +} + +impl SimpleFrame { + /// Instantiate a `SimpleFrame` with instruction pointer `instruction`. + pub fn with_instruction(instruction: u64) -> SimpleFrame { + SimpleFrame { + instruction, + ..SimpleFrame::default() + } + } +} + +impl FrameSymbolizer for SimpleFrame { + fn get_instruction(&self) -> u64 { + self.instruction + } + fn set_function(&mut self, name: &str, base: u64, parameter_size: u32) { + self.function = Some(String::from(name)); + self.function_base = Some(base); + self.parameter_size = Some(parameter_size); + } + fn set_source_file(&mut self, file: &str, line: u32, base: u64) { + self.source_file = Some(String::from(file)); + self.source_line = Some(line); + self.source_line_base = Some(base); + } +} + +/// A type of file related to a module that you might want downloaded. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum FileKind { + /// A Breakpad symbol (.sym) file + BreakpadSym, + /// The native binary of a module ("code file") (.exe/.dll/.so/.dylib...) + Binary, + /// Extra debuginfo for a module ("debug file") (.pdb/...?) + ExtraDebugInfo, +} + +// Can't make Module derive Hash, since then it can't be used as a trait +// object (because the hash method is generic), so this is a hacky workaround. +/// A key that uniquely identifies a module: +/// +/// * code_file +/// * code_id +/// * debug_file +/// * debug_id +type ModuleKey = (String, Option, Option, Option); + +/// Helper for deriving a hash key from a `Module` for `Symbolizer`. 
+fn module_key(module: &(dyn Module + Sync)) -> ModuleKey { + ( + module.code_file().to_string(), + module.code_identifier().map(|s| s.to_string()), + module.debug_file().map(|s| s.to_string()), + module.debug_identifier().map(|s| s.to_string()), + ) +} + +struct CachedAsyncResult { + inner: FutMutex>>>, +} + +impl Default for CachedAsyncResult { + fn default() -> Self { + CachedAsyncResult { + inner: FutMutex::new(None), + } + } +} + +impl CachedAsyncResult { + pub async fn get<'a, F, Fut>(&self, f: F) -> Arc> + where + F: FnOnce() -> Fut + 'a, + Fut: std::future::Future> + 'a, + { + let mut guard = self.inner.lock().await; + if guard.is_none() { + *guard = Some(Arc::new(f().await)); + } + guard.as_ref().unwrap().clone() + } +} + +/// Symbolicate stack frames. +/// +/// A `Symbolizer` manages loading symbols and looking up symbols in them +/// including caching so that symbols for a given module are only loaded once. +/// +/// Call [`Symbolizer::new`][new] to instantiate a `Symbolizer`. A Symbolizer +/// requires a [`SymbolSupplier`][supplier] to locate symbols. If you have +/// symbols on disk in the [customary directory layout][breakpad_sym_lookup], a +/// [`SimpleSymbolSupplier`][simple] will work. +/// +/// Use [`get_symbol_at_address`][get_symbol] or [`fill_symbol`][fill_symbol] to +/// do symbol lookup. +/// +/// [new]: struct.Symbolizer.html#method.new +/// [supplier]: trait.SymbolSupplier.html +/// [simple]: struct.SimpleSymbolSupplier.html +/// [get_symbol]: struct.Symbolizer.html#method.get_symbol_at_address +/// [fill_symbol]: struct.Symbolizer.html#method.fill_symbol + +pub struct Symbolizer { + /// Symbol supplier for locating symbols. + supplier: Box, + /// Cache of symbol locating results. + // TODO?: use lru-cache: https://crates.io/crates/lru-cache/ + // note that using an lru-cache would mess up the fact that we currently + // use this for statistics collection. Splitting out statistics would be + // way messier but not impossible. 
+ symbols: CacheMap>, + pending_stats: Mutex, + stats: Mutex>, +} + +impl Symbolizer { + /// Create a `Symbolizer` that uses `supplier` to locate symbols. + pub fn new(supplier: T) -> Symbolizer { + Symbolizer { + supplier: Box::new(supplier), + symbols: CacheMap::default(), + pending_stats: Mutex::default(), + stats: Mutex::default(), + } + } + + /// Helper method for non-minidump-using callers. + /// + /// Pass `debug_file` and `debug_id` describing a specific module, + /// and `address`, a module-relative address, and get back + /// a symbol in that module that covers that address, or `None`. + /// + /// See [the module-level documentation][module] for an example. + /// + /// [module]: index.html + pub async fn get_symbol_at_address( + &self, + debug_file: &str, + debug_id: DebugId, + address: u64, + ) -> Option { + let k = (debug_file, debug_id); + let mut frame = SimpleFrame::with_instruction(address); + self.fill_symbol(&k, &mut frame).await.ok()?; + frame.function + } + + /// Fill symbol information in `frame` using the instruction address + /// from `frame`, and the module information from `module`. If you're not + /// using a minidump module, you can use [`SimpleModule`][simplemodule] and + /// [`SimpleFrame`][simpleframe]. + /// + /// An Error indicates that no symbols could be found for the relevant + /// module. 
+ /// + /// # Examples + /// + /// ``` + /// # std::env::set_current_dir(env!("CARGO_MANIFEST_DIR")); + /// use std::str::FromStr; + /// use debugid::DebugId; + /// use breakpad_symbols::{SimpleSymbolSupplier,Symbolizer,SimpleFrame,SimpleModule}; + /// + /// #[tokio::main] + /// async fn main() { + /// use std::path::PathBuf; + /// let paths = vec!(PathBuf::from("../testdata/symbols/")); + /// let supplier = SimpleSymbolSupplier::new(paths); + /// let symbolizer = Symbolizer::new(supplier); + /// let debug_id = DebugId::from_str("5A9832E5287241C1838ED98914E9B7FF1").unwrap(); + /// let m = SimpleModule::new("test_app.pdb", debug_id); + /// let mut f = SimpleFrame::with_instruction(0x1010); + /// let _ = symbolizer.fill_symbol(&m, &mut f).await; + /// assert_eq!(f.function.unwrap(), "vswprintf"); + /// assert_eq!(f.source_file.unwrap(), + /// r"c:\program files\microsoft visual studio 8\vc\include\swprintf.inl"); + /// assert_eq!(f.source_line.unwrap(), 51); + /// } + /// ``` + /// + /// [simplemodule]: struct.SimpleModule.html + /// [simpleframe]: struct.SimpleFrame.html + pub async fn fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError> { + let cached_sym = self.get_symbols(module).await; + let sym = cached_sym + .as_ref() + .as_ref() + .map_err(|_| FillSymbolError {})?; + sym.fill_symbol(module, frame); + Ok(()) + } + + /// Collect various statistics on the symbols. + /// + /// Keys are the file name of the module (code_file's file name). + pub fn stats(&self) -> HashMap { + self.stats.lock().unwrap().clone() + } + + /// Get live symbol stats for interactive updates. + pub fn pending_stats(&self) -> PendingSymbolStats { + self.pending_stats.lock().unwrap().clone() + } + + /// Tries to use CFI to walk the stack frame of the FrameWalker + /// using the symbols of the given Module. Output will be written + /// using the FrameWalker's `set_caller_*` APIs. 
+ pub async fn walk_frame( + &self, + module: &(dyn Module + Sync), + walker: &mut (dyn FrameWalker + Send), + ) -> Option<()> { + let cached_sym = self.get_symbols(module).await; + let sym = cached_sym.as_ref(); + if let Ok(sym) = sym { + trace!("found symbols for address, searching for cfi entries"); + sym.walk_frame(module, walker) + } else { + trace!("couldn't find symbols for address, cannot use cfi"); + None + } + } + + /// Gets the fully parsed SymbolFile for a given module (or an Error). + /// + /// This returns a CachedOperation which is guaranteed to already be resolved (lifetime stuff). + async fn get_symbols( + &self, + module: &(dyn Module + Sync), + ) -> Arc> { + self.symbols + .cache_default(module_key(module)) + .get(|| async { + trace!("locating symbols for module {}", module.code_file()); + self.pending_stats.lock().unwrap().symbols_requested += 1; + let result = self.supplier.locate_symbols(module).await; + self.pending_stats.lock().unwrap().symbols_processed += 1; + + let mut stats = SymbolStats::default(); + match &result { + Ok(res) => { + stats.symbol_url.clone_from(&res.symbols.url); + stats.loaded_symbols = true; + stats.corrupt_symbols = false; + stats.extra_debug_info.clone_from(&res.extra_debug_info); + } + Err(SymbolError::NotFound) => { + stats.loaded_symbols = false; + } + Err(SymbolError::MissingDebugFileOrId) => { + stats.loaded_symbols = false; + } + Err(SymbolError::LoadError(_)) => { + stats.loaded_symbols = false; + } + Err(SymbolError::ParseError(..)) => { + stats.loaded_symbols = true; + stats.corrupt_symbols = true; + } + } + let key = leafname(module.code_file().as_ref()).to_string(); + self.stats.lock().unwrap().insert(key, stats); + + result.map(|r| r.symbols) + }) + .await + } + + /// Gets the path to a file for a given module (or an Error). + /// + /// This returns a CachedOperation which is guaranteed to already be resolved (lifetime stuff). 
+ pub async fn get_file_path( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result { + self.supplier.locate_file(module, file_kind).await + } +} + +#[test] +fn test_leafname() { + assert_eq!(leafname("c:\\foo\\bar\\test.pdb"), "test.pdb"); + assert_eq!(leafname("c:/foo/bar/test.pdb"), "test.pdb"); + assert_eq!(leafname("test.pdb"), "test.pdb"); + assert_eq!(leafname("test"), "test"); + assert_eq!(leafname("/path/to/test"), "test"); +} + +#[test] +fn test_replace_or_add_extension() { + assert_eq!( + replace_or_add_extension("test.pdb", "pdb", "sym"), + "test.sym" + ); + assert_eq!( + replace_or_add_extension("TEST.PDB", "pdb", "sym"), + "TEST.sym" + ); + assert_eq!(replace_or_add_extension("test", "pdb", "sym"), "test.sym"); + assert_eq!( + replace_or_add_extension("test.x", "pdb", "sym"), + "test.x.sym" + ); + assert_eq!(replace_or_add_extension("", "pdb", "sym"), ".sym"); + assert_eq!(replace_or_add_extension("test.x", "x", "y"), "test.y"); +} + +#[cfg(test)] +mod test { + + use super::*; + use std::fs::File; + use std::io::Write; + use std::path::Path; + use std::str::FromStr; + + #[tokio::test] + async fn test_relative_symbol_path() { + let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap(); + let m = SimpleModule::new("foo.pdb", debug_id); + assert_eq!( + &breakpad_sym_lookup(&m).unwrap().cache_rel, + "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym" + ); + + let m2 = SimpleModule::new("foo.pdb", debug_id); + assert_eq!( + &breakpad_sym_lookup(&m2).unwrap().cache_rel, + "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym" + ); + + let m3 = SimpleModule::new("foo.xyz", debug_id); + assert_eq!( + &breakpad_sym_lookup(&m3).unwrap().cache_rel, + "foo.xyz/ABCD1234ABCD1234ABCDABCD12345678a/foo.xyz.sym" + ); + + let m4 = SimpleModule::new("foo.xyz", debug_id); + assert_eq!( + &breakpad_sym_lookup(&m4).unwrap().cache_rel, + "foo.xyz/ABCD1234ABCD1234ABCDABCD12345678a/foo.xyz.sym" + ); + + let bad = 
SimpleModule::default(); + assert!(breakpad_sym_lookup(&bad).is_none()); + + let bad2 = SimpleModule { + debug_file: Some("foo".to_string()), + ..SimpleModule::default() + }; + assert!(breakpad_sym_lookup(&bad2).is_none()); + + let bad3 = SimpleModule { + debug_id: Some(debug_id), + ..SimpleModule::default() + }; + assert!(breakpad_sym_lookup(&bad3).is_none()); + } + + #[tokio::test] + async fn test_relative_symbol_path_abs_paths() { + let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap(); + { + let m = SimpleModule::new("/path/to/foo.bin", debug_id); + assert_eq!( + &breakpad_sym_lookup(&m).unwrap().cache_rel, + "foo.bin/ABCD1234ABCD1234ABCDABCD12345678a/foo.bin.sym" + ); + } + + { + let m = SimpleModule::new("c:/path/to/foo.pdb", debug_id); + assert_eq!( + &breakpad_sym_lookup(&m).unwrap().cache_rel, + "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym" + ); + } + + { + let m = SimpleModule::new("c:\\path\\to\\foo.pdb", debug_id); + assert_eq!( + &breakpad_sym_lookup(&m).unwrap().cache_rel, + "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym" + ); + } + } + + #[tokio::test] + async fn test_code_info_breakpad_sym_lookup() { + // Test normal data + let m = SimpleModule { + code_file: Some("foo.dll".to_string()), + code_identifier: Some(CodeId::from_str("64E782C570C4000").unwrap()), + ..SimpleModule::default() + }; + assert_eq!( + &code_info_breakpad_sym_lookup(&m).unwrap(), + "foo.dll/64E782C570C4000/foo.sym" + ); + + let bad = SimpleModule::default(); + assert!(code_info_breakpad_sym_lookup(&bad).is_none()); + + let bad2 = SimpleModule { + code_file: Some("foo".to_string()), + ..SimpleModule::default() + }; + assert!(code_info_breakpad_sym_lookup(&bad2).is_none()); + + let bad3 = SimpleModule { + code_identifier: Some(CodeId::from_str("64E782C570C4000").unwrap()), + ..SimpleModule::default() + }; + assert!(code_info_breakpad_sym_lookup(&bad3).is_none()); + } + + fn mksubdirs(path: &Path, dirs: &[&str]) -> Vec { + dirs.iter() + 
.map(|dir| { + let new_path = path.join(dir); + fs::create_dir(&new_path).unwrap(); + new_path + }) + .collect() + } + + fn write_symbol_file(path: &Path, contents: &[u8]) { + let dir = path.parent().unwrap(); + if !fs::metadata(dir).ok().map_or(false, |m| m.is_dir()) { + fs::create_dir_all(dir).unwrap(); + } + let mut f = File::create(path).unwrap(); + f.write_all(contents).unwrap(); + } + + fn write_good_symbol_file(path: &Path) { + write_symbol_file(path, b"MODULE Linux x86 abcd1234 foo\n"); + } + + fn write_bad_symbol_file(path: &Path) { + write_symbol_file(path, b"this is not a symbol file\n"); + } + + #[tokio::test] + async fn test_simple_symbol_supplier() { + let t = tempfile::tempdir().unwrap(); + let paths = mksubdirs(t.path(), &["one", "two"]); + + let supplier = SimpleSymbolSupplier::new(paths.clone()); + let bad = SimpleModule::default(); + assert_eq!( + supplier.locate_symbols(&bad).await, + Err(SymbolError::NotFound) + ); + + // Try loading symbols for each of two modules in each of the two + // search paths. + for &(path, file, id, sym) in [ + ( + &paths[0], + "foo.pdb", + DebugId::from_str("abcd1234-0000-0000-0000-abcd12345678-a").unwrap(), + "foo.pdb/ABCD1234000000000000ABCD12345678a/foo.sym", + ), + ( + &paths[1], + "bar.xyz", + DebugId::from_str("ff990000-0000-0000-0000-abcd12345678-a").unwrap(), + "bar.xyz/FF990000000000000000ABCD12345678a/bar.xyz.sym", + ), + ] + .iter() + { + let m = SimpleModule::new(file, id); + // No symbols present yet. + assert_eq!( + supplier.locate_symbols(&m).await, + Err(SymbolError::NotFound) + ); + write_good_symbol_file(&path.join(sym)); + // Should load OK now that it exists. + assert!( + supplier.locate_symbols(&m).await.is_ok(), + "{}", + format!("Located symbols for {sym}") + ); + } + + // Write a malformed symbol file, verify that it's found but fails to load. 
+ let debug_id = DebugId::from_str("ffff0000-0000-0000-0000-abcd12345678-a").unwrap(); + let mal = SimpleModule::new("baz.pdb", debug_id); + let sym = "baz.pdb/FFFF0000000000000000ABCD12345678a/baz.sym"; + assert_eq!( + supplier.locate_symbols(&mal).await, + Err(SymbolError::NotFound) + ); + write_bad_symbol_file(&paths[0].join(sym)); + let res = supplier.locate_symbols(&mal).await; + assert!( + matches!(res, Err(SymbolError::ParseError(..))), + "{}", + format!("Correctly failed to parse {sym}, result: {res:?}") + ); + } + + #[tokio::test] + async fn test_symbolizer() { + let t = tempfile::tempdir().unwrap(); + let path = t.path(); + + // TODO: This could really use a MockSupplier + let supplier = SimpleSymbolSupplier::new(vec![PathBuf::from(path)]); + let symbolizer = Symbolizer::new(supplier); + let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap(); + let m1 = SimpleModule::new("foo.pdb", debug_id); + write_symbol_file( + &path.join("foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"), + b"MODULE Linux x86 ABCD1234ABCD1234ABCDABCD12345678a foo +FILE 1 foo.c +FUNC 1000 30 10 some func +1000 30 100 1 +", + ); + let mut f1 = SimpleFrame::with_instruction(0x1010); + symbolizer.fill_symbol(&m1, &mut f1).await.unwrap(); + assert_eq!(f1.function.unwrap(), "some func"); + assert_eq!(f1.function_base.unwrap(), 0x1000); + assert_eq!(f1.source_file.unwrap(), "foo.c"); + assert_eq!(f1.source_line.unwrap(), 100); + assert_eq!(f1.source_line_base.unwrap(), 0x1000); + + assert_eq!( + symbolizer + .get_symbol_at_address("foo.pdb", debug_id, 0x1010) + .await + .unwrap(), + "some func" + ); + + let debug_id = DebugId::from_str("ffff0000-0000-0000-0000-abcd12345678-a").unwrap(); + let m2 = SimpleModule::new("bar.pdb", debug_id); + let mut f2 = SimpleFrame::with_instruction(0x1010); + // No symbols present, should not find anything. 
+ assert!(symbolizer.fill_symbol(&m2, &mut f2).await.is_err()); + assert!(f2.function.is_none()); + assert!(f2.function_base.is_none()); + assert!(f2.source_file.is_none()); + assert!(f2.source_line.is_none()); + // Results should be cached. + write_symbol_file( + &path.join("bar.pdb/ffff0000000000000000ABCD12345678a/bar.sym"), + b"MODULE Linux x86 ffff0000000000000000ABCD12345678a bar +FILE 53 bar.c +FUNC 1000 30 10 another func +1000 30 7 53 +", + ); + assert!(symbolizer.fill_symbol(&m2, &mut f2).await.is_err()); + assert!(f2.function.is_none()); + assert!(f2.function_base.is_none()); + assert!(f2.source_file.is_none()); + assert!(f2.source_line.is_none()); + // This should also use cached results. + assert!(symbolizer + .get_symbol_at_address("bar.pdb", debug_id, 0x1010) + .await + .is_none()); + } + + #[tokio::test] + async fn test_extra_debug_info() { + let debug_info = DebugInfoResult { + debug_file: String::from_str("foo.pdb").unwrap(), + debug_identifier: DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap(), + }; + + let mut supplier = StringSymbolSupplier { + modules: HashMap::new(), + code_info_to_debug_info: HashMap::new(), + }; + supplier.modules.insert( + String::from_str("foo.pdb").unwrap(), + String::from_str( + "MODULE Linux x86 ABCD1234ABCD1234ABCDABCD12345678a foo +FILE 1 foo.c +FUNC 1000 30 10 some func +1000 30 100 1 +", + ) + .unwrap(), + ); + supplier.code_info_to_debug_info.insert( + String::from_str("foo.pdb/64E782C570C4000/foo.pdb.sym").unwrap(), + debug_info.clone(), + ); + + let symbolizer = Symbolizer::new(supplier); + let module = SimpleModule::from_basic_info( + None, + None, + Some(String::from_str("foo.pdb").unwrap()), + Some(CodeId::from_str("64E782C570C4000").unwrap()), + ); + + let mut f1 = SimpleFrame::with_instruction(0x1010); + symbolizer.fill_symbol(&module, &mut f1).await.unwrap(); + assert_eq!(f1.function.unwrap(), "some func"); + assert_eq!(f1.function_base.unwrap(), 0x1000); + 
assert_eq!(f1.source_file.unwrap(), "foo.c"); + assert_eq!(f1.source_line.unwrap(), 100); + assert_eq!(f1.source_line_base.unwrap(), 0x1000); + + let sym_stats = symbolizer.stats(); + let stats = sym_stats.get("foo.pdb").unwrap(); + assert_eq!(stats.extra_debug_info, Some(debug_info)); + } +} diff --git a/third_party/rust/breakpad-symbols/src/sym_file/mod.rs b/third_party/rust/breakpad-symbols/src/sym_file/mod.rs new file mode 100644 index 000000000000..29638dce9b3b --- /dev/null +++ b/third_party/rust/breakpad-symbols/src/sym_file/mod.rs @@ -0,0 +1,628 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. +use crate::{FrameSymbolizer, FrameWalker, Module, SymbolError}; + +pub use crate::sym_file::types::*; +pub use parser::SymbolParser; +use std::fs::File; +use std::io::Read; +use std::ops::Deref; +use std::path::Path; +use tracing::trace; + +mod parser; +mod types; +pub mod walker; + +// # Sync VS Async +// +// There are both a sync and an async entry-point to the parser. +// The two impls should be essentially identical, except for how they +// read bytes from the input reader into our circular buffer. +// +// +// # Streaming +// +// This parser streams the input to avoid the need to materialize all of +// it into memory at once (symbol files can be a gigabyte!). As a result, +// we need to iteratively parse. +// +// We do this by repeatedly filling up a buffer with input and asking the +// parser to parse it. The parser will return how much of the input it +// consumed, which we can use to clear space in our buffer and to tell +// if it successfully consumed the whole input when the Reader runs dry. +// +// +// # Handling EOF / Capacity +// +// Having a fixed-size buffer has one fatal issue: if one atomic step +// of the parser needs more than this amount of data, then we won't +// be able to parse it. +// +// This can result in `buf` filling up and `buf.space()` becoming an +// empty slice.
This in turn will make the reader yield 0 bytes, and +// we'll treat it like EOF and fail the parse. When this happens, we +// try to double the buffer's size and request more bytes. If we get +// more, hooray! If we don't, then it's a "real" EOF. +// +// The "atom" of our parser is a line, so we need our buffer to be able +// to fit any line. However we actually only have roughly +// *half* this value as our limit, as circular::Buffer will only +// `shift` the buffer's contents if over half of its capacity has been +// drained by `consume` -- and `space()` only grows when a `shift` happens. +// +// I have in fact seen 8kb function names from Rust (thanks generic combinators!) +// and 82kb function names from C++ (thanks 'auto' returns!), so we +// need a buffer size that can grow to at least 200KB. This is a *very* large +// amount to backshift repeatedly, so to keep this under control, we start +// with only a 10KB buffer, which is generous but tolerable. +// +// We should still have *SOME* limit on this to avoid nasty death spirals, +// so let's go with 160KB (MAX_BUFFER_CAPACITY), letting you have a horrifying 80KB+ symbol. +// +// But just *dying* when we hit this point is terrible, so let's have an +// extra layer of robustness: if we ever hit the limit, enter "panic recovery" +// and just start discarding bytes until we hit a newline. Then resume normal +// parsing. The net effect of this is that we just treat this one line as +// corrupt (because statistically it won't even be needed!). + +// Allows for at least 80KB symbol names, at most 160KB symbol names (fuzzy because of circular). +static MAX_BUFFER_CAPACITY: usize = 1024 * 160; +static INITIAL_BUFFER_CAPACITY: usize = 1024 * 10; + +impl SymbolFile { + /// Parse a SymbolFile from the given Reader. + /// + /// Every time a chunk of the input is parsed, that chunk will + /// be passed to `callback` to allow you to do something else + /// with the data as it's streamed in (e.g.
you can save the + /// input to a cache). + /// + /// The reader is wrapped in a buffer reader so you shouldn't + /// buffer the input yourself. + pub fn parse( + mut input_reader: R, + mut callback: impl FnMut(&[u8]), + ) -> Result { + let mut buf = circular::Buffer::with_capacity(INITIAL_BUFFER_CAPACITY); + let mut parser = SymbolParser::new(); + let mut fully_consumed = false; + let mut tried_to_grow = false; + let mut in_panic_recovery = false; + let mut just_finished_recovering = false; + let mut total_consumed = 0u64; + loop { + if in_panic_recovery { + // PANIC RECOVERY MODE! DISCARD BYTES UNTIL NEWLINE. + let input = buf.data(); + if let Some(new_line_idx) = input.iter().position(|&byte| byte == b'\n') { + // Hooray, we found a new line! Consume up to and including that, and resume. + let amount = new_line_idx + 1; + callback(&input[..amount]); + buf.consume(amount); + total_consumed += amount as u64; + + // Back to normal! + in_panic_recovery = false; + fully_consumed = false; + just_finished_recovering = true; + parser.lines += 1; + trace!("RECOVERY: complete!"); + } else { + // No newline, discard everything + let amount = input.len(); + callback(&input[..amount]); + buf.consume(amount); + total_consumed += amount as u64; + + // If the next read returns 0 bytes, then that's a proper EOF! + fully_consumed = true; + } + } + + // Read the data in, and tell the circular buffer about the new data + let size = input_reader.read(buf.space())?; + buf.fill(size); + + if size == 0 { + // If the reader returned no more bytes, this can either mean + // EOF or the buffer is out of capacity. There are a lot of cases + // to consider, so let's go through them one at a time... + if just_finished_recovering && !buf.data().is_empty() { + // We just finished PANIC RECOVERY, but there are still bytes in + // the buffer. Assume that is parseable and resume normal parsing + // (do nothing, fallthrough to normal path). + } else if fully_consumed { + // Success!
The last iteration cleared the buffer and we still got + // no more bytes, so that's a proper EOF with a complete parse! + return Ok(parser.finish()); + } else if !tried_to_grow { + // We still have some stuff in the buffer, assume this is because + // the buffer is full, and try to make it BIGGER and ask for more again. + let new_cap = buf.capacity().saturating_mul(2); + if new_cap > MAX_BUFFER_CAPACITY { + // TIME TO PANIC!!! This line is catastrophically big, just start + // discarding bytes until we hit a newline. + trace!("RECOVERY: discarding enormous line {}", parser.lines); + in_panic_recovery = true; + continue; + } + trace!("parser out of space? trying more ({}KB)", new_cap / 1024); + buf.grow(new_cap); + tried_to_grow = true; + continue; + } else if total_consumed == 0 { + // We grew the buffer and still got no more bytes, so it's a proper EOF. + // But actually, we never consumed any bytes, so this is an empty file? + // Give a better error message for that. + return Err(SymbolError::ParseError( + "empty SymbolFile (probably something wrong with your debuginfo tooling?)", + 0, + )); + } else { + // Ok give up, this input is just impossible. + return Err(SymbolError::ParseError( + "unexpected EOF during parsing of SymbolFile (or a line was too long?)", + parser.lines, + )); + } + } else { + tried_to_grow = false; + } + + if in_panic_recovery { + // Don't run the normal parser while we're still recovering! + continue; + } + just_finished_recovering = false; + + // Ask the parser to parse more of the input + let input = buf.data(); + let consumed = parser.parse_more(input)?; + total_consumed += consumed as u64; + + // Give the other consumer of this Reader a chance to use this data. + callback(&input[..consumed]); + + // Remember for the next iteration if all the input was consumed. 
+ fully_consumed = input.len() == consumed; + buf.consume(consumed); + } + } + + /// `parse` but async + #[cfg(feature = "http")] + pub async fn parse_async( + mut response: reqwest::Response, + mut callback: impl FnMut(&[u8]), + ) -> Result { + let mut chunk; + let mut slice = &[][..]; + let mut input_reader = &mut slice; + let mut buf = circular::Buffer::with_capacity(INITIAL_BUFFER_CAPACITY); + let mut parser = SymbolParser::new(); + + let mut fully_consumed = false; + let mut tried_to_grow = false; + let mut in_panic_recovery = false; + let mut just_finished_recovering = false; + let mut total_consumed = 0u64; + loop { + if in_panic_recovery { + // PANIC RECOVERY MODE! DISCARD BYTES UNTIL NEWLINE. + let input = buf.data(); + if let Some(new_line_idx) = input.iter().position(|&byte| byte == b'\n') { + // Hooray, we found a new line! Consume up to and including that, and resume. + let amount = new_line_idx + 1; + callback(&input[..amount]); + buf.consume(amount); + total_consumed += amount as u64; + + // Back to normal! + in_panic_recovery = false; + fully_consumed = false; + just_finished_recovering = true; + parser.lines += 1; + trace!("PANIC RECOVERY: complete!"); + } else { + // No newline, discard everything + let amount = input.len(); + callback(&input[..amount]); + buf.consume(amount); + total_consumed += amount as u64; + + // If the next read returns 0 bytes, then that's a proper EOF! + fully_consumed = true; + } + } + + // Little rube-goldberg machine to stream the contents: + // * get a chunk (Bytes) from the Response + // * get its underlying slice + // * then get a mutable reference to that slice + // * then Read that mutable reference in our circular buffer + // * when the slice runs out, get the next chunk and repeat + if input_reader.is_empty() { + chunk = response + .chunk() + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))? 
+ .unwrap_or_default(); + slice = &chunk[..]; + input_reader = &mut slice; + } + + // Read the data in, and tell the circular buffer about the new data + let size = input_reader.read(buf.space())?; + buf.fill(size); + + if size == 0 { + // If the reader returned no more bytes, this can either mean + // EOF or the buffer is out of capacity. There are a lot of cases + // to consider, so let's go through them one at a time... + if just_finished_recovering && !buf.data().is_empty() { + // We just finished PANIC RECOVERY, but there are still bytes in + // the buffer. Assume that is parseable and resume normal parsing + // (do nothing, fallthrough to normal path). + } else if fully_consumed { + // Success! The last iteration cleared the buffer and we still got + // no more bytes, so that's a proper EOF with a complete parse! + return Ok(parser.finish()); + } else if !tried_to_grow { + // We still have some stuff in the buffer, assume this is because + // the buffer is full, and try to make it BIGGER and ask for more again. + let new_cap = buf.capacity().saturating_mul(2); + if new_cap > MAX_BUFFER_CAPACITY { + // TIME TO PANIC!!! This line is catastrophically big, just start + // discarding bytes until we hit a newline. + trace!("RECOVERY: discarding enormous line {}", parser.lines); + in_panic_recovery = true; + continue; + } + trace!("parser out of space? trying more ({}KB)", new_cap / 1024); + buf.grow(new_cap); + tried_to_grow = true; + continue; + } else if total_consumed == 0 { + // We grew the buffer and still got no more bytes, so it's a proper EOF. + // But actually, we never consumed any bytes, so this is an empty file? + // Give a better error message for that. + return Err(SymbolError::ParseError( + "empty SymbolFile (probably something wrong with your debuginfo tooling?)", + 0, + )); + } else { + // Ok give up, this input is just impossible.
+ return Err(SymbolError::ParseError( + "unexpected EOF during parsing of SymbolFile (or a line was too long?)", + parser.lines, + )); + } + } else { + tried_to_grow = false; + } + + if in_panic_recovery { + // Don't run the normal parser while we're still recovering! + continue; + } + just_finished_recovering = false; + + // Ask the parser to parse more of the input + let input = buf.data(); + let consumed = parser.parse_more(input)?; + total_consumed += consumed as u64; + + // Give the other consumer of this Reader a chance to use this data. + callback(&input[..consumed]); + + // Remember for the next iteration if all the input was consumed. + fully_consumed = input.len() == consumed; + buf.consume(consumed); + } + } + + /// Parse a SymbolFile from bytes. + pub fn from_bytes(bytes: &[u8]) -> Result { + Self::parse(bytes, |_| ()) + } + + /// Parse a SymbolFile from a file. + pub fn from_file(path: &Path) -> Result { + let file = File::open(path)?; + Self::parse(file, |_| ()) + } + + /// Fill in as much source information for `frame` as possible. + pub fn fill_symbol(&self, module: &dyn Module, frame: &mut dyn FrameSymbolizer) { + // Look for a FUNC covering the address first. + if frame.get_instruction() < module.base_address() { + return; + } + let addr = frame.get_instruction() - module.base_address(); + if let Some(func) = self.functions.get(addr) { + // TODO: although FUNC records have a parameter size, it appears that + // they aren't to be trusted? The STACK WIN records are more reliable + // when available. This is important precisely because these values + // are used to unwind subsequent STACK WIN frames (because certain + // calling conventions have the caller push the callee's arguments, + // which affects the stack's size!).
+ // + // Need to spend more time thinking about if this is the right approach + let parameter_size = if let Some(info) = self.win_stack_framedata_info.get(addr) { + info.parameter_size + } else if let Some(info) = self.win_stack_fpo_info.get(addr) { + info.parameter_size + } else { + func.parameter_size + }; + + frame.set_function( + &func.name, + func.address + module.base_address(), + parameter_size, + ); + + // See if there's source line and inline info as well. + // + // In the following, we transform data between two different representations of inline calls. + // The input shape has function names associated with the location of the call to that function. + // The output shape has function names associated with a location *inside* that function. + // + // Input: + // + // ( + // outer_name, + // inline_calls: [ // Each location is the line of the *call* to the function + // (inline_call_location[0], inline_name[0]), + // (inline_call_location[1], inline_name[1]), + // (inline_call_location[2], inline_name[2]), + // ] + // innermost_location, + // ) + // + // Output: + // + // ( // Each location is the line *inside* the function + // (outer_name, inline_call_location[0]), + // inlines: [ + // (inline_name[0], inline_call_location[1]), + // (inline_name[1], inline_call_location[2]), + // (inline_name[2], innermost_location), + // ] + // ) + if let Some((file_id, line, address, next_inline_origin)) = + func.get_outermost_sourceloc(addr) + { + if let Some(file) = self.files.get(&file_id) { + frame.set_source_file(file, line, address + module.base_address()); + } + + if let Some(mut inline_origin) = next_inline_origin { + // There is an inline call at the address. + // Enumerate all inlines at the address one by one by looking up + // successively deeper call depths. + // The call to `get_outermost_sourceloc` above looked up depth 0, so here + // we start at depth 1. + for depth in 1..
{ + match func.get_inlinee_at_depth(depth, addr) { + Some((call_file_id, call_line, _address, next_inline_origin)) => { + // We found another inline frame. + let call_file = self.files.get(&call_file_id).map(Deref::deref); + if let Some(name) = self.inline_origins.get(&inline_origin) { + frame.add_inline_frame(name, call_file, Some(call_line)); + } + + inline_origin = next_inline_origin; + } + None => break, + } + } + // We've run out of inline calls but we still have to output the final frame. + let (file, line) = match func.get_innermost_sourceloc(addr) { + Some((file_id, line, _)) => ( + self.files.get(&file_id).map(Deref::deref), + if line != 0 { Some(line) } else { None }, + ), + None => (None, None), + }; + if let Some(name) = self.inline_origins.get(&inline_origin) { + frame.add_inline_frame(name, file, line); + } + } + } + } else if let Some(public) = self.find_nearest_public(addr) { + // We couldn't find a valid FUNC record, but we could find a PUBLIC record. + // Unfortunately, PUBLIC records don't have end-points, so this could be + // a random PUBLIC record from the start of the module that isn't at all + // applicable. To try to limit this problem, we can use the nearest FUNC + // record that comes *before* the address we're trying to find a symbol for. + // + // It is reasonable to assume a PUBLIC record cannot extend *past* a FUNC, + // so if the PUBLIC has a smaller base address than the nearest previous FUNC + // to our target address, the PUBLIC must actually end before that FUNC and + // therefore not actually apply to the target address. + // + // We get the nearest previous FUNC by getting the raw slice of ranges + // and binary searching for our base address. Rust's builtin binary search + // will fail to find the value since it uses strict equality *but* the Err + // will helpfully contain the index in the slice where our value "should" + // be inserted to preserve the sort.
The element before this index is + // therefore the nearest previous value! + // + // Case analysis for this -1 because binary search is an off-by-one minefield: + // + // * if the address we were looking for came *before* every FUNC, binary_search + // would yield "0" because that's where it should go to preserve the sort. + // The checked_sub will then fail and make us just assume the PUBLIC is reasonable, + // which is correct. + // + // * if we get 1, this is saying we actually want element 0, so again -1 is + // correct. (This generalizes to all other "reasonable" values, but 1 is easiest + // to think about given the previous case's analysis.) + // + // * if the address we were looking for came *after* every FUNC, binary search + // would yield "slice.len()", and the nearest FUNC is indeed at `len-1`, so + // again correct. + let funcs_slice = self.functions.ranges_values().as_slice(); + let prev_func = funcs_slice + .binary_search_by_key(&addr, |(range, _)| range.start) + .err() + .and_then(|idx| idx.checked_sub(1)) + .and_then(|idx| funcs_slice.get(idx)); + + if let Some(prev_func) = prev_func { + if public.address <= prev_func.1.address { + // This PUBLIC is truncated by a FUNC before it gets to `addr`, + // so we shouldn't use it. + return; + } + } + + // Settle for a PUBLIC. + frame.set_function( + &public.name, + public.address + module.base_address(), + public.parameter_size, + ); + } + } + + pub fn walk_frame(&self, module: &dyn Module, walker: &mut dyn FrameWalker) -> Option<()> { + if walker.get_instruction() < module.base_address() { + return None; + } + let addr = walker.get_instruction() - module.base_address(); + + // Preferentially use framedata over fpo, because if both are present, + // the former tends to be more precise (breakpad heuristic).
+ let win_stack_result = if let Some(info) = self.win_stack_framedata_info.get(addr) { + walker::walk_with_stack_win_framedata(info, walker) + } else if let Some(info) = self.win_stack_fpo_info.get(addr) { + walker::walk_with_stack_win_fpo(info, walker) + } else { + None + }; + + // If STACK WIN failed, try STACK CFI + win_stack_result.or_else(|| { + if let Some(info) = self.cfi_stack_info.get(addr) { + // Don't use add_rules that come after this address + let mut count = 0; + let len = info.add_rules.len(); + while count < len && info.add_rules[count].address <= addr { + count += 1; + } + + walker::walk_with_stack_cfi(&info.init, &info.add_rules[0..count], walker) + } else { + None + } + }) + } + + /// Find the nearest `PublicSymbol` whose address is less than or equal to `addr`. + pub fn find_nearest_public(&self, addr: u64) -> Option<&PublicSymbol> { + self.publics.iter().rev().find(|&p| p.address <= addr) + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::ffi::OsStr; + fn test_symbolfile_from_file(rel_path: &str) { + let mut path = std::env::current_dir().unwrap(); + if path.file_name() == Some(OsStr::new("rust-minidump")) { + path.push("breakpad-symbols"); + } + path.push(rel_path); + let sym = SymbolFile::from_file(&path).unwrap(); + assert_eq!(sym.files.len(), 6661); + assert_eq!(sym.publics.len(), 5); + assert_eq!(sym.find_nearest_public(0x9b07).unwrap().name, "_NLG_Return"); + assert_eq!( + sym.find_nearest_public(0x142e7).unwrap().name, + "_NLG_Return" + ); + assert_eq!( + sym.find_nearest_public(0x23b06).unwrap().name, + "__from_strstr_to_strchr" + ); + assert_eq!( + sym.find_nearest_public(0xFFFFFFFF).unwrap().name, + "__from_strstr_to_strchr" + ); + assert_eq!(sym.functions.ranges_values().count(), 1065); + assert_eq!(sym.functions.get(0x1000).unwrap().name, "vswprintf"); + assert_eq!(sym.functions.get(0x1012).unwrap().name, "vswprintf"); + assert!(sym.functions.get(0x1013).is_none()); + // There are 1556 `STACK WIN 4` lines in the symbol 
file, but only 856 + // that don't overlap. However they all overlap in ways that we have + // to handle in the wild. + assert_eq!(sym.win_stack_framedata_info.ranges_values().count(), 1556); + assert_eq!(sym.win_stack_fpo_info.ranges_values().count(), 259); + assert_eq!( + sym.win_stack_framedata_info.get(0x41b0).unwrap().address, + 0x41b0 + ); + } + + #[test] + fn test_symbolfile_from_lf_file() { + test_symbolfile_from_file( + "testdata/symbols/test_app.pdb/5A9832E5287241C1838ED98914E9B7FF1/test_app.sym", + ); + } + + #[test] + fn test_symbolfile_from_crlf_file() { + test_symbolfile_from_file( + "testdata/symbols/test_app.pdb/6A9832E5287241C1838ED98914E9B7FF1/test_app.sym", + ); + } + + fn test_symbolfile_from_bytes(symbolfile_bytes: &[u8]) { + let sym = SymbolFile::from_bytes(symbolfile_bytes).unwrap(); + + assert_eq!(sym.files.len(), 1); + assert_eq!(sym.publics.len(), 1); + assert_eq!(sym.functions.ranges_values().count(), 1); + assert_eq!(sym.functions.get(0x1000).unwrap().name, "another func"); + assert_eq!( + sym.functions + .get(0x1000) + .unwrap() + .lines + .ranges_values() + .count(), + 1 + ); + // test fallback + assert_eq!(sym.functions.get(0x1001).unwrap().name, "another func"); + } + + #[test] + fn test_symbolfile_from_bytes_with_lf() { + test_symbolfile_from_bytes( + b"MODULE Linux x86 ffff0000 bar +FILE 53 bar.c +PUBLIC 1234 10 some public +FUNC 1000 30 10 another func +1000 30 7 53 +", + ); + } + + #[test] + fn test_symbolfile_from_bytes_with_crlf() { + test_symbolfile_from_bytes( + b"MODULE Linux x86 ffff0000 bar +FILE 53 bar.c +PUBLIC 1234 10 some public +FUNC 1000 30 10 another func +1000 30 7 53 +", + ); + } +} diff --git a/third_party/rust/breakpad-symbols/src/sym_file/parser.rs b/third_party/rust/breakpad-symbols/src/sym_file/parser.rs new file mode 100644 index 000000000000..473dad9ff478 --- /dev/null +++ b/third_party/rust/breakpad-symbols/src/sym_file/parser.rs @@ -0,0 +1,1653 @@ +// Copyright 2015 Ted Mielczarek. 
See the COPYRIGHT +// file at the top-level directory of this distribution. + +use nom::branch::alt; +use nom::bytes::complete::{tag, take_while}; +use nom::character::complete::{hex_digit1, space1}; +use nom::character::{is_digit, is_hex_digit}; +use nom::combinator::{cut, map, map_res, opt}; +use nom::error::{Error, ErrorKind, ParseError}; +use nom::multi::separated_list1; +use nom::sequence::{preceded, terminated, tuple}; +use nom::{Err, IResult}; +use range_map::{Range, RangeMap}; +use tracing::warn; + +use std::collections::HashMap; +use std::convert::TryFrom; +use std::fmt::Debug; +use std::{mem, str}; + +use minidump_common::traits::IntoRangeMapSafe; + +use crate::sym_file::types::*; +use crate::SymbolError; + +#[derive(Debug)] +enum Line { + Module(String, String, String, String), + Info(Info), + File(u32, String), + InlineOrigin(u32, String), + Public(PublicSymbol), + Function(Function, Vec, Vec), + StackWin(WinFrameType), + StackCfi(StackInfoCfi), +} + +/// Match a hex string, parse it to a u32 or a u64. +fn hex_str + std::ops::BitOr + From>( + input: &[u8], +) -> IResult<&[u8], T> { + // Consume up to max_len digits. For u32 that's 8 digits and for u64 that's 16 digits. + // Two hex digits form one byte. + let max_len = mem::size_of::() * 2; + + let mut res: T = T::from(0); + let mut k = 0; + for v in input.iter().take(max_len) { + let digit = match (*v as char).to_digit(16) { + Some(v) => v, + None => break, + }; + res = res << T::from(4); + res = res | T::from(digit as u8); + k += 1; + } + if k == 0 { + return Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::HexDigit, + ))); + } + let remaining = &input[k..]; + Ok((remaining, res)) +} + +/// Match a decimal string, parse it to a u32. +/// +/// This is doing everything manually so that we only look at each byte once. 
+/// With a naive implementation you might be looking at them three times: First +/// you might get a slice of acceptable characters from nom, then you might parse +/// that slice into a str (checking for utf-8 unnecessarily), and then you might +/// parse that string into a decimal number. +fn decimal_u32(input: &[u8]) -> IResult<&[u8], u32> { + const MAX_LEN: usize = 10; // u32::MAX has 10 decimal digits + let mut res: u64 = 0; + let mut k = 0; + for v in input.iter().take(MAX_LEN) { + let digit = *v as char; + let digit_value = match digit.to_digit(10) { + Some(v) => v, + None => break, + }; + res = res * 10 + digit_value as u64; + k += 1; + } + if k == 0 { + return Err(Err::Error(Error::from_error_kind(input, ErrorKind::Digit))); + } + let res = u32::try_from(res) + .map_err(|_| Err::Error(Error::from_error_kind(input, ErrorKind::TooLarge)))?; + let remaining = &input[k..]; + Ok((remaining, res)) +} + +/// Take 0 or more non-space bytes. +fn non_space(input: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(|c: u8| c != b' ')(input) +} + +/// Accept `\n` with an arbitrary number of preceding `\r` bytes. +/// +/// This is different from `line_ending` which doesn't accept `\r` if it isn't +/// followed by `\n`. +fn my_eol(input: &[u8]) -> IResult<&[u8], &[u8]> { + preceded(take_while(|b| b == b'\r'), tag(b"\n"))(input) +} + +/// Accept everything except `\r` and `\n`. +/// +/// This is different from `not_line_ending` which rejects its input if it's +/// followed by a `\r` which is not immediately followed by a `\n`. +fn not_my_eol(input: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(|b| b != b'\r' && b != b'\n')(input) +} + +/// Parse a single byte if it matches the predicate. +/// +/// nom has `satisfy`, which is similar. It differs in the argument type of the +/// predicate: `satisfy`'s predicate takes `char`, whereas `single`'s predicate +/// takes `u8`. 
+fn single(predicate: fn(u8) -> bool) -> impl Fn(&[u8]) -> IResult<&[u8], u8> { + move |i: &[u8]| match i.split_first() { + Some((b, rest)) if predicate(*b) => Ok((rest, *b)), + _ => Err(Err::Error(Error::from_error_kind(i, ErrorKind::Satisfy))), + } +} + +// Matches a MODULE record. +fn module_line(input: &[u8]) -> IResult<&[u8], (String, String, String, String)> { + let (input, _) = terminated(tag("MODULE"), space1)(input)?; + let (input, (os, cpu, debug_id, filename)) = cut(tuple(( + terminated(map_res(non_space, str::from_utf8), space1), // os + terminated(map_res(non_space, str::from_utf8), space1), // cpu + terminated(map_res(hex_digit1, str::from_utf8), space1), // debug id + terminated(map_res(not_my_eol, str::from_utf8), my_eol), // filename + )))(input)?; + Ok(( + input, + ( + os.to_string(), + cpu.to_string(), + debug_id.to_string(), + filename.to_string(), + ), + )) +} + +// Matches an INFO URL record. +fn info_url(input: &[u8]) -> IResult<&[u8], Info> { + let (input, _) = terminated(tag("INFO URL"), space1)(input)?; + let (input, url) = cut(terminated(map_res(not_my_eol, str::from_utf8), my_eol))(input)?; + Ok((input, Info::Url(url.to_string()))) +} + +// Matches other INFO records. +fn info_line(input: &[u8]) -> IResult<&[u8], &[u8]> { + let (input, _) = terminated(tag("INFO"), space1)(input)?; + cut(terminated(not_my_eol, my_eol))(input) +} + +// Matches a FILE record. +fn file_line(input: &[u8]) -> IResult<&[u8], (u32, String)> { + let (input, _) = terminated(tag("FILE"), space1)(input)?; + let (input, (id, filename)) = cut(tuple(( + terminated(decimal_u32, space1), + terminated(map_res(not_my_eol, str::from_utf8), my_eol), + )))(input)?; + Ok((input, (id, filename.to_string()))) +} + +// Matches an INLINE_ORIGIN record. 
+fn inline_origin_line(input: &[u8]) -> IResult<&[u8], (u32, String)> { + let (input, _) = terminated(tag("INLINE_ORIGIN"), space1)(input)?; + let (input, (id, function)) = cut(tuple(( + terminated(decimal_u32, space1), + terminated(map_res(not_my_eol, str::from_utf8), my_eol), + )))(input)?; + Ok((input, (id, function.to_string()))) +} + +// Matches a PUBLIC record. +fn public_line(input: &[u8]) -> IResult<&[u8], PublicSymbol> { + let (input, _) = terminated(tag("PUBLIC"), space1)(input)?; + let (input, (_multiple, address, parameter_size, name)) = cut(tuple(( + opt(terminated(tag("m"), space1)), + terminated(hex_str::, space1), + terminated(hex_str::, space1), + terminated(map_res(not_my_eol, str::from_utf8), my_eol), + )))(input)?; + Ok(( + input, + PublicSymbol { + address, + parameter_size, + name: name.to_string(), + }, + )) +} + +// Matches line data after a FUNC record. +fn func_line_data(input: &[u8]) -> IResult<&[u8], SourceLine> { + let (input, (address, size, line, file)) = tuple(( + terminated(hex_str::, space1), + terminated(hex_str::, space1), + terminated(decimal_u32, space1), + terminated(decimal_u32, my_eol), + ))(input)?; + Ok(( + input, + SourceLine { + address, + size, + file, + line, + }, + )) +} + +// Matches a FUNC record. +fn func_line(input: &[u8]) -> IResult<&[u8], Function> { + let (input, _) = terminated(tag("FUNC"), space1)(input)?; + let (input, (_multiple, address, size, parameter_size, name)) = cut(tuple(( + opt(terminated(tag("m"), space1)), + terminated(hex_str::, space1), + terminated(hex_str::, space1), + terminated(hex_str::, space1), + terminated(map_res(not_my_eol, str::from_utf8), my_eol), + )))(input)?; + Ok(( + input, + Function { + address, + size, + parameter_size, + name: name.to_string(), + lines: RangeMap::new(), + inlinees: Vec::new(), + }, + )) +} + +// Matches one entry of the form
which is used at the end of an INLINE record +fn inline_address_range(input: &[u8]) -> IResult<&[u8], (u64, u32)> { + tuple((terminated(hex_str::, space1), hex_str::))(input) +} + +// Matches an INLINE record. +/// +/// An INLINE record has the form `INLINE [
]+`. +fn inline_line(input: &[u8]) -> IResult<&[u8], impl Iterator> { + let (input, _) = terminated(tag("INLINE"), space1)(input)?; + let (input, (depth, call_line, call_file, origin_id)) = cut(tuple(( + terminated(decimal_u32, space1), + terminated(decimal_u32, space1), + terminated(decimal_u32, space1), + terminated(decimal_u32, space1), + )))(input)?; + let (input, address_ranges) = cut(terminated( + separated_list1(space1, inline_address_range), + my_eol, + ))(input)?; + Ok(( + input, + address_ranges + .into_iter() + .map(move |(address, size)| Inlinee { + address, + size, + call_file, + call_line, + depth, + origin_id, + }), + )) +} + +// Matches a STACK WIN record. +fn stack_win_line(input: &[u8]) -> IResult<&[u8], WinFrameType> { + let (input, _) = terminated(tag("STACK WIN"), space1)(input)?; + let ( + input, + ( + ty, + address, + code_size, + prologue_size, + epilogue_size, + parameter_size, + saved_register_size, + local_size, + max_stack_size, + has_program_string, + rest, + ), + ) = cut(tuple(( + terminated(single(is_hex_digit), space1), // ty + terminated(hex_str::, space1), // address + terminated(hex_str::, space1), // code_size + terminated(hex_str::, space1), // prologue_size + terminated(hex_str::, space1), // epilogue_size + terminated(hex_str::, space1), // parameter_size + terminated(hex_str::, space1), // saved_register_size + terminated(hex_str::, space1), // local_size + terminated(hex_str::, space1), // max_stack_size + terminated(map(single(is_digit), |b| b == b'1'), space1), // has_program_string + terminated(map_res(not_my_eol, str::from_utf8), my_eol), + )))(input)?; + + // Sometimes has_program_string is just wrong. We could try to infer which one is right + // but this is rare enough that it's better to just play it safe and discard the input. 
+ let really_has_program_string = ty == b'4'; + if really_has_program_string != has_program_string { + let kind = match ty { + b'4' => "FrameData", + b'0' => "Fpo", + _ => "Unknown Type!", + }; + warn!("STACK WIN entry had inconsistent values for type and has_program_string, discarding corrupt entry"); + // warn!(" {}", &line); + warn!( + " type: {} ({}), has_program_string: {}, final_arg: {}", + str::from_utf8(&[ty]).unwrap_or(""), + kind, + has_program_string, + rest + ); + + return Ok((input, WinFrameType::Unhandled)); + } + + let program_string_or_base_pointer = if really_has_program_string { + WinStackThing::ProgramString(rest.to_string()) + } else { + WinStackThing::AllocatesBasePointer(rest == "1") + }; + let info = StackInfoWin { + address, + size: code_size, + prologue_size, + epilogue_size, + parameter_size, + saved_register_size, + local_size, + max_stack_size, + program_string_or_base_pointer, + }; + let frame_type = match ty { + b'4' => WinFrameType::FrameData(info), + b'0' => WinFrameType::Fpo(info), + _ => WinFrameType::Unhandled, + }; + Ok((input, frame_type)) +} + +// Matches a STACK CFI record. +fn stack_cfi(input: &[u8]) -> IResult<&[u8], CfiRules> { + let (input, _) = terminated(tag("STACK CFI"), space1)(input)?; + let (input, (address, rules)) = cut(tuple(( + terminated(hex_str::, space1), + terminated(map_res(not_my_eol, str::from_utf8), my_eol), + )))(input)?; + Ok(( + input, + CfiRules { + address, + rules: rules.to_string(), + }, + )) +} + +// Matches a STACK CFI INIT record. 
+fn stack_cfi_init(input: &[u8]) -> IResult<&[u8], StackInfoCfi> { + let (input, _) = terminated(tag("STACK CFI INIT"), space1)(input)?; + let (input, (address, size, rules)) = cut(tuple(( + terminated(hex_str::, space1), + terminated(hex_str::, space1), + terminated(map_res(not_my_eol, str::from_utf8), my_eol), + )))(input)?; + Ok(( + input, + StackInfoCfi { + init: CfiRules { + address, + rules: rules.to_string(), + }, + size, + add_rules: Default::default(), + }, + )) +} + +// Parse any of the line data that can occur in the body of a symbol file. +fn line(input: &[u8]) -> IResult<&[u8], Line> { + alt(( + map(info_url, Line::Info), + map(info_line, |_| Line::Info(Info::Unknown)), + map(file_line, |(i, f)| Line::File(i, f)), + map(inline_origin_line, |(i, f)| Line::InlineOrigin(i, f)), + map(public_line, Line::Public), + map(func_line, |f| Line::Function(f, Vec::new(), Vec::new())), + map(stack_win_line, Line::StackWin), + map(stack_cfi_init, Line::StackCfi), + map(module_line, |(p, a, i, f)| Line::Module(p, a, i, f)), + ))(input) +} + +/// A parser for SymbolFiles. +/// +/// This is basically just a SymbolFile but with some extra state +/// to handle streaming parsing. +/// +/// Use this by repeatedly calling [`parse_more`][] until the +/// whole input is consumed. Then call [`finish`][]. +#[derive(Debug, Default)] +pub struct SymbolParser { + module_id: String, + debug_file: String, + files: HashMap, + inline_origins: HashMap, + publics: Vec, + + // When building a RangeMap when need to sort an array of this + // format anyway, so we might as well construct it directly and + // save a giant allocation+copy. + functions: Vec<(Range, Function)>, + cfi_stack_info: Vec<(Range, StackInfoCfi)>, + win_stack_framedata_info: Vec<(Range, StackInfoWin)>, + win_stack_fpo_info: Vec<(Range, StackInfoWin)>, + url: Option, + pub lines: u64, + cur_item: Option, +} + +impl SymbolParser { + /// Creates a new SymbolParser. 
+ pub fn new() -> Self { + Self::default() + } + + /// Parses as much of the input as it can, and then returns + /// how many bytes of the input was used. The *unused* portion of the + /// input must be resubmitted on subsequent calls to parse_more + /// (along with more data so we can make progress on the parse). + pub fn parse_more(&mut self, mut input: &[u8]) -> Result { + // We parse the input line-by-line, so trim away any part of the input + // that comes after the last newline (this is necessary for streaming + // parsing, as it can otherwise be impossible to tell if a line is + // truncated.) + input = if let Some(idx) = input.iter().rposition(|&x| x == b'\n') { + &input[..idx + 1] + } else { + // If there's no newline, then we can't process anything! + return Ok(0); + }; + // Remember the (truncated) input so that we can tell how many bytes + // we've consumed. + let orig_input = input; + + loop { + // If there's no more input, then we've consumed all of it + // (except for the partial line we trimmed away). + if input.is_empty() { + return Ok(orig_input.len()); + } + + // First check if we're currently processing sublines of a + // multi-line item like `FUNC` and `STACK CFI INIT`. + // If we are, parse the next line as its subline format. + // + // If we encounter an error, this probably means we've + // reached a new item (which ends this one). To handle this, + // we can just finish off the current item and resubmit this + // line to the top-level parser (below). If the line is + // genuinely corrupt, then the top-level parser will also + // fail to read it. + // + // We `take` and then reconstitute the item for borrowing/move + // reasons. 
+ match self.cur_item.take() { + Some(Line::Function(cur, mut lines, mut inlinees)) => { + match self.parse_func_subline(input, &mut lines, &mut inlinees) { + Ok((new_input, ())) => { + input = new_input; + self.cur_item = Some(Line::Function(cur, lines, inlinees)); + self.lines += 1; + continue; + } + Err(_) => { + self.finish_item(Line::Function(cur, lines, inlinees)); + continue; + } + } + } + Some(Line::StackCfi(mut cur)) => match stack_cfi(input) { + Ok((new_input, line)) => { + cur.add_rules.push(line); + input = new_input; + self.cur_item = Some(Line::StackCfi(cur)); + self.lines += 1; + continue; + } + Err(_) => { + self.finish_item(Line::StackCfi(cur)); + continue; + } + }, + _ => { + // We're not parsing sublines, move on to top level parser! + } + } + + // Ignore empty lines + if let Ok((new_input, _)) = my_eol(input) { + input = new_input; + self.lines += 1; + continue; + } + + // Parse a top-level item, and first handle the Result + let line = match line(input) { + Ok((new_input, line)) => { + // Success! Advance the input. + input = new_input; + line + } + Err(_) => { + // The file has a completely corrupt line, + // conservatively reject the entire parse. + return Err(SymbolError::ParseError("failed to parse file", self.lines)); + } + }; + + // Now store the item in our partial SymbolFile (or make it the cur_item + // if it has potential sublines we need to parse first). 
+ match line { + Line::Module(_platform, _arch, module_id, debug_file) => { + // We don't use this but it MUST be the first line + if self.lines != 0 { + return Err(SymbolError::ParseError( + "MODULE line found after the start of the file", + self.lines, + )); + } + self.module_id = module_id; + self.debug_file = debug_file; + } + Line::Info(Info::Url(cached_url)) => { + self.url = Some(cached_url); + } + Line::Info(Info::Unknown) => { + // Don't care + } + Line::File(id, filename) => { + self.files.insert(id, filename.to_string()); + } + Line::InlineOrigin(id, function) => { + self.inline_origins.insert(id, function.to_string()); + } + Line::Public(p) => { + self.publics.push(p); + } + Line::StackWin(frame_type) => { + // PDB files contain lots of overlapping unwind info, so we have to filter + // some of it out. + fn insert_win_stack_info( + stack_win: &mut Vec<(Range, StackInfoWin)>, + info: StackInfoWin, + ) { + if let Some(memory_range) = info.memory_range() { + if let Some((last_range, last_info)) = stack_win.last_mut() { + if last_range.intersects(&memory_range) { + if info.address > last_info.address { + // Sometimes we get STACK WIN directives where each line + // has an accurate starting point, but the length just + // covers the entire function, like so: + // + // addr: 0, len: 10 + // addr: 1, len: 9 + // addr: 4, len: 6 + // + // In this case, the next instruction is the one that + // really defines the length of the previous one. So + // we need to fixup the lengths like so: + // + // addr: 0, len: 1 + // addr: 1, len: 2 + // addr: 4, len: 6 + last_info.size = (info.address - last_info.address) as u32; + *last_range = last_info.memory_range().unwrap(); + } else if *last_range != memory_range { + // We silently drop identical ranges because sometimes + // duplicates happen, but we complain for non-trivial duplicates. 
+ warn!( + "STACK WIN entry had bad intersections, dropping it {:?}", + info + ); + return; + } + } + } + stack_win.push((memory_range, info)); + } else { + warn!("STACK WIN entry had invalid range, dropping it {:?}", info); + } + } + match frame_type { + WinFrameType::FrameData(s) => { + insert_win_stack_info(&mut self.win_stack_framedata_info, s); + } + WinFrameType::Fpo(s) => { + insert_win_stack_info(&mut self.win_stack_fpo_info, s); + } + // Just ignore other types. + _ => {} + } + } + item @ Line::Function(_, _, _) => { + // More sublines to parse + self.cur_item = Some(item); + } + item @ Line::StackCfi(_) => { + // More sublines to parse + self.cur_item = Some(item); + } + } + + // Make note that we've consumed a line of input. + self.lines += 1; + } + } + + /// Parses a single line which is following a FUNC line. + fn parse_func_subline<'a>( + &mut self, + input: &'a [u8], + lines: &mut Vec, + inlinees: &mut Vec, + ) -> IResult<&'a [u8], ()> { + // We can have three different types of sublines: INLINE_ORIGIN, INLINE, or line records. + // Check them one by one. + // We're not using nom's `alt()` here because we'd need to find a common return type. + if input.starts_with(b"INLINE_ORIGIN ") { + let (input, (id, function)) = inline_origin_line(input)?; + self.inline_origins.insert(id, function); + return Ok((input, ())); + } + if input.starts_with(b"INLINE ") { + let (input, new_inlinees) = inline_line(input)?; + inlinees.extend(new_inlinees); + return Ok((input, ())); + } + let (input, line) = func_line_data(input)?; + lines.push(line); + Ok((input, ())) + } + + /// Finish processing an item (cur_item) which had sublines. + /// We now have all the sublines, so it's complete. + fn finish_item(&mut self, item: Line) { + match item { + Line::Function(mut cur, lines, mut inlinees) => { + cur.lines = lines + .into_iter() + // Line data from PDB files often has a zero-size line entry, so just + // filter those out. 
+ .filter(|l| l.size > 0) + .map(|l| { + let end_address = l.address.checked_add(l.size as u64 - 1); + let range = end_address.map(|end| Range::new(l.address, end)); + (range, l) + }) + .into_rangemap_safe(); + + inlinees.sort(); + cur.inlinees = inlinees; + + if let Some(range) = cur.memory_range() { + self.functions.push((range, cur)); + } + } + Line::StackCfi(mut cur) => { + cur.add_rules.sort(); + if let Some(range) = cur.memory_range() { + self.cfi_stack_info.push((range, cur)); + } + } + _ => { + unreachable!() + } + } + } + + /// Finish the parse and create the final SymbolFile. + /// + /// Call this when the parser has consumed all the input. + pub fn finish(mut self) -> SymbolFile { + // If there's a pending multiline item, finish it now. + if let Some(item) = self.cur_item.take() { + self.finish_item(item); + } + + // Now sort everything and bundle it up in its final format. + self.publics.sort(); + + SymbolFile { + module_id: self.module_id, + debug_file: self.debug_file, + files: self.files, + publics: self.publics, + functions: into_rangemap_safe(self.functions), + inline_origins: self.inline_origins, + cfi_stack_info: into_rangemap_safe(self.cfi_stack_info), + win_stack_framedata_info: into_rangemap_safe(self.win_stack_framedata_info), + win_stack_fpo_info: into_rangemap_safe(self.win_stack_fpo_info), + // Will get filled in by the caller + url: self.url, + ambiguities_repaired: 0, + ambiguities_discarded: 0, + corruptions_discarded: 0, + cfi_eval_corruptions: 0, + } + } +} + +// Copied from minidump-common, because we've preconstructed the array to sort. 
+fn into_rangemap_safe(mut input: Vec<(Range, V)>) -> RangeMap { + input.sort_by_key(|x| x.0); + let mut vec: Vec<(Range, V)> = Vec::with_capacity(input.len()); + for (range, val) in input { + if let Some((last_range, last_val)) = vec.last_mut() { + if range.start <= last_range.end && val != *last_val { + continue; + } + + if range.start <= last_range.end.saturating_add(1) && &val == last_val { + last_range.end = std::cmp::max(range.end, last_range.end); + continue; + } + } + vec.push((range, val)); + } + RangeMap::try_from_iter(vec).unwrap() +} + +#[cfg(test)] +fn parse_symbol_bytes(data: &[u8]) -> Result { + SymbolFile::parse(data, |_| ()) +} + +#[test] +fn test_module_line() { + let line = b"MODULE Linux x86 D3096ED481217FD4C16B29CD9BC208BA0 firefox-bin\n"; + let rest = &b""[..]; + assert_eq!( + module_line(line), + Ok(( + rest, + ( + "Linux".to_string(), + "x86".to_string(), + "D3096ED481217FD4C16B29CD9BC208BA0".to_string(), + "firefox-bin".to_string() + ) + )) + ); +} + +#[test] +fn test_module_line_filename_spaces() { + let line = b"MODULE Windows x86_64 D3096ED481217FD4C16B29CD9BC208BA0 firefox x y z\n"; + let rest = &b""[..]; + assert_eq!( + module_line(line), + Ok(( + rest, + ( + "Windows".to_string(), + "x86_64".to_string(), + "D3096ED481217FD4C16B29CD9BC208BA0".to_string(), + "firefox x y z".to_string() + ) + )) + ); +} + +/// Sometimes dump_syms on Windows does weird things and produces multiple carriage returns +/// before the line feed. 
+#[test] +fn test_module_line_crcrlf() { + let line = b"MODULE Windows x86_64 D3096ED481217FD4C16B29CD9BC208BA0 firefox\r\r\n"; + let rest = &b""[..]; + assert_eq!( + module_line(line), + Ok(( + rest, + ( + "Windows".to_string(), + "x86_64".to_string(), + "D3096ED481217FD4C16B29CD9BC208BA0".to_string(), + "firefox".to_string() + ) + )) + ); +} + +#[test] +fn test_info_line() { + let line = b"INFO blah blah blah\n"; + let bits = &b"blah blah blah"[..]; + let rest = &b""[..]; + assert_eq!(info_line(line), Ok((rest, bits))); +} + +#[test] +fn test_info_line2() { + let line = b"INFO CODE_ID abc xyz\n"; + let bits = &b"CODE_ID abc xyz"[..]; + let rest = &b""[..]; + assert_eq!(info_line(line), Ok((rest, bits))); +} + +#[test] +fn test_info_url() { + let line = b"INFO URL https://www.example.com\n"; + let url = "https://www.example.com".to_string(); + let rest = &b""[..]; + assert_eq!(info_url(line), Ok((rest, Info::Url(url)))); +} + +#[test] +fn test_file_line() { + let line = b"FILE 1 foo.c\n"; + let rest = &b""[..]; + assert_eq!(file_line(line), Ok((rest, (1, String::from("foo.c"))))); +} + +#[test] +fn test_file_line_spaces() { + let line = b"FILE 1234 foo bar.xyz\n"; + let rest = &b""[..]; + assert_eq!( + file_line(line), + Ok((rest, (1234, String::from("foo bar.xyz")))) + ); +} + +#[test] +fn test_public_line() { + let line = b"PUBLIC f00d d00d some func\n"; + let rest = &b""[..]; + assert_eq!( + public_line(line), + Ok(( + rest, + PublicSymbol { + address: 0xf00d, + parameter_size: 0xd00d, + name: "some func".to_string(), + } + )) + ); +} + +#[test] +fn test_public_with_m() { + let line = b"PUBLIC m f00d d00d some func\n"; + let rest = &b""[..]; + assert_eq!( + public_line(line), + Ok(( + rest, + PublicSymbol { + address: 0xf00d, + parameter_size: 0xd00d, + name: "some func".to_string(), + } + )) + ); +} + +#[test] +fn test_func_lines_no_lines() { + use range_map::RangeMap; + let line = b"FUNC c184 30 0 nsQueryInterfaceWithError::operator()(nsID const&, void**) 
const\n"; + let rest = &b""[..]; + assert_eq!( + func_line(line), + Ok(( + rest, + Function { + address: 0xc184, + size: 0x30, + parameter_size: 0, + name: "nsQueryInterfaceWithError::operator()(nsID const&, void**) const" + .to_string(), + lines: RangeMap::new(), + inlinees: Vec::new(), + } + )) + ); +} + +#[test] +fn test_truncated_func() { + let line = b"FUNC 1000\n1000 10 42 7\n"; + assert_eq!( + func_line(line), + Err(Err::Failure(Error { + input: &line[9..], + code: ErrorKind::Space + })) + ); +} + +#[test] +fn test_inline_line_single_range() { + let line = b"INLINE 0 3082 52 1410 49200 10\n"; + assert_eq!( + inline_line(line).unwrap().1.collect::>(), + vec![Inlinee { + depth: 0, + address: 0x49200, + size: 0x10, + call_file: 52, + call_line: 3082, + origin_id: 1410 + }] + ) +} + +#[test] +fn test_inline_line_multiple_ranges() { + let line = b"INLINE 6 642 8 207 8b110 18 8b154 18\n"; + assert_eq!( + inline_line(line).unwrap().1.collect::>(), + vec![ + Inlinee { + depth: 6, + address: 0x8b110, + size: 0x18, + call_file: 8, + call_line: 642, + origin_id: 207 + }, + Inlinee { + depth: 6, + address: 0x8b154, + size: 0x18, + call_file: 8, + call_line: 642, + origin_id: 207 + } + ] + ) +} + +#[test] +fn test_func_lines_and_lines() { + let data = b"FUNC 1000 30 10 some func +1000 10 42 7 +INLINE_ORIGIN 16 inlined_function_name() +1010 10 52 8 +INLINE 0 23 9 16 1020 10 +1020 10 62 15 +"; + let file = SymbolFile::from_bytes(data).expect("failed to parse!"); + let (_, f) = file.functions.ranges_values().next().unwrap(); + assert_eq!(f.address, 0x1000); + assert_eq!(f.size, 0x30); + assert_eq!(f.parameter_size, 0x10); + assert_eq!(f.name, "some func".to_string()); + assert_eq!( + f.lines.get(0x1000).unwrap(), + &SourceLine { + address: 0x1000, + size: 0x10, + file: 7, + line: 42, + } + ); + assert_eq!( + f.lines.ranges_values().collect::>(), + vec![ + &( + Range::::new(0x1000, 0x100F), + SourceLine { + address: 0x1000, + size: 0x10, + file: 7, + line: 42, + }, + ), + &( 
+ Range::::new(0x1010, 0x101F), + SourceLine { + address: 0x1010, + size: 0x10, + file: 8, + line: 52, + }, + ), + &( + Range::::new(0x1020, 0x102F), + SourceLine { + address: 0x1020, + size: 0x10, + file: 15, + line: 62, + }, + ), + ] + ); + assert_eq!( + f.inlinees, + vec![Inlinee { + depth: 0, + address: 0x1020, + size: 0x10, + call_file: 9, + call_line: 23, + origin_id: 16 + }] + ); +} + +#[test] +fn test_nested_inlines() { + // 0x1000: outer_func() @ :60 -> mid_func() @ :12 -> inner_func1() :42 + // 0x1010: outer_func() @ :60 -> mid_func() @ :17 -> inner_func2() :52 + // 0x1020: outer_func() @ :62 + let data = b"FUNC 1000 30 10 outer_func() +INLINE_ORIGIN 1 inner_func_2() +INLINE_ORIGIN 2 mid_func() +INLINE_ORIGIN 3 inner_func_1() +INLINE 0 60 15 2 1000 20 +INLINE 1 12 4 3 1000 10 +INLINE 1 17 4 1 1010 10 +1000 10 42 7 +1010 10 52 8 +1020 10 62 15 +"; + let file = SymbolFile::from_bytes(data).expect("failed to parse!"); + let (_, f) = file.functions.ranges_values().next().unwrap(); + assert_eq!(f.address, 0x1000); + assert_eq!(f.size, 0x30); + assert_eq!(f.parameter_size, 0x10); + assert_eq!(f.name, "outer_func()".to_string()); + + // Check the source locations at the "outermost" level, i.e. the line + // numbers inside the "outer_func()" function. This function has its + // code in file 15, so all source locations at this level should be + // in that file. 
+ assert_eq!(f.get_outermost_sourceloc(0x0fff), None); + assert_eq!( + f.get_outermost_sourceloc(0x1000), + Some((15, 60, 0x1000, Some(2))) + ); + assert_eq!( + f.get_outermost_sourceloc(0x100f), + Some((15, 60, 0x1000, Some(2))) + ); + assert_eq!( + f.get_outermost_sourceloc(0x1010), + Some((15, 60, 0x1000, Some(2))) + ); + assert_eq!( + f.get_outermost_sourceloc(0x101f), + Some((15, 60, 0x1000, Some(2))) + ); + assert_eq!( + f.get_outermost_sourceloc(0x1020), + Some((15, 62, 0x1020, None)) + ); + assert_eq!( + f.get_outermost_sourceloc(0x102f), + Some((15, 62, 0x1020, None)) + ); + assert_eq!(f.get_outermost_sourceloc(0x1030), None); + + // Check the first level of inlining. There is only one inlined call + // at this level, the call from outer_func() to mid_func(), spanning + // the range 0x1000..0x1020. + assert_eq!(f.get_inlinee_at_depth(0, 0x0fff), None); + assert_eq!(f.get_inlinee_at_depth(0, 0x1000), Some((15, 60, 0x1000, 2))); + assert_eq!(f.get_inlinee_at_depth(0, 0x100f), Some((15, 60, 0x1000, 2))); + assert_eq!(f.get_inlinee_at_depth(0, 0x1010), Some((15, 60, 0x1000, 2))); + assert_eq!(f.get_inlinee_at_depth(0, 0x101f), Some((15, 60, 0x1000, 2))); + assert_eq!(f.get_inlinee_at_depth(0, 0x1020), None); + assert_eq!(f.get_inlinee_at_depth(0, 0x102f), None); + assert_eq!(f.get_inlinee_at_depth(0, 0x1030), None); + + // Check the second level of inlining. Two function calls from mid_func() + // have been inlined at this level, the call to inner_func_1() and the + // call to inner_func_2(). + // The code for mid_func() is in file 4, so the location of the calls to + // inner_func_1() and inner_func_2() are in file 4. 
+ assert_eq!(f.get_inlinee_at_depth(1, 0x0fff), None); + assert_eq!(f.get_inlinee_at_depth(1, 0x1000), Some((4, 12, 0x1000, 3))); + assert_eq!(f.get_inlinee_at_depth(1, 0x100f), Some((4, 12, 0x1000, 3))); + assert_eq!(f.get_inlinee_at_depth(1, 0x1010), Some((4, 17, 0x1010, 1))); + assert_eq!(f.get_inlinee_at_depth(1, 0x101f), Some((4, 17, 0x1010, 1))); + assert_eq!(f.get_inlinee_at_depth(1, 0x1020), None); + assert_eq!(f.get_inlinee_at_depth(1, 0x102f), None); + assert_eq!(f.get_inlinee_at_depth(1, 0x1030), None); + + // Check that there are no deeper inline calls. + assert_eq!(f.get_inlinee_at_depth(2, 0x0fff), None); + assert_eq!(f.get_inlinee_at_depth(2, 0x1000), None); + assert_eq!(f.get_inlinee_at_depth(2, 0x100f), None); + assert_eq!(f.get_inlinee_at_depth(2, 0x1010), None); + assert_eq!(f.get_inlinee_at_depth(2, 0x101f), None); + assert_eq!(f.get_inlinee_at_depth(2, 0x1020), None); + assert_eq!(f.get_inlinee_at_depth(2, 0x102f), None); + assert_eq!(f.get_inlinee_at_depth(2, 0x1030), None); + + // Check the "innermost" source locations. These locations describe the + // file and line at the deepest level of inlining at the given address. + // We have a location in inner_func_1() (whose code is in file 7), a location + // in inner_func_2() (whose code is in file 8), and a location in the outer + // function outer_func() (whose code is in file 15). 
+ assert_eq!(f.get_innermost_sourceloc(0x0fff), None); + assert_eq!(f.get_innermost_sourceloc(0x1000), Some((7, 42, 0x1000))); + assert_eq!(f.get_innermost_sourceloc(0x100f), Some((7, 42, 0x1000))); + assert_eq!(f.get_innermost_sourceloc(0x1010), Some((8, 52, 0x1010))); + assert_eq!(f.get_innermost_sourceloc(0x101f), Some((8, 52, 0x1010))); + assert_eq!(f.get_innermost_sourceloc(0x1020), Some((15, 62, 0x1020))); + assert_eq!(f.get_innermost_sourceloc(0x102f), Some((15, 62, 0x1020))); + assert_eq!(f.get_innermost_sourceloc(0x1030), None); +} + +#[test] +fn test_func_with_m() { + let data = b"FUNC m 1000 30 10 some func +1000 10 42 7 +1010 10 52 8 +1020 10 62 15 +"; + let file = SymbolFile::from_bytes(data).expect("failed to parse!"); + let (_, _f) = file.functions.ranges_values().next().unwrap(); +} + +#[test] +fn test_stack_win_line_program_string() { + let line = + b"STACK WIN 4 2170 14 a1 b2 c3 d4 e5 f6 1 $eip 4 + ^ = $esp $ebp 8 + = $ebp $ebp ^ =\n"; + match stack_win_line(line) { + Ok((rest, WinFrameType::FrameData(stack))) => { + assert_eq!(rest, &b""[..]); + assert_eq!(stack.address, 0x2170); + assert_eq!(stack.size, 0x14); + assert_eq!(stack.prologue_size, 0xa1); + assert_eq!(stack.epilogue_size, 0xb2); + assert_eq!(stack.parameter_size, 0xc3); + assert_eq!(stack.saved_register_size, 0xd4); + assert_eq!(stack.local_size, 0xe5); + assert_eq!(stack.max_stack_size, 0xf6); + assert_eq!( + stack.program_string_or_base_pointer, + WinStackThing::ProgramString( + "$eip 4 + ^ = $esp $ebp 8 + = $ebp $ebp ^ =".to_string() + ) + ); + } + Err(e) => panic!("{}", format!("Parse error: {e:?}")), + _ => panic!("Something bad happened"), + } +} + +#[test] +fn test_stack_win_line_frame_data() { + let line = b"STACK WIN 0 1000 30 a1 b2 c3 d4 e5 f6 0 1\n"; + match stack_win_line(line) { + Ok((rest, WinFrameType::Fpo(stack))) => { + assert_eq!(rest, &b""[..]); + assert_eq!(stack.address, 0x1000); + assert_eq!(stack.size, 0x30); + assert_eq!(stack.prologue_size, 0xa1); + 
assert_eq!(stack.epilogue_size, 0xb2); + assert_eq!(stack.parameter_size, 0xc3); + assert_eq!(stack.saved_register_size, 0xd4); + assert_eq!(stack.local_size, 0xe5); + assert_eq!(stack.max_stack_size, 0xf6); + assert_eq!( + stack.program_string_or_base_pointer, + WinStackThing::AllocatesBasePointer(true) + ); + } + Err(e) => panic!("{}", format!("Parse error: {e:?}")), + _ => panic!("Something bad happened"), + } +} + +#[test] +fn test_stack_cfi() { + let line = b"STACK CFI deadf00d some rules\n"; + let rest = &b""[..]; + assert_eq!( + stack_cfi(line), + Ok(( + rest, + CfiRules { + address: 0xdeadf00d, + rules: "some rules".to_string(), + } + )) + ); +} + +#[test] +fn test_stack_cfi_init() { + let line = b"STACK CFI INIT badf00d abc init rules\n"; + let rest = &b""[..]; + assert_eq!( + stack_cfi_init(line), + Ok(( + rest, + StackInfoCfi { + init: CfiRules { + address: 0xbadf00d, + rules: "init rules".to_string(), + }, + size: 0xabc, + add_rules: vec![], + } + )) + ); +} + +#[test] +fn test_stack_cfi_lines() { + let data = b"STACK CFI INIT badf00d abc init rules +STACK CFI deadf00d some rules +STACK CFI deadbeef more rules + +"; + let file = SymbolFile::from_bytes(data).expect("failed to parse!"); + let (_, cfi) = file.cfi_stack_info.ranges_values().next().unwrap(); + assert_eq!( + cfi, + &StackInfoCfi { + init: CfiRules { + address: 0xbadf00d, + rules: "init rules".to_string(), + }, + size: 0xabc, + add_rules: vec![ + CfiRules { + address: 0xdeadbeef, + rules: "more rules".to_string(), + }, + CfiRules { + address: 0xdeadf00d, + rules: "some rules".to_string(), + }, + ], + } + ); +} + +#[test] +fn test_parse_symbol_bytes() { + let bytes = &b"MODULE Linux x86 D3096ED481217FD4C16B29CD9BC208BA0 firefox-bin +INFO blah blah blah +FILE 0 foo.c +FILE 100 bar.c +PUBLIC abcd 10 func 1 +PUBLIC ff00 3 func 2 +FUNC 900 30 10 some other func +FUNC 1000 30 10 some func +1000 10 42 7 +1010 10 52 8 +1020 10 62 15 +FUNC 1100 30 10 a third func +STACK WIN 4 900 30 a1 b2 c3 d4 e5 f6 1 
prog string +STACK WIN 0 1000 30 a1 b2 c3 d4 e5 f6 0 1 +STACK CFI INIT badf00d abc init rules +STACK CFI deadf00d some rules +STACK CFI deadbeef more rules +STACK CFI INIT f00f f0 more init rules + +"[..]; + let sym = parse_symbol_bytes(bytes).unwrap(); + assert_eq!(sym.files.len(), 2); + assert_eq!(sym.files.get(&0).unwrap(), "foo.c"); + assert_eq!(sym.files.get(&100).unwrap(), "bar.c"); + assert_eq!(sym.publics.len(), 2); + { + let p = &sym.publics[0]; + assert_eq!(p.address, 0xabcd); + assert_eq!(p.parameter_size, 0x10); + assert_eq!(p.name, "func 1".to_string()); + } + { + let p = &sym.publics[1]; + assert_eq!(p.address, 0xff00); + assert_eq!(p.parameter_size, 0x3); + assert_eq!(p.name, "func 2".to_string()); + } + assert_eq!(sym.functions.ranges_values().count(), 3); + let funcs = sym + .functions + .ranges_values() + .map(|(_, f)| f) + .collect::>(); + { + let f = &funcs[0]; + assert_eq!(f.address, 0x900); + assert_eq!(f.size, 0x30); + assert_eq!(f.parameter_size, 0x10); + assert_eq!(f.name, "some other func".to_string()); + assert_eq!(f.lines.ranges_values().count(), 0); + } + { + let f = &funcs[1]; + assert_eq!(f.address, 0x1000); + assert_eq!(f.size, 0x30); + assert_eq!(f.parameter_size, 0x10); + assert_eq!(f.name, "some func".to_string()); + assert_eq!( + f.lines.ranges_values().collect::>(), + vec![ + &( + Range::new(0x1000, 0x100F), + SourceLine { + address: 0x1000, + size: 0x10, + file: 7, + line: 42, + }, + ), + &( + Range::new(0x1010, 0x101F), + SourceLine { + address: 0x1010, + size: 0x10, + file: 8, + line: 52, + }, + ), + &( + Range::new(0x1020, 0x102F), + SourceLine { + address: 0x1020, + size: 0x10, + file: 15, + line: 62, + }, + ), + ] + ); + } + { + let f = &funcs[2]; + assert_eq!(f.address, 0x1100); + assert_eq!(f.size, 0x30); + assert_eq!(f.parameter_size, 0x10); + assert_eq!(f.name, "a third func".to_string()); + assert_eq!(f.lines.ranges_values().count(), 0); + } + assert_eq!(sym.win_stack_framedata_info.ranges_values().count(), 1); + let 
ws = sym + .win_stack_framedata_info + .ranges_values() + .map(|(_, s)| s) + .collect::>(); + { + let stack = &ws[0]; + assert_eq!(stack.address, 0x900); + assert_eq!(stack.size, 0x30); + assert_eq!(stack.prologue_size, 0xa1); + assert_eq!(stack.epilogue_size, 0xb2); + assert_eq!(stack.parameter_size, 0xc3); + assert_eq!(stack.saved_register_size, 0xd4); + assert_eq!(stack.local_size, 0xe5); + assert_eq!(stack.max_stack_size, 0xf6); + assert_eq!( + stack.program_string_or_base_pointer, + WinStackThing::ProgramString("prog string".to_string()) + ); + } + assert_eq!(sym.win_stack_fpo_info.ranges_values().count(), 1); + let ws = sym + .win_stack_fpo_info + .ranges_values() + .map(|(_, s)| s) + .collect::>(); + { + let stack = &ws[0]; + assert_eq!(stack.address, 0x1000); + assert_eq!(stack.size, 0x30); + assert_eq!(stack.prologue_size, 0xa1); + assert_eq!(stack.epilogue_size, 0xb2); + assert_eq!(stack.parameter_size, 0xc3); + assert_eq!(stack.saved_register_size, 0xd4); + assert_eq!(stack.local_size, 0xe5); + assert_eq!(stack.max_stack_size, 0xf6); + assert_eq!( + stack.program_string_or_base_pointer, + WinStackThing::AllocatesBasePointer(true) + ); + } + assert_eq!(sym.cfi_stack_info.ranges_values().count(), 2); + let cs = sym + .cfi_stack_info + .ranges_values() + .map(|(_, s)| s.clone()) + .collect::>(); + assert_eq!( + cs[0], + StackInfoCfi { + init: CfiRules { + address: 0xf00f, + rules: "more init rules".to_string(), + }, + size: 0xf0, + add_rules: vec![], + } + ); + assert_eq!( + cs[1], + StackInfoCfi { + init: CfiRules { + address: 0xbadf00d, + rules: "init rules".to_string(), + }, + size: 0xabc, + add_rules: vec![ + CfiRules { + address: 0xdeadbeef, + rules: "more rules".to_string(), + }, + CfiRules { + address: 0xdeadf00d, + rules: "some rules".to_string(), + }, + ], + } + ); +} + +/// Test that parsing a symbol file with overlapping FUNC/line data works. +#[test] +fn test_parse_with_overlap() { + //TODO: deal with duplicate PUBLIC records? 
Not as important since they don't go + // into a RangeMap. + let bytes = b"MODULE Linux x86 D3096ED481217FD4C16B29CD9BC208BA0 firefox-bin +FILE 0 foo.c +PUBLIC abcd 10 func 1 +PUBLIC ff00 3 func 2 +FUNC 1000 30 10 some func +1000 10 42 0 +1000 10 43 0 +1001 10 44 0 +1001 5 45 0 +1010 10 52 0 +FUNC 1000 30 10 some func overlap exact +FUNC 1001 30 10 some func overlap end +FUNC 1001 10 10 some func overlap contained +"; + let sym = parse_symbol_bytes(&bytes[..]).unwrap(); + assert_eq!(sym.publics.len(), 2); + { + let p = &sym.publics[0]; + assert_eq!(p.address, 0xabcd); + assert_eq!(p.parameter_size, 0x10); + assert_eq!(p.name, "func 1".to_string()); + } + { + let p = &sym.publics[1]; + assert_eq!(p.address, 0xff00); + assert_eq!(p.parameter_size, 0x3); + assert_eq!(p.name, "func 2".to_string()); + } + assert_eq!(sym.functions.ranges_values().count(), 1); + let funcs = sym + .functions + .ranges_values() + .map(|(_, f)| f) + .collect::>(); + { + let f = &funcs[0]; + assert_eq!(f.address, 0x1000); + assert_eq!(f.size, 0x30); + assert_eq!(f.parameter_size, 0x10); + assert_eq!(f.name, "some func".to_string()); + assert_eq!( + f.lines.ranges_values().collect::>(), + vec![ + &( + Range::new(0x1000, 0x100F), + SourceLine { + address: 0x1000, + size: 0x10, + file: 0, + line: 42, + }, + ), + &( + Range::new(0x1010, 0x101F), + SourceLine { + address: 0x1010, + size: 0x10, + file: 0, + line: 52, + }, + ), + ] + ); + } +} + +#[test] +fn test_parse_symbol_bytes_malformed() { + assert!( + parse_symbol_bytes(&b"this is not a symbol file\n"[..]).is_err(), + "Should fail to parse junk" + ); + + assert!( + parse_symbol_bytes( + &b"MODULE Linux x86 xxxxxx +FILE 0 foo.c +"[..] + ) + .is_err(), + "Should fail to parse malformed MODULE line" + ); + + assert!( + parse_symbol_bytes( + &b"MODULE Linux x86 abcd1234 foo +FILE x foo.c +"[..] 
+ ) + .is_err(), + "Should fail to parse malformed FILE line" + ); + + assert!( + parse_symbol_bytes( + &b"MODULE Linux x86 abcd1234 foo +FUNC xx 1 2 foo +"[..] + ) + .is_err(), + "Should fail to parse malformed FUNC line" + ); + + assert!( + parse_symbol_bytes( + &b"MODULE Linux x86 abcd1234 foo +this is some junk +"[..] + ) + .is_err(), + "Should fail to parse malformed file" + ); + + assert!( + parse_symbol_bytes( + &b"MODULE Linux x86 abcd1234 foo +FILE 0 foo.c +FILE"[..] + ) + .is_err(), + "Should fail to parse truncated file" + ); + + assert!( + parse_symbol_bytes(&b""[..]).is_err(), + "Should fail to parse empty file" + ); +} + +#[test] +fn test_parse_stack_win_inconsistent() { + // Various cases where the has_program_string value is inconsistent + // with the type of the STACK WIN entry. + // + // type=0 (FPO) should go with has_program_string==0 (false) + // type=4 (FrameData) should go with has_program_string==1 (true) + // + // Only 4d93e and 8d93e are totally valid. + // + // Current policy is to discard all the other ones, but all the cases + // are here in case we decide on a more sophisticated heuristic. 
+ + let bytes = b"MODULE Windows x86 D3096ED481217FD4C16B29CD9BC208BA0 firefox-bin +FILE 0 foo.c +STACK WIN 0 1d93e 4 4 0 0 10 0 0 1 1 +STACK WIN 0 2d93e 4 4 0 0 10 0 0 1 0 +STACK WIN 0 3d93e 4 4 0 0 10 0 0 1 prog string +STACK WIN 0 4d93e 4 4 0 0 10 0 0 0 1 +STACK WIN 4 5d93e 4 4 0 0 10 0 0 0 1 +STACK WIN 4 6d93e 4 4 0 0 10 0 0 0 0 +STACK WIN 4 7d93e 4 4 0 0 10 0 0 0 prog string +STACK WIN 4 8d93e 4 4 0 0 10 0 0 1 prog string +"; + let sym = parse_symbol_bytes(&bytes[..]).unwrap(); + + assert_eq!(sym.win_stack_framedata_info.ranges_values().count(), 1); + let ws = sym + .win_stack_framedata_info + .ranges_values() + .map(|(_, s)| s) + .collect::>(); + { + let stack = &ws[0]; + assert_eq!(stack.address, 0x8d93e); + assert_eq!(stack.size, 0x4); + assert_eq!(stack.prologue_size, 0x4); + assert_eq!(stack.epilogue_size, 0); + assert_eq!(stack.parameter_size, 0); + assert_eq!(stack.saved_register_size, 0x10); + assert_eq!(stack.local_size, 0); + assert_eq!(stack.max_stack_size, 0); + assert_eq!( + stack.program_string_or_base_pointer, + WinStackThing::ProgramString("prog string".to_string()) + ); + } + assert_eq!(sym.win_stack_fpo_info.ranges_values().count(), 1); + let ws = sym + .win_stack_fpo_info + .ranges_values() + .map(|(_, s)| s) + .collect::>(); + { + let stack = &ws[0]; + assert_eq!(stack.address, 0x4d93e); + assert_eq!(stack.size, 0x4); + assert_eq!(stack.prologue_size, 0x4); + assert_eq!(stack.epilogue_size, 0); + assert_eq!(stack.parameter_size, 0); + assert_eq!(stack.saved_register_size, 0x10); + assert_eq!(stack.local_size, 0); + assert_eq!(stack.max_stack_size, 0); + assert_eq!( + stack.program_string_or_base_pointer, + WinStackThing::AllocatesBasePointer(true) + ); + } +} + +#[test] +fn address_size_overflow() { + let bytes = b"FUNC 1 2 3 x\nffffffffffffffff 2 0 0\n"; + let sym = parse_symbol_bytes(bytes.as_slice()).unwrap(); + let fun = sym.functions.get(1).unwrap(); + assert!(fun.lines.is_empty()); + assert!(fun.name == "x"); +} diff --git 
a/third_party/rust/breakpad-symbols/src/sym_file/types.rs b/third_party/rust/breakpad-symbols/src/sym_file/types.rs new file mode 100644 index 000000000000..15d688c0ad77 --- /dev/null +++ b/third_party/rust/breakpad-symbols/src/sym_file/types.rs @@ -0,0 +1,315 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +use range_map::{Range, RangeMap}; +use std::collections::HashMap; + +/// A publicly visible linker symbol. +#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)] +pub struct PublicSymbol { + /// The symbol's address relative to the module's load address. + /// + /// This field is declared first so that the derived Ord implementation sorts + /// by address first. We take advantage of the sort order during address lookup. + pub address: u64, + /// The name of the symbol. + pub name: String, + /// The size of parameters passed to the function. + pub parameter_size: u32, +} + +/// A mapping from machine code bytes to source line and file. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SourceLine { + /// The start address relative to the module's load address. + pub address: u64, + /// The size of this range of instructions in bytes. + pub size: u32, + /// The source file name that generated this machine code. + /// + /// This is an index into `SymbolFile::files`. + pub file: u32, + /// The line number in `file` that generated this machine code. + pub line: u32, +} + +/// A single range which is covered by an inlined function call. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Inlinee { + /// The depth of the inline call. + pub depth: u32, + /// The start address relative to the module's load address. + pub address: u64, + /// The size of this range of instructions in bytes. + pub size: u32, + /// The source file which contains the function call. + /// + /// This is an index into `SymbolFile::files`. 
+ pub call_file: u32, + /// The line number in `call_file` for the function call. + pub call_line: u32, + /// The function name, as an index into `SymbolFile::inline_origins`. + pub origin_id: u32, +} + +/// A source-language function. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Function { + /// The function's start address relative to the module's load address. + pub address: u64, + /// The size of the function in bytes. + pub size: u32, + /// The size of parameters passed to the function. + pub parameter_size: u32, + /// The name of the function as declared in the source. + pub name: String, + /// Source line information for this function. + pub lines: RangeMap, + /// Inlinee information for this function, sorted by (depth, address). + /// + /// Essentially this can be considered as "one vec per depth", just with + /// all those vecs concatenated into one. + /// + /// Inlinees form a nested structure, you can think of them like a flame graph. + pub inlinees: Vec, +} + +impl Function { + pub fn memory_range(&self) -> Option> { + if self.size == 0 { + return None; + } + Some(Range::new( + self.address, + self.address.checked_add(self.size as u64)? - 1, + )) + } + + /// Returns `(file_id, line, address, inline_origin)` of the line or inline record that + /// covers the given address at the outermost level (i.e. not inside any + /// inlined calls). + pub fn get_outermost_sourceloc(&self, addr: u64) -> Option<(u32, u32, u64, Option)> { + if let Some((call_file, call_line, address, origin)) = self.get_inlinee_at_depth(0, addr) { + return Some((call_file, call_line, address, Some(origin))); + } + // Otherwise we return the line record covering this address. + let line = self.lines.get(addr)?; + Some((line.file, line.line, line.address, None)) + } + + /// Returns `(file_id, line, address)` of the line record that covers the + /// given address. Line records describe locations at the deepest level of + /// inlining at that address. 
+ /// + /// For example, if we have an "inline call stack" A -> B -> C at this + /// address, i.e. both the call to B and the call to C have been inlined all + /// the way into A (A being the "outer function"), then this method reports + /// locations in C. + pub fn get_innermost_sourceloc(&self, addr: u64) -> Option<(u32, u32, u64)> { + let line = self.lines.get(addr)?; + Some((line.file, line.line, line.address)) + } + + /// Returns `(call_file_id, call_line, address, inline_origin)` of the + /// inlinee record that covers the given address at the given depth. + /// + /// We start at depth zero. For example, if we have an "inline call stack" + /// A -> B -> C at an address, i.e. both the call to B and the call to C have + /// been inlined all the way into A (A being the "outer function"), then the + /// call A -> B is at level zero, and the call B -> C is at level one. + pub fn get_inlinee_at_depth(&self, depth: u32, addr: u64) -> Option<(u32, u32, u64, u32)> { + let inlinee = match self + .inlinees + .binary_search_by_key(&(depth, addr), |inlinee| (inlinee.depth, inlinee.address)) + { + // Exact match + Ok(index) => &self.inlinees[index], + // No match, insertion index is zero => before first element + Err(0) => return None, + // No exact match, insertion index points after inlinee whose (depth, addr) is < what were looking for + // => subtract 1 to get candidate + Err(index) => &self.inlinees[index - 1], + }; + if inlinee.depth != depth { + return None; + } + let end_address = inlinee.address.checked_add(inlinee.size as u64)?; + if addr < end_address { + Some(( + inlinee.call_file, + inlinee.call_line, + inlinee.address, + inlinee.origin_id, + )) + } else { + None + } + } +} + +/// Extra metadata that can be safely ignored, but may contain useful facts. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum Info { + /// The URL this file was downloaded from. 
This is added to symbol files + /// by HttpSymbolSupplier when it stores them in its cache, so that we + /// can populate that info even on a cache hit. + Url(String), + /// An info line we either don't know about or don't care about. + Unknown, +} + +/// DWARF CFI rules for recovering registers at a specific address. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct CfiRules { + /// The address in question. + pub address: u64, + /// Postfix expressions to evaluate to recover register values. + pub rules: String, +} + +/// Information used for unwinding stack frames using DWARF CFI. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct StackInfoCfi { + /// The initial rules for this address range. + pub init: CfiRules, + /// The size of this entire address range. + pub size: u32, + /// Additional rules to use at specified addresses. + pub add_rules: Vec, +} + +impl StackInfoCfi { + pub fn memory_range(&self) -> Option> { + if self.size == 0 { + return None; + } + Some(Range::new( + self.init.address, + self.init.address.checked_add(self.size as u64)? - 1, + )) + } +} + +/// Specific details about whether the frame uses a base pointer or has a program string to +/// evaluate. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum WinFrameType { + /// This frame uses FPO-style data. + Fpo(StackInfoWin), + /// This frame uses new-style frame data, has a program string. + FrameData(StackInfoWin), + /// Some other type of frame. + Unhandled, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum WinStackThing { + ProgramString(String), + AllocatesBasePointer(bool), +} + +/// Information used for unwinding stack frames using Windows frame info. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct StackInfoWin { + /// The address in question. + pub address: u64, + /// The size of the address range covered. + pub size: u32, + /// The size of the function's prologue. + pub prologue_size: u32, + /// The size of the function's epilogue. 
+ pub epilogue_size: u32, + /// The size of arguments passed to this function. + pub parameter_size: u32, + /// The number of bytes in the stack frame for callee-saves registers. + pub saved_register_size: u32, + /// The number of bytes in the stack frame for local variables. + pub local_size: u32, + /// The maximum number of bytes pushed onto the stack by this frame. + pub max_stack_size: u32, + /// A program string or boolean regarding a base pointer. + pub program_string_or_base_pointer: WinStackThing, +} + +impl StackInfoWin { + pub fn memory_range(&self) -> Option> { + if self.size == 0 { + return None; + } + Some(Range::new( + self.address, + self.address.checked_add(self.size as u64)? - 1, + )) + } +} + +/// A parsed .sym file containing debug symbols. +#[derive(Debug, PartialEq, Eq)] +pub struct SymbolFile { + pub module_id: String, + pub debug_file: String, + /// The set of source files involved in compilation. + pub files: HashMap, + /// Publicly visible symbols. + pub publics: Vec, + /// Functions. + pub functions: RangeMap, + /// Function names for inlined functions. + pub inline_origins: HashMap, + /// DWARF CFI unwind information. + pub cfi_stack_info: RangeMap, + /// Windows unwind information (frame data). + pub win_stack_framedata_info: RangeMap, + /// Windows unwind information (FPO data). + pub win_stack_fpo_info: RangeMap, + + // Statistics which are strictly best-effort. Generally this + // means we might undercount in situations where we forgot to + // log an event. + /// If the symbol file was loaded from a URL, this is the url + pub url: Option, + /// The number of times the parser found that the symbol file was + /// strictly ambiguous but simple heuristics repaired it. (e.g. + /// two STACK WIN entries overlapped, but the second was a suffix of + /// the first, so we just truncated the first.) + /// + /// Ideally dump_syms would never output this kind of thing, but it's + /// tolerable. 
+ pub ambiguities_repaired: u64, + /// The number of times the parser found that the symbol file was + /// ambiguous and just randomly picked one of the options to make + /// progress. + /// + /// e.g. two STACK WIN entries with identical ranges but + /// different values, so one was discarded arbitrarily. + pub ambiguities_discarded: u64, + /// The number of times the parser found that a section of the file + /// (generally a line) was corrupt and discarded it. + /// + /// e.g. a STACK WIN entry where the `type` and `has_program` fields + /// have inconsistent values. + pub corruptions_discarded: u64, + /// The number of times the cfi evaluator failed out in a way that + /// implies the cfi entry is fundamentally corrupt. + /// + /// This isn't detected during parsing for two reasons: + /// + /// * We don't parse cfi program strings until we are requested to + /// execute them (there's ~millions of program strings which will + /// never need to be parsed, so eagerly parsing them would be + /// horribly expensive and pointless for anything but debug stats.) + /// + /// * A program string may technically parse but still be impossible + /// to fully evaluate. For instance, it might try to pop values from + /// its internal stack when there are none left. + /// + /// This number may be inflated if a corrupt cfi entry occurs in multiple + /// frames, as each attempted eval will be counted. + /// + /// This number does not include cfi evaluations that failed in ways that + /// may be a result of incorrect input memory/registers (e.g. failing + /// to evaluate a "dereference pointer" instruction because the pointer + /// was not mapped memory). In these situations the cfi entry *may* + /// still be correct.
+ pub cfi_eval_corruptions: u64, +} diff --git a/third_party/rust/breakpad-symbols/src/sym_file/walker.rs b/third_party/rust/breakpad-symbols/src/sym_file/walker.rs new file mode 100644 index 000000000000..8a2471d4376d --- /dev/null +++ b/third_party/rust/breakpad-symbols/src/sym_file/walker.rs @@ -0,0 +1,1836 @@ +//! This module implements support for breakpad's text-based STACK CFI and STACK WIN +//! unwinding instructions. This isn't something you need to actually use +//! directly, it's just public so these docs will get a nice pretty rendering. +//! +//! The rest of this documentation is discussion of STACK CFI and STACK WIN +//! format -- both how to parse and evaluate them. +//! +//! Each STACK line provides instructions on how to unwind the program at +//! a given instruction address. Specifically this means how to restore +//! registers, which most importantly include the instruction pointer ($rip/$eip/pc) +//! and stack pointer ($rsp/$esp/sp). +//! +//! STACK WIN lines are completely self-contained while STACK CFI lines may +//! depend on the lines above them. +//! +//! Note that all addresses are relative to the start of the module -- resolving +//! the module and applying that offset is left as an exercise to the reader. +//! +//! See also [the upstream breakpad docs](https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md) +//! which are *ok* but can be a bit hazy around the details (I think they've +//! just partially bitrotted). To the best of my ability I have tried to make +//! these docs as accurate and detailed as possible. +//! +//! I also try to be honest about the places where I'm uncertain about the +//! semantics. +//! +//! +//! +//! +//! +//! # Known Differences Between This Implementation and Breakpad +//! +//! I haven't thoroughly tested the two implementations for compatibility, +//! but where I have noticed a difference I'll put it here so it's +//! documented *somewhere*. +//! +//! +//! +//! 
## Register Names +//! +//! Breakpad assumes register names are prefixed with `$` *EXCEPT* +//! on ARM variants. These prefixes are hardcoded, so if you hand it +//! `$rax` or `x11` it will be happy, but if you hand it `rax` or `$x11` +//! it will freak out and be unable to parse the CFI expressions. +//! +//! This implementation doesn't have any notion of "valid" registers +//! for a particular execution, and so just unconditionally strips leading +//! $'s. So `$rax`, `$x11`, `rax`, and `x11` should all be valid. +//! +//! Register names are otherwise only "validated" by the [FrameWalker][], +//! in that it will return an error if we try to get or set a register name +//! *it* doesn't recognize (or doesn't have a valid value for). But it +//! doesn't ever expect `$`'s, so that detail has been erased by +//! the time it's involved. +//! +//! The author of this document may or may not know this as a result of +//! accidentally causing mozilla/dump_syms to emit `$x11` in some situations. +//! If that is the case, they fixed it, so everything's fine, right? +//! +//! It's bad to be a permissive parser, but symbol files are already +//! an inconsistent mess, so you kind of *have* to be permissive in random +//! places? And we don't have a conformance test suite to keep everything +//! perfectly bug-compatible with breakpad when it doesn't document +//! everything enough to know what's "intended". +//! +//! +//! +//! ## cfi_scan hacks +//! +//! This is technically a technique that the *user* of walker.rs would +//! implement, but it's worth discussing here since it relates to cfi +//! evaluation. +//! +//! When evaluating STACK WIN expressions, breakpad will apply several +//! heuristics to adjust values. This includes scanning the stack to +//! try to "refine" the inputs and outputs. +//! +//! At the moment, we implement very few of these heuristics. We definitely +//! don't do any scanning when evaluating STACK WIN. +//! +//!
The ones we *do* implement (and that I can recall) are: +//! +//! * changing the value of searchStart based on whether the program +//! includes an `@`. +//! +//! * trying to forward the value of `$ebx` in more situations +//! than the STACK WIN suggests you should. +//! +//! At this point I don't recall if these were implemented to fix actual +//! issues found during development, or if I just cargo-culted them +//! because they seemed relatively inoffensive. +//! +//! +//! +//! +//! +//! # STACK CFI +//! +//! STACK CFI lines come in two forms: +//! +//! `STACK CFI INIT instruction_address num_bytes registers` +//! +//! `STACK CFI instruction_address registers` +//! +//! +//! A `STACK CFI INIT` line specifies how to restore registers for the given +//! range of addresses. +//! +//! Example: `STACK CFI INIT 804c4b0 40 .cfa: $esp 4 + $eip: .cfa 4 - ^` +//! +//! Arguments: +//! * instruction_address (hex u64) is the first address in the module this line applies to +//! * num_bytes (hex u64) is the number of bytes it (and its child STACK CFI lines) covers +//! * registers (string) is the register restoring instructions (see the next section) +//! +//! +//! A `STACK CFI` line always follows a "parent" `STACK CFI INIT` line. It +//! updates the instructions on how to restore registers for anything within +//! the parent STACK CFI INIT's range after the given address (inclusive). +//! It only specifies rules for registers that have new instructions. +//! +//! To get the final rules for a given address, start with its `STACK CFI INIT` +//! and then apply all the applicable `STACK CFI` "diffs" in order. +//! +//! Example: `STACK CFI 804c4b1 .cfa: $esp 8 + $ebp: .cfa 8 - ^` +//! +//! Arguments: +//! * instruction_address (hex u64) is the first address to apply these instructions +//! * registers (string) is the new register restoring instructions (see the next section) +//! +//! +//! +//! ## STACK CFI registers +//! +//! A line's STACK CFI registers are of the form +//!
+//! `REG: EXPR REG: EXPR REG: EXPR...` +//! +//! Where REG is `.cfa`, `.ra`, `$<alphanumeric>`, or `<alphanumeric>` +//! (but not a valid integer literal). +//! +//! And EXPR is `<anything but ":">` (see next section for details) +//! +//! Each `REG: EXPR` pair specifies how to compute the register REG for the +//! caller. There are three kinds of registers: +//! +//! * `$XXX` or `XXX` refers to an actual general-purpose register. In REG position it +//! refers to the caller, in an EXPR it refers to the callee. Register names +//! can in theory be any alphanumeric string that isn't a valid integer literal. +//! e.g. `$rax`, `x11`. `$` prefixes are expected for all platforms except ARM +//! variants. This parser is more permissive and allows for either form on all +//! platforms. Completely invalid register names (`x99`) will be caught at evaluation time. +//! +//! * `.cfa` is the "canonical frame address" (CFA), as used in DWARF CFI. It +//! abstractly represents the base address of the frame. On x86, x64, and +//! ARM64 the CFA is the caller's stack pointer from *before* the call. As +//! such on those platforms you will never see instructions to restore the +//! frame pointer -- it must be implicitly restored from the cfa. `.cfa` +//! always refers to the caller, and therefore must be computed without +//! use of itself. +//! +//! * `.ra` is the "return address", which just abstractly refers to the +//! instruction pointer/program counter. It only ever appears in REG +//! position. +//! +//! `.cfa` and `.ra` must always have defined rules, or the STACK CFI is malformed. +//! +//! The CFA is special because its computed value can be used by every other EXPR. +//! As such it should always be computed first so that its value is available. +//! The purpose of the CFA is to cleanly handle the very common case of registers +//! saved to the stack. Every register saved this way lives at a fixed offset +//! from the start of the frame. So we can specify their rules once, and just +//! update the CFA. +//! +//!
For example: +//! +//! ```text +//! STACK CFI INIT 0x10 16 .cfa: $rsp 8 + .ra: .cfa -8 + ^ +//! STACK CFI 0x11 .cfa: $rsp 16 + $rax: .cfa -16 + ^ +//! STACK CFI 0x12 .cfa: $rsp 24 + +//! ``` +//! +//! Can be understood as (pseudo-rust): +//! +//! ```rust,ignore +//! let mut cfa = 0; +//! let mut ra = None; +//! let mut caller_rax = None; +//! +//! +//! // STACK CFI INIT 0x10's original state +//! cfa = callee_rsp + 8; +//! ra = Some(|| { *(cfa - 8) }); // Defer evaluation +//! +//! +//! // STACK CFI 0x11's diff +//! if address >= 0x11 { +//! cfa = callee_rsp + 16; +//! caller_rax = Some(|| { *(cfa - 16) }); // Defer evaluation +//! } +//! +//! +//! // STACK CFI 0x12's diff +//! if address >= 0x12 { +//! cfa = callee_rsp + 24; +//! } +//! +//! caller.stack_pointer = cfa; +//! +//! // Finally evaluate all other registers using the current cfa +//! caller.instruction_pointer = ra.unwrap()(); +//! caller.rax = caller_rax.map(|func| func()); +//! ``` +//! +//! +//! +//! ## STACK CFI expressions +//! +//! STACK CFI expressions are in postfix (Reverse Polish) notation with tokens +//! separated by whitespace. e.g. +//! +//! ```text +//! .cfa $rsp 3 + * ^ +//! ``` +//! +//! Is the postfix form of +//! +//! ```text +//! ^(.cfa * ($rsp + 3)) +//! ``` +//! +//! The benefit of postfix notation is that it can be evaluated while +//! processing the input left-to-right without needing to maintain any +//! kind of parse tree. +//! +//! The only state a postfix evaluator needs to maintain is a stack of +//! computed values. When a value (see below) is encountered, it is pushed +//! onto the stack. When an operator (see below) is encountered, it can be +//! evaluated immediately by popping its inputs off the stack and pushing +//! its output onto the stack. +//! +//! If the postfix expression is valid, then at the end of the token +//! stream the stack should contain a single value, which is the result. +//! +//! For binary operators the right-hand-side (rhs) will be the first +//! 
value popped from the stack. +//! +//! Supported operations are: +//! +//! * `+`: Binary Add +//! * `-`: Binary Subtract +//! * `*`: Binary Multiply +//! * `/`: Binary Divide +//! * `%`: Binary Remainder +//! * `@`: Binary Align (truncate lhs to be a multiple of rhs) +//! * `^`: Unary Dereference (load from stack memory) +//! +//! Supported values are: +//! +//! * `.cfa`: read the CFA +//! * `.undef`: terminate execution, the output is explicitly unknown +//! * `<a signed decimal integer>`: read this integer constant (limited to i64 precision) +//! * `$<alphanumeric>`: read a general purpose register from the callee's frame +//! * `<alphanumeric>`: same as above (can't be an integer literal) +//! +//! Whether registers should be `$reg` or `reg` depends on the platform. +//! This parser is permissive, and just accepts both on all platforms. +//! +//! But I believe `$` is "supposed" to be used on every platform except for +//! ARM variants. +//! +//! +//! +//! # STACK WIN +//! +//! STACK WIN lines try to encode the more complex unwinding rules produced by +//! x86 Windows toolchains. On any other target (x64 windows, x86 linux, etc), +//! only STACK CFI should be used. This is a good thing, because STACK WIN is +//! a bit of a hacky mess, as you'll see. +//! +//! +//! ```text +//! STACK WIN type instruction_address num_bytes prologue_size epilogue_size parameter_size +//! saved_register_size local_size max_stack_size has_program_string +//! program_string_OR_allocates_base_pointer +//! ``` +//! +//! +//! Examples: +//! +//! ```text +//! STACK WIN 4 a1080 fa 9 0 c 0 0 0 1 $T0 .raSearch = $eip $T0 ^ = $esp $T0 4 + = +//! +//! STACK WIN 0 1cab960 68 0 0 10 0 8 0 0 0 +//! ``` +//! +//! +//! Arguments: +//! * type is either 4 ("framedata") or 0 ("fpo"), see their sections below +//! * instruction_address (hex u64) is the first address in the module this line applies to +//! * num_bytes (hex u64) is the number of bytes it covers +//! * has_program_string (0 or 1) indicates the meaning of the next argument (implied by type?)
+//! * program_string_OR_allocates_base_pointer is one of: +//! * program_string (string) is the expression to evaluate for "framedata" (see that section) +//! * allocates_base_pointer (0 or 1) whether ebp is pushed for "fpo" (see that section) +//! +//! The rest of the arguments are just values you may need to use in the STACK WIN +//! evaluation algorithms: +//! +//! * prologue_size +//! * epilogue_size +//! * parameter_size +//! * saved_register_size +//! * local_size +//! * max_stack_size +//! +//! Two useful values derived from these values are: +//! +//! ```rust,ignore +//! grand_callee_parameter_size = callee.parameter_size +//! frame_size = local_size + saved_register_size + grand_callee_parameter_size +//! ``` +//! +//! Having frame_size allows you to find the offset from $esp to the return +//! address (and other saved registers). This requires grand_callee_parameter_size +//! because certain windows calling conventions make the caller responsible for +//! destroying the callee's arguments, which means they are part of the caller's +//! frame, and therefore change the offset to the return address. (During unwinding +//! we generally refer to the current frame as the "callee" and the next frame as +//! the "caller", but here we're concerned with callee's callee, hence grand_callee.) +//! +//! Note that grand_callee_parameter_size is using the STACK WIN entry of the +//! *previous* frame. Although breakpad symbol files have FUNC entries which claim +//! to provide parameter_size as well, those values are not to be trusted (or +//! at least, the grand-callee's STACK WIN entry is to be preferred). The two +//! values are frequently different, and the STACK WIN ones are more accurate. +//! +//! If there is no grand_callee (i.e. you are unwinding the first frame of the +//! stack), grand_callee_parameter_size can be defaulted to 0. +//! +//! +//! +//! +//! # STACK WIN frame pointer mode ("fpo") +//! +//!
This is an older mode that just gives you minimal information to unwind: +//! the size of the stack frame (`frame_size`). All you can do is find the +//! return address, update `$esp`, and optionally restore `$ebp` (if allocates_base_pointer). +//! +//! This is best described by pseudocode: +//! +//! ```text +//! $eip := *($esp + frame_size) +//! +//! if allocates_base_pointer: +//! // $ebp was being used as a general purpose register, old value saved here +//! $ebp := *($esp + grand_callee_parameter_size + saved_register_size - 8) +//! else: +//! // Assume both ebp and ebx are preserved (if they were previously valid) +//! $ebp := $ebp +//! $ebx := $ebx +//! +//! $esp := $esp + frame_size + 4 +//! ``` +//! +//! I don't have an interesting explanation for why that position is specifically +//! where $ebp is saved, it just is. The algorithm tries to forward $ebx when $ebp +//! wasn't messed with as a bit of a hacky way to encourage certain Windows system +//! functions to unwind better. Evidently some of them have framedata expressions +//! that depend on $ebx, so preserving it whenever it's plausible is desirable? +//! +//! +//! +//! +//! # STACK WIN expression mode ("framedata") +//! +//! This is the general purpose mode that has you execute a tiny language to compute +//! arbitrary registers. +//! +//! STACK WIN expressions use many of the same concepts as STACK CFI, but rather +//! than using `REG: EXPR` pairs to specify outputs, it maintains a map of variables +//! whose values can be read and written by each expression. +//! +//! I personally find this easiest to understand as an extension to the STACK CFI +//! expressions, so I'll describe it in those terms: +//! +//! The supported operations add one binary operation: +//! +//! * `=`: Binary Assign (assign the rhs's integer to the lhs's variable) +//! +//! This operation requires us to have a distinction between *integers* and +//! *variables*, which the postfix evaluator's stack must hold. +//! +//! 
All other operators operate only on integers. If a variable is passed where +//! an integer is expected, that means the current value of the variable should +//! be used. +//! +//! "values" then become: +//! +//! * `.<identifier>`: a variable containing some initial constants (see below) +//! * `$<identifier>`: a variable representing a general purpose register or temporary +//! * `<identifier>`: same as above, but can't be an integer literal +//! * `.undef`: delete the variable if this is assigned to it (like Option::None) +//! * `<a signed decimal integer>`: read this integer constant (limited to i32 precision) +//! +//! +//! Before evaluating a STACK WIN expression: +//! +//! * The variables `$ebp` and `$esp` should be initialized from the callee's +//! values for those registers (error out if those are unknown). `$ebx` should +//! similarly be initialized if it's available, since some things use it, but +//! it's optional. +//! +//! * The following constant variables should be set accordingly: +//! * `.cbParams = parameter_size` +//! * `.cbCalleeParams = grand_callee_parameter_size` (only for breakpad-generated exprs?) +//! * `.cbSavedRegs = saved_register_size` +//! * `.cbLocals = local_size` +//! * `.raSearch = $esp + frame_size` +//! * `.raSearchStart = .raSearch` (synonym that sometimes shows up?) +//! +//! Note that `.raSearch(Start)` roughly corresponds to STACK CFI's `.cfa`, in that +//! it generally points to where the return address is. However, breakpad seems to +//! believe there are many circumstances where this value can be slightly wrong +//! (due to the frame pointer having mysterious extra alignment?). As such, +//! breakpad has several messy heuristics to "refine" `.raSearchStart`, such as +//! scanning the stack. This implementation does not (yet?) implement those +//! heuristics. As of this writing I have not encountered an instance of this +//! problem in the wild (but I haven't done much testing!). +//! +//! +//! After evaluating a STACK WIN expression: +//! +//!
The caller's registers are stored in `$eip`, `$esp`, `$ebp`, `$ebx`, `$esi`, +//! and `$edi`. If those variables are undefined, then their values in the caller +//! are unknown. Do not implicitly forward registers that weren't explicitly set. +//! +//! (Should it be an error if the stack isn't empty at the end? It's +//! arguably malformed input but also it doesn't matter since the output is +//! in the variables? *shrug*) +//! +//! +//! +//! ## Example STACK WIN framedata evaluation +//! +//! Here is an example of framedata for a function with the standard prologue. +//! Given the input: +//! +//! ```text +//! $T0 $ebp = $eip $T0 4 + ^ = $ebp $T0 ^ = $esp $T0 8 + = +//! ``` +//! +//! and initial state: +//! +//! ```text +//! ebp: 16, esp: 1600 +//! ``` +//! +//! Then evaluation proceeds as follows: +//! +//! ```text +//! Token | Stack | Vars +//! ---------+--------------+---------------------------------------------------- +//! | | $ebp: 16, $esp: 1600, +//! $T0 | $T0 | $ebp: 16, $esp: 1600, +//! $ebp | $T0 $ebp | $ebp: 16, $esp: 1600, +//! = | | $ebp: 16, $esp: 1600, $T0: 16, +//! $eip | $eip | $ebp: 16, $esp: 1600, $T0: 16, +//! $T0 | $eip $T0 | $ebp: 16, $esp: 1600, $T0: 16, +//! 4 | $eip $T0 4 | $ebp: 16, $esp: 1600, $T0: 16, +//! + | $eip 20 | $ebp: 16, $esp: 1600, $T0: 16, +//! ^ | $eip (*20) | $ebp: 16, $esp: 1600, $T0: 16, +//! = | | $ebp: 16, $esp: 1600, $T0: 16, $eip: (*20) +//! $ebp | $ebp | $ebp: 16, $esp: 1600, $T0: 16, $eip: (*20) +//! $T0 | $ebp $T0 | $ebp: 16, $esp: 1600, $T0: 16, $eip: (*20) +//! ^ | $ebp (*16) | $ebp: 16, $esp: 1600, $T0: 16, $eip: (*20) +//! = | | $ebp: (*16), $esp: 1600, $T0: 16, $eip: (*20) +//! $esp | $esp | $ebp: (*16), $esp: 1600, $T0: 16, $eip: (*20) +//! $T0 | $esp $T0 | $ebp: (*16), $esp: 1600, $T0: 16, $eip: (*20) +//! 8 | $esp $T0 8 | $ebp: (*16), $esp: 1600, $T0: 16, $eip: (*20) +//! + | $esp 24 | $ebp: (*16), $esp: 1600, $T0: 16, $eip: (*20) +//! = | | $ebp: (*16), $esp: 24, $T0: 16, $eip: (*20) +//! ``` +//! +//! 
Giving a final output of `ebp=(*16)`, `esp=24`, `eip=(*20)`. + +use super::{CfiRules, StackInfoWin, WinStackThing}; +use crate::FrameWalker; +use std::collections::HashMap; +use std::str::FromStr; +use tracing::{debug, trace}; + +pub fn walk_with_stack_cfi( + init: &CfiRules, + additional: &[CfiRules], + walker: &mut dyn FrameWalker, +) -> Option<()> { + trace!("trying STACK CFI exprs"); + trace!(" {}", init.rules); + for line in additional { + trace!(" {}", line.rules); + } + + // First we must collect up all the `REG: EXPR` pairs in these lines. + // If a REG occurs twice, we prefer the one that comes later. This allows + // STACK CFI records to apply incremental updates to the instructions. + let mut exprs = HashMap::new(); + parse_cfi_exprs(&init.rules, &mut exprs)?; + for line in additional { + parse_cfi_exprs(&line.rules, &mut exprs)?; + } + trace!("STACK CFI parse successful"); + + // These two are special and *must* always be present + let cfa_expr = exprs.remove(&CfiReg::Cfa)?; + let ra_expr = exprs.remove(&CfiReg::Ra)?; + trace!("STACK CFI seems reasonable, evaluating"); + + // Evaluating the CFA cannot itself use the CFA + let cfa = eval_cfi_expr(cfa_expr, walker, None)?; + trace!("successfully evaluated .cfa (frame address)"); + let ra = eval_cfi_expr(ra_expr, walker, Some(cfa))?; + trace!("successfully evaluated .ra (return address)"); + + walker.set_cfa(cfa)?; + walker.set_ra(ra)?; + + for (reg, expr) in exprs { + if let CfiReg::Other(reg) = reg { + // If this eval fails, just don't emit this particular register + // and keep going on. It's fine to lose some general purpose regs, + // but make sure to clear it in case it would have been implicitly + // forwarded from the callee. 
+ match eval_cfi_expr(expr, walker, Some(cfa)) { + Some(val) => { + walker.set_caller_register(reg, val); + trace!("successfully evaluated {}", reg); + } + None => { + walker.clear_caller_register(reg); + trace!("optional register {} failed to evaluate, dropping it", reg); + } + } + } else { + // All special registers should already have been removed?? + unreachable!() + } + } + + Some(()) +} + +fn parse_cfi_exprs<'a>(input: &'a str, output: &mut HashMap, &'a str>) -> Option<()> { + // Note this is an ascii format so we can think chars == bytes! + + let base_addr = input.as_ptr() as usize; + let mut cur_reg = None; + let mut expr_first: Option<&str> = None; + let mut expr_last: Option<&str> = None; + for token in input.split_ascii_whitespace() { + if let Some(token) = token.strip_suffix(':') { + // This token is a "REG:", indicating the end of the previous EXPR + // and start of the next. If we already have an active register, + // then now is the time to commit it to our output. + if let Some(reg) = cur_reg { + // We compute the the expr substring by just abusing the fact that rust substrings + // point into the original string, so we can use map addresses in the substrings + // back into indices into the original string. + let min_addr = expr_first?.as_ptr() as usize; + let max_addr = expr_last?.as_ptr() as usize + expr_last?.len(); + let expr = &input[min_addr - base_addr..max_addr - base_addr]; + + // Intentionally overwrite any pre-existing entries for this register, + // because that's how CFI records work. + output.insert(reg, expr); + + expr_first = None; + expr_last = None; + } + + cur_reg = if token == ".cfa" { + Some(CfiReg::Cfa) + } else if token == ".ra" { + Some(CfiReg::Ra) + } else if let Some(token) = token.strip_prefix('$') { + // x86-style $rax register + Some(CfiReg::Other(token)) + } else { + // arm-style x11 register + Some(CfiReg::Other(token)) + }; + } else { + // First token *must* be a register! 
+ cur_reg.as_ref()?; + + // This is just another part of the current EXPR, update first/last accordingly. + if expr_first.is_none() { + expr_first = Some(token); + } + expr_last = Some(token); + } + } + + // Process the final rule (there must be a defined reg!) + let min_addr = expr_first?.as_ptr() as usize; + let max_addr = expr_last?.as_ptr() as usize + expr_last?.len(); + let expr = &input[min_addr - base_addr..max_addr - base_addr]; + + output.insert(cur_reg?, expr); + + Some(()) +} + +fn eval_cfi_expr(expr: &str, walker: &mut dyn FrameWalker, cfa: Option) -> Option { + // FIXME: this should be an ArrayVec or something, most exprs are simple. + let mut stack: Vec = Vec::new(); + for token in expr.split_ascii_whitespace() { + match token { + // FIXME?: not sure what overflow/sign semantics are, but haven't run into + // something where it actually matters (I wouldn't expect it to come up + // normally?). + "+" => { + // Add + let rhs = stack.pop()?; + let lhs = stack.pop()?; + stack.push(lhs.wrapping_add(rhs)); + } + "-" => { + // Subtract + let rhs = stack.pop()?; + let lhs = stack.pop()?; + stack.push(lhs.wrapping_sub(rhs)); + } + "*" => { + // Multiply + let rhs = stack.pop()?; + let lhs = stack.pop()?; + stack.push(lhs.wrapping_mul(rhs)); + } + "/" => { + // Divide + let rhs = stack.pop()?; + let lhs = stack.pop()?; + if rhs == 0 { + // Div by 0 + return None; + } + stack.push(lhs.wrapping_div(rhs)); + } + "%" => { + // Remainder + let rhs = stack.pop()?; + let lhs = stack.pop()?; + if rhs == 0 { + // Div by 0 + return None; + } + stack.push(lhs.wrapping_rem(rhs)); + } + "@" => { + // Align (truncate) + let rhs = stack.pop()?; + let lhs = stack.pop()?; + + if rhs == 0 || !rhs.is_power_of_two() { + return None; + } + + // ~Bit Magic Corner~ + // + // A power of two has only one bit set (e.g. 4 is 0b100), and + // subtracting 1 from that gets you all 1's below that bit (e.g. 0b011). + // -1 is all 1's. 
+ // + // So XORing -1 with (power_of_2 - 1) gets you all ones except + // for the bits lower than the power of 2. ANDing that value + // to a number consequently makes it a multiple of that power + // of two (all the bits smaller than the power are cleared). + stack.push(lhs & (-1i64 as u64 ^ (rhs - 1))) + } + "^" => { + // Deref the value + let ptr = stack.pop()?; + stack.push(walker.get_register_at_address(ptr)?); + } + ".cfa" => { + // Push the CFA. Note the CFA shouldn't be used to compute + // itself, so this returns None if that happens. + stack.push(cfa?); + } + ".undef" => { + // This register is explicitly undefined! + return None; + } + _ => { + // More complex cases + if let Some((_, reg)) = token.split_once('$') { + // Push a register + stack.push(walker.get_callee_register(reg)?); + } else if let Ok(value) = i64::from_str(token) { + // Push a constant + // FIXME?: We do everything in wrapping arithmetic, so it's + // probably fine to squash i64's into u64's, but it seems sketchy? + // Division/remainder in particular seem concerning, but also + // it would be surprising to see negatives for those..? + stack.push(value as u64) + } else if let Some(reg) = walker.get_callee_register(token) { + // Maybe the register just didn't have a $ prefix? + // (seems to be how ARM syntax works). + stack.push(reg); + } else { + // Unknown expr + debug!( + "STACK CFI expression eval failed - unknown token: {}", + token + ); + return None; + } + } + } + } + + if stack.len() == 1 { + stack.pop() + } else { + None + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum CfiReg<'a> { + Cfa, + Ra, + Other(&'a str), +} + +#[cfg(feature = "fuzz")] +pub fn eval_win_expr_for_fuzzer( + expr: &str, + info: &StackInfoWin, + walker: &mut dyn FrameWalker, +) -> Option<()> { + eval_win_expr(expr, info, walker) +} + +fn eval_win_expr(expr: &str, info: &StackInfoWin, walker: &mut dyn FrameWalker) -> Option<()> { + // TODO?: do a bunch of heuristics to make this more robust. 
+ // So far I haven't encountered an in-the-wild example that needs the + // extra heuristics that breakpad uses, so leaving them out until they + // become a problem. + + let mut vars = HashMap::new(); + + let callee_esp = walker.get_callee_register("esp")? as u32; + let callee_ebp = walker.get_callee_register("ebp")? as u32; + let grand_callee_param_size = walker.get_grand_callee_parameter_size(); + let frame_size = win_frame_size(info, grand_callee_param_size); + + // First setup the initial variables + vars.insert("$esp", callee_esp); + vars.insert("$ebp", callee_ebp); + if let Some(callee_ebx) = walker.get_callee_register("ebx") { + vars.insert("$ebx", callee_ebx as u32); + } + + let search_start = if expr.contains('@') { + // The frame has been aligned, so don't trust $esp. Assume $ebp + // is valid and that the standard calling convention is used + // (so the caller's $ebp was pushed right after the return address, + // and now $ebp points to that.) + trace!("program used @ operator, using $ebp instead of $esp for return addr"); + callee_ebp.checked_add(4)? + } else { + // $esp should be reasonable, get the return address from that + callee_esp.checked_add(frame_size)? + }; + + trace!( + "raSearchStart = 0x{:08x} (0x{:08x}, 0x{:08x}, 0x{:08x})", + search_start, + grand_callee_param_size, + info.local_size, + info.saved_register_size + ); + + // Magic names from breakpad + vars.insert(".cbParams", info.parameter_size); + vars.insert(".cbCalleeParams", grand_callee_param_size); + vars.insert(".cbSavedRegs", info.saved_register_size); + vars.insert(".cbLocals", info.local_size); + vars.insert(".raSearch", search_start); + vars.insert(".raSearchStart", search_start); + + // FIXME: this should be an ArrayVec or something..? + let mut stack: Vec = Vec::new(); + + // hack to fix bug where "= NEXT_TOKEN" is sometimes "=NEXT_TOKEN" + // for some windows toolchains. 
+ let tokens = expr + .split_ascii_whitespace() + .flat_map(|x| { + if x.starts_with('=') && x.len() > 1 { + [Some(&x[0..1]), Some(&x[1..])] + } else { + [Some(x), None] + } + }) // get rid of the Array + .flatten(); // get rid of the Option::None's + + // Evaluate the expressions + + for token in tokens { + match token { + // FIXME: not sure what overflow/sign semantics are + "+" => { + // Add + let rhs = stack.pop()?.into_int(&vars)?; + let lhs = stack.pop()?.into_int(&vars)?; + stack.push(WinVal::Int(lhs.wrapping_add(rhs))); + } + "-" => { + // Subtract + let rhs = stack.pop()?.into_int(&vars)?; + let lhs = stack.pop()?.into_int(&vars)?; + stack.push(WinVal::Int(lhs.wrapping_sub(rhs))); + } + "*" => { + // Multiply + let rhs = stack.pop()?.into_int(&vars)?; + let lhs = stack.pop()?.into_int(&vars)?; + stack.push(WinVal::Int(lhs.wrapping_mul(rhs))); + } + "/" => { + // Divide + let rhs = stack.pop()?.into_int(&vars)?; + let lhs = stack.pop()?.into_int(&vars)?; + if rhs == 0 { + // Div by 0 + return None; + } + stack.push(WinVal::Int(lhs.wrapping_div(rhs))); + } + "%" => { + // Remainder + let rhs = stack.pop()?.into_int(&vars)?; + let lhs = stack.pop()?.into_int(&vars)?; + if rhs == 0 { + // Div by 0 + return None; + } + stack.push(WinVal::Int(lhs.wrapping_rem(rhs))); + } + "@" => { + // Align (truncate) + let rhs = stack.pop()?.into_int(&vars)?; + let lhs = stack.pop()?.into_int(&vars)?; + + if rhs == 0 || !rhs.is_power_of_two() { + return None; + } + + // ~Bit Magic Corner~ + // + // A power of two has only one bit set (e.g. 4 is 0b100), and + // subtracting 1 from that gets you all 1's below that bit (e.g. 0b011). + // -1 is all 1's. + // + // So XORing -1 with (power_of_2 - 1) gets you all ones except + // for the bits lower than the power of 2. ANDing that value + // to a number consequently makes it a multiple of that power + // of two (all the bits smaller than the power are cleared). 
+ stack.push(WinVal::Int(lhs & (-1i32 as u32 ^ (rhs - 1)))); + } + "=" => { + // Assign lhs = rhs + let rhs = stack.pop()?; + let lhs = stack.pop()?.into_var()?; + + if let WinVal::Undef = rhs { + vars.remove(&lhs); + } else { + vars.insert(lhs, rhs.into_int(&vars)?); + } + } + "^" => { + // Deref the value + let ptr = stack.pop()?.into_int(&vars)?; + stack.push(WinVal::Int( + walker.get_register_at_address(ptr as u64)? as u32 + )); + } + ".undef" => { + // This register is explicitly undefined! + stack.push(WinVal::Undef); + } + _ => { + // More complex cases + if token == ".undef" { + stack.push(WinVal::Undef); + } else if token.starts_with('$') || token.starts_with('.') { + // Push a register + stack.push(WinVal::Var(token)); + } else if let Ok(value) = i32::from_str(token) { + // Push a constant + // FIXME: We do everything in wrapping arithmetic, so it's fine to squash + // i32's into u32's? + stack.push(WinVal::Int(value as u32)); + } else { + // Unknown expr + trace!( + "STACK WIN expression eval failed - unknown token: {}", + token + ); + return None; + } + } + } + } + + let output_regs = ["$eip", "$esp", "$ebp", "$ebx", "$esi", "$edi"]; + for reg in &output_regs { + if let Some(&val) = vars.get(reg) { + walker.set_caller_register(®[1..], val as u64)?; + } + } + + trace!("STACK WIN expression eval succeeded!"); + + Some(()) +} + +fn win_frame_size(info: &StackInfoWin, grand_callee_param_size: u32) -> u32 { + info.local_size + info.saved_register_size + grand_callee_param_size +} + +enum WinVal<'a> { + Var(&'a str), + Int(u32), + Undef, +} + +impl<'a> WinVal<'a> { + fn into_var(self) -> Option<&'a str> { + if let WinVal::Var(var) = self { + Some(var) + } else { + None + } + } + fn into_int(self, map: &HashMap<&'a str, u32>) -> Option { + match self { + WinVal::Var(var) => map.get(&var).cloned(), + WinVal::Int(int) => Some(int), + WinVal::Undef => None, + } + } +} + +pub fn walk_with_stack_win_framedata( + info: &StackInfoWin, + walker: &mut dyn FrameWalker, 
+) -> Option<()> { + if let WinStackThing::ProgramString(ref expr) = info.program_string_or_base_pointer { + trace!("trying STACK WIN framedata -- {}", expr); + clear_stack_win_caller_registers(walker); + eval_win_expr(expr, info, walker) + } else { + unreachable!() + } +} + +pub fn walk_with_stack_win_fpo(info: &StackInfoWin, walker: &mut dyn FrameWalker) -> Option<()> { + if let WinStackThing::AllocatesBasePointer(allocates_base_pointer) = + info.program_string_or_base_pointer + { + // FIXME: do a bunch of heuristics to make this more robust. + // Haven't needed the heuristics breakpad uses yet. + trace!("trying STACK WIN fpo"); + clear_stack_win_caller_registers(walker); + + let grand_callee_param_size = walker.get_grand_callee_parameter_size(); + let frame_size = win_frame_size(info, grand_callee_param_size) as u64; + + let callee_esp = walker.get_callee_register("esp")?; + let mut eip_address = callee_esp + frame_size; + let mut caller_eip = walker.get_register_at_address(eip_address)?; + + // Check for a "leftover return address": in some pathological cases the return address isn't popped off the stack + // after a return instruction. According to breakpad, this can happen for "frame-pointer-optimized + // system calls", which implies that the callee must be a context frame. + // + // To detect these cases, we check whether + // 1. we are in a context frame. We approximate this by checking whether there's a grand-callee. + // 2. the caller's eip (aka the return address) is the same as the callee's eip. + // + // If we detect a leftover return address, we skip it and try again one word + // further down the stack. + let callee_is_context_frame = !walker.has_grand_callee(); + if callee_is_context_frame && caller_eip == walker.get_callee_register("eip")? 
{ + eip_address += 4; + caller_eip = walker.get_register_at_address(eip_address)?; + } + let caller_esp = eip_address + 4; + + trace!("found caller $eip and $esp"); + + let caller_ebp = if allocates_base_pointer { + let ebp_address = + callee_esp + grand_callee_param_size as u64 + info.saved_register_size as u64 - 8; + walker.get_register_at_address(ebp_address)? + } else { + // Per Breakpad: We also propagate %ebx through, as it is commonly unmodifed after + // calling simple forwarding functions in ntdll (that are this non-EBP + // using type). It's not clear that this is always correct, but it is + // important for some functions to get a correct walk. + if let Some(callee_ebx) = walker.get_callee_register("ebx") { + walker.set_caller_register("ebx", callee_ebx)?; + } + + walker.get_callee_register("ebp")? + }; + trace!("found caller $ebp"); + + walker.set_caller_register("eip", caller_eip)?; + walker.set_caller_register("esp", caller_esp)?; + walker.set_caller_register("ebp", caller_ebp)?; + + trace!("STACK WIN fpo eval succeeded!"); + Some(()) + } else { + unreachable!() + } +} + +/// STACK WIN doesn't want implicit register forwarding +fn clear_stack_win_caller_registers(walker: &mut dyn FrameWalker) { + let output_regs = ["$eip", "$esp", "$ebp", "$ebx", "$esi", "$edi"]; + for reg in output_regs { + walker.clear_caller_register(reg); + } +} + +#[cfg(test)] +mod test { + use super::super::types::{CfiRules, StackInfoWin, WinStackThing}; + use super::{eval_win_expr, walk_with_stack_cfi, walk_with_stack_win_fpo}; + use crate::FrameWalker; + use std::collections::HashMap; + + // Eugh, need this to memoize register names to static + static STATIC_REGS: [&str; 14] = [ + "cfa", "ra", "esp", "eip", "ebp", "eax", "ebx", "rsp", "rip", "rbp", "rax", "rbx", "x11", + "x12", + ]; + + struct TestFrameWalker { + instruction: Reg, + has_grand_callee: bool, + grand_callee_param_size: u32, + callee_regs: HashMap<&'static str, Reg>, + caller_regs: HashMap<&'static str, Reg>, + 
stack: Vec, + } + + trait Int { + const BYTES: usize; + fn from_bytes(bytes: &[u8]) -> Self; + fn into_u64(self) -> u64; + fn from_u64(val: u64) -> Self; + } + impl Int for u32 { + const BYTES: usize = 4; + fn from_bytes(bytes: &[u8]) -> Self { + let mut buf = [0; Self::BYTES]; + buf.copy_from_slice(bytes); + u32::from_le_bytes(buf) + } + fn into_u64(self) -> u64 { + self as u64 + } + fn from_u64(val: u64) -> Self { + val as u32 + } + } + impl Int for u64 { + const BYTES: usize = 8; + fn from_bytes(bytes: &[u8]) -> Self { + let mut buf = [0; Self::BYTES]; + buf.copy_from_slice(bytes); + u64::from_le_bytes(buf) + } + fn into_u64(self) -> u64 { + self + } + fn from_u64(val: u64) -> Self { + val + } + } + + impl FrameWalker for TestFrameWalker { + fn get_instruction(&self) -> u64 { + self.instruction.into_u64() + } + fn has_grand_callee(&self) -> bool { + self.has_grand_callee + } + fn get_grand_callee_parameter_size(&self) -> u32 { + self.grand_callee_param_size + } + /// Get a register-sized value stored at this address. + fn get_register_at_address(&self, address: u64) -> Option { + let addr = address as usize; + self.stack + .get(addr..addr + Reg::BYTES) + .map(|slice| Reg::from_bytes(slice).into_u64()) + } + /// Get the value of a register from the callee's frame. + fn get_callee_register(&self, name: &str) -> Option { + self.callee_regs.get(name).map(|val| val.into_u64()) + } + /// Set the value of a register for the caller's frame. + fn set_caller_register(&mut self, name: &str, val: u64) -> Option<()> { + STATIC_REGS.iter().position(|®| reg == name).map(|idx| { + let memoized_reg = STATIC_REGS[idx]; + self.caller_regs.insert(memoized_reg, Reg::from_u64(val)); + }) + } + fn clear_caller_register(&mut self, name: &str) { + self.caller_regs.remove(name); + } + /// Set whatever registers in the caller should be set based on the cfa (e.g. rsp). 
+ fn set_cfa(&mut self, val: u64) -> Option<()> { + self.caller_regs.insert("cfa", Reg::from_u64(val)); + Some(()) + } + /// Set whatever registers in the caller should be set based on the return address (e.g. rip). + fn set_ra(&mut self, val: u64) -> Option<()> { + self.caller_regs.insert("ra", Reg::from_u64(val)); + Some(()) + } + } + + impl TestFrameWalker { + fn new(stack: Vec, callee_regs: HashMap<&'static str, Reg>) -> Self { + TestFrameWalker { + stack, + callee_regs, + caller_regs: HashMap::new(), + + // Arbitrary values + instruction: Reg::from_u64(0xF1CEFA32), + has_grand_callee: true, + grand_callee_param_size: 4, + } + } + } + + /// Arbitrary default values in case needed. + fn whatever_win_info() -> StackInfoWin { + StackInfoWin { + address: 0xFEA4A123, + size: 16, + prologue_size: 4, + epilogue_size: 8, + parameter_size: 16, + saved_register_size: 12, + local_size: 24, + max_stack_size: 64, + program_string_or_base_pointer: WinStackThing::AllocatesBasePointer(false), + } + } + + fn build_cfi_rules(init: &str, additional: &[&str]) -> (CfiRules, Vec) { + let init = CfiRules { + address: 0, + rules: init.to_string(), + }; + let additional = additional + .iter() + .enumerate() + .map(|(idx, rules)| CfiRules { + address: idx as u64 + 1, + rules: rules.to_string(), + }) + .collect::>(); + + (init, additional) + } + + #[test] + fn test_stack_win_doc_example() { + // Final output of `ebp=(*16)`, `esp=24`, `eip=(*20)`. 
+ let expr = "$T0 $ebp = $eip $T0 4 + ^ = $ebp $T0 ^ = $esp $T0 8 + ="; + let input = vec![("ebp", 16u32), ("esp", 1600)].into_iter().collect(); + let mut stack = vec![0; 1600]; + + const FINAL_EBP: u32 = 0xFA1EF2E6; + const FINAL_EIP: u32 = 0xB3EF04CE; + + stack[16..20].copy_from_slice(&FINAL_EBP.to_le_bytes()); + stack[20..24].copy_from_slice(&FINAL_EIP.to_le_bytes()); + + let mut walker = TestFrameWalker::new(stack, input); + let info = whatever_win_info(); + + eval_win_expr(expr, &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 3); + assert_eq!(walker.caller_regs["esp"], 24); + assert_eq!(walker.caller_regs["ebp"], FINAL_EBP); + assert_eq!(walker.caller_regs["eip"], FINAL_EIP); + } + + #[test] + fn test_stack_win_ops() { + // Making sure all the operators do what they should. + let input = vec![("esp", 32u32), ("ebp", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + let info = whatever_win_info(); + + // Addition! + walker.caller_regs.clear(); + eval_win_expr("$esp 1 2 + = $ebp -4 0 + =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 3); + assert_eq!(walker.caller_regs["ebp"], -4i32 as u32); + + // Subtraction! + walker.caller_regs.clear(); + eval_win_expr("$esp 5 3 - = $ebp -4 2 - =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 2); + assert_eq!(walker.caller_regs["ebp"], -6i32 as u32); + + // Multiplication! + walker.caller_regs.clear(); + eval_win_expr("$esp 5 3 * = $ebp -4 2 * =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 15); + assert_eq!(walker.caller_regs["ebp"], -8i32 as u32); + + // Division! 
+ walker.caller_regs.clear(); + eval_win_expr("$esp 5 3 / = $ebp -4 2 / =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 1); + // TODO: oh no this fails, u64/u32 mismatches ARE a problem... at least + // for this synthetic example! + // assert_eq!(walker.caller_regs["ebp"], -2i32 as u32); + + // Modulo! + walker.caller_regs.clear(); + eval_win_expr("$esp 5 3 % = $ebp -1 2 % = ", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 2); + assert_eq!(walker.caller_regs["ebp"], 1); + + // Align! + walker.caller_regs.clear(); + eval_win_expr("$esp 8 16 @ = $ebp 161 8 @ = ", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 0); + assert_eq!(walker.caller_regs["ebp"], 160); + + // Operator Errors - Missing Inputs + + // + missing args + assert!(eval_win_expr("1 + ", &info, &mut walker).is_none()); + + // - missing args + assert!(eval_win_expr("1 -", &info, &mut walker).is_none()); + + // * missing args + assert!(eval_win_expr("1 *", &info, &mut walker).is_none()); + + // / missing args + assert!(eval_win_expr("1 /", &info, &mut walker).is_none()); + + // % missing args + assert!(eval_win_expr("1 %", &info, &mut walker).is_none()); + + // @ missing args + assert!(eval_win_expr("1 @", &info, &mut walker).is_none()); + + // ^ missing arg + assert!(eval_win_expr("^", &info, &mut walker).is_none()); + + // Operator Errors - Invalid Inputs + + // / by 0 + assert!(eval_win_expr("$esp 1 0 / = $ebp 1 =", &info, &mut walker).is_none()); + + // % by 0 + assert!(eval_win_expr("$esp 1 0 % = $ebp 1 =", &info, &mut walker).is_none()); + + // @ by 0 + assert!(eval_win_expr("$esp 1 0 @ = $ebp 1 =", &info, &mut walker).is_none()); + + // @ not power of 2 + assert!(eval_win_expr("$esp 1 3 @ = $ebp 1 =", &info, &mut walker).is_none()); + } + + #[test] + fn test_stack_win_corners() { + // Making 
sure all the operators do what they should. + let input = vec![("esp", 32u32), ("ebp", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + let info = whatever_win_info(); + + // Empty expression is ok, just forward through registers + walker.caller_regs.clear(); + eval_win_expr("", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 32); + assert_eq!(walker.caller_regs["ebp"], 1600); + + // Undef works + walker.caller_regs.clear(); + eval_win_expr("$esp .undef = $ebp .undef =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 0); + + // Idempotent works + walker.caller_regs.clear(); + eval_win_expr("$esp $esp = $ebp $ebp =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 32); + assert_eq!(walker.caller_regs["ebp"], 1600); + + // Trailing garbage in the stack is ok + walker.caller_regs.clear(); + eval_win_expr("$esp 1 = $ebp 2 = 3 4 5", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 1); + assert_eq!(walker.caller_regs["ebp"], 2); + + // Trailing garbage in the stack is ok (with variables) + walker.caller_regs.clear(); + eval_win_expr("$esp 1 = $ebp 2 = 3 4 5 $esp $eax", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 1); + assert_eq!(walker.caller_regs["ebp"], 2); + + // Temporaries don't get assigned to output + walker.caller_regs.clear(); + eval_win_expr("$t0 1 = $esp $t0 5 + = $ebp 2 =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 6); + assert_eq!(walker.caller_regs["ebp"], 2); + + // Variables can be assigned after they are pushed + walker.caller_regs.clear(); + eval_win_expr("$esp $T0 $T0 2 = = $ebp 3 =", &info, &mut walker).unwrap(); + + 
assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 2); + assert_eq!(walker.caller_regs["ebp"], 3); + } + + #[test] + fn test_stack_win_errors() { + // Making sure all the operators do what they should. + let input = vec![("esp", 32u32), ("ebp", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + let info = whatever_win_info(); + + // Deref out of bounds + assert!(eval_win_expr("$esp 2000 ^ =", &info, &mut walker).is_none()); + + // Reading undefined value + assert!(eval_win_expr("$esp $kitties =", &info, &mut walker).is_none()); + + // Reading value before defined + assert!(eval_win_expr("$esp $kitties = $kitties 1 =", &info, &mut walker).is_none()); + + // Reading deleted value + assert!(eval_win_expr("$esp .undef = $ebp $esp =", &info, &mut walker).is_none()); + + // Assigning value to value + assert!(eval_win_expr("0 2 =", &info, &mut walker).is_none()); + + // Assigning variable to value + assert!(eval_win_expr("0 $esp =", &info, &mut walker).is_none()); + + // Variables must start with $ or . + assert!(eval_win_expr("esp 2 = ebp 3 =", &info, &mut walker).is_none()); + } + + #[test] + fn test_stack_win_equal_fixup() { + // Bug in old windows toolchains that sometimes cause = to lose + // its trailing space. Although we would ideally reject this, we're + // at the mercy of what toolchains emit :( + + // TODO: this test currently fails! 
(hence the #[ignore]) + + let input = vec![("esp", 32u32), ("ebp", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + let info = whatever_win_info(); + + eval_win_expr("$esp 1 =$ebp 2 =", &info, &mut walker).unwrap(); + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 1); + assert_eq!(walker.caller_regs["ebp"], 2); + } + + #[test] + #[ignore] + fn test_stack_win_negative_division() { + // Negative division issues + + // TODO: this test currently fails! (hence the #[ignore]) + + let input = vec![("esp", 32u32), ("ebp", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + let info = whatever_win_info(); + + // Division! + walker.caller_regs.clear(); + eval_win_expr("$esp 5 3 / = $ebp -4 2 / =", &info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["esp"], 1); + assert_eq!(walker.caller_regs["ebp"], -2i32 as u32); + } + + #[test] + fn test_stack_win_leftover_return_address() { + // The return address on top of the stack (0xABCD_1234) is equal to the callee's eip, indicating + // a return address that was left over from a return. The stackwalker should skip it and + // return the second value on the stack (0xABCD_5678) as the caller's eip. 
+ let stack = vec![0x34, 0x12, 0xCD, 0xAB, 0x78, 0x56, 0xCD, 0xAB]; + let mut walker = TestFrameWalker { + instruction: 0xABCD_1234u32, + has_grand_callee: false, + grand_callee_param_size: 0, + callee_regs: vec![("eip", 0xABCD_1234), ("esp", 0), ("ebp", 17)] + .into_iter() + .collect(), + caller_regs: HashMap::new(), + stack, + }; + + // these are all dummy values + let info = StackInfoWin { + address: 0, + size: 0, + prologue_size: 0, + epilogue_size: 0, + parameter_size: 0, + saved_register_size: 0, + local_size: 0, + max_stack_size: 0, + program_string_or_base_pointer: WinStackThing::AllocatesBasePointer(false), + }; + + walk_with_stack_win_fpo(&info, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs["esp"], 8); + assert_eq!(walker.caller_regs["ebp"], 17); + assert_eq!(walker.caller_regs["eip"], 0xABCD_5678); + } + + #[test] + fn test_stack_cfi_doc_example() { + // Final output of: + // + // cfa = callee_rsp + 24 + // ra = *(cfa - 8) + // rax = *(cfa - 16) + + let init = ".cfa: $rsp 8 + .ra: .cfa -8 + ^"; + let additional = &[".cfa: $rsp 16 + $rax: .cfa -16 + ^", ".cfa: $rsp 24 +"]; + let input = vec![("rsp", 32u64), ("rip", 1600)].into_iter().collect(); + let mut stack = vec![0; 1600]; + + const FINAL_CFA: usize = 32 + 24; + const FINAL_RA: u64 = 0xFA1E_F2E6_A2DF_2B68; + const FINAL_RAX: u64 = 0xB3EF_04CE_4321_FE2A; + + stack[FINAL_CFA - 8..FINAL_CFA].copy_from_slice(&FINAL_RA.to_le_bytes()); + stack[FINAL_CFA - 16..FINAL_CFA - 8].copy_from_slice(&FINAL_RAX.to_le_bytes()); + + let mut walker = TestFrameWalker::new(stack, input); + let (init, additional) = build_cfi_rules(init, additional); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 3); + assert_eq!(walker.caller_regs["cfa"], FINAL_CFA as u64); + assert_eq!(walker.caller_regs["ra"], FINAL_RA); + assert_eq!(walker.caller_regs["rax"], FINAL_RAX); + } + + #[test] + fn test_stack_cfi_ops() { + // Making sure all the operators do what they should, 
using 32-bit + // to stress truncation issues from u64 <-> u32 mapping of the + // abstraction. + let input = vec![("esp", 32u32), ("eip", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + + // Addition! + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 1 2 + .ra: -4 0 +", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 3); + assert_eq!(walker.caller_regs["ra"], -4i32 as u32); + + // Subtraction! + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 5 3 - .ra: -4 2 -", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 2); + assert_eq!(walker.caller_regs["ra"], -6i32 as u32); + + // Multiplication! + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 5 3 * .ra: -4 2 *", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 15); + assert_eq!(walker.caller_regs["ra"], -8i32 as u32); + + // Division! + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 5 3 / .ra: -4 2 /", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 1); + assert_eq!(walker.caller_regs["ra"], -2i32 as u32); + + // Modulo! + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 5 3 % .ra: -1 2 %", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 2); + assert_eq!(walker.caller_regs["ra"], 1); + + // Align! 
+ walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 8 16 @ .ra: 161 8 @", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 0); + assert_eq!(walker.caller_regs["ra"], 160); + + // Operator Errors - Missing Inputs + + // + missing args + let (init, additional) = build_cfi_rules(".cfa: 1 + .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // - missing args + let (init, additional) = build_cfi_rules(".cfa: 1 - .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // * missing args + let (init, additional) = build_cfi_rules(".cfa: 1 * .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // / missing args + let (init, additional) = build_cfi_rules(".cfa: 1 / .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // % missing args + let (init, additional) = build_cfi_rules(".cfa: 1 % .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // @ missing args + let (init, additional) = build_cfi_rules(".cfa: 1 @ .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // ^ missing arg + let (init, additional) = build_cfi_rules(".cfa: ^ .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Operator Errors - Invalid Inputs + + // / by 0 + let (init, additional) = build_cfi_rules(".cfa: 1 0 / .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // % by 0 + let (init, additional) = build_cfi_rules(".cfa: 1 0 % .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // @ by 0 + let (init, additional) = build_cfi_rules(".cfa: 1 0 @ .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, 
&additional, &mut walker).is_none()); + + // @ not power of 2 + let (init, additional) = build_cfi_rules(".cfa: 1 3 @ .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + } + + #[test] + fn test_stack_cfi_errors() { + // Checking various issues that we should bail on + let input = vec![("rsp", 32u64), ("rip", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + + // Basic syntax + + // Missing .ra + let (init, additional) = build_cfi_rules(".cfa: 8 16 +", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Missing .cfa + let (init, additional) = build_cfi_rules(".ra: 8 16 *", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // No : at all + let (init, additional) = build_cfi_rules(".cfa 8 16 *", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Doesn't start with a REG + let (init, additional) = build_cfi_rules(".esp 8 16 * .cfa: 16 .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // .cfa has extra junk on stack + let (init, additional) = build_cfi_rules(".cfa: 8 12 .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // REG has empty expr (trailing) + let (init, additional) = build_cfi_rules(".cfa: 12 .ra: 8 $rax:", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // REG has empty expr (trailing with space) + let (init, additional) = build_cfi_rules(".cfa: 12 .ra: 8 $rax: ", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // REG has empty expr (middle) + let (init, additional) = build_cfi_rules(".cfa: 12 .ra: 8 $rax: $rbx: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Make sure = operator isn't supported in this implementation + let (init, additional) = 
build_cfi_rules(".cfa: 12 .ra: $rsp $rip =", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // .cfa is undef + let (init, additional) = build_cfi_rules(".cfa: .undef .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // .ra is undef + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: .undef", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Reading out of bounds + let (init, additional) = build_cfi_rules(".cfa: 2000 ^ .ra: 8", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Reading fake $reg + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: $kitties", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Reading real but still undefined $reg + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: $rax", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Reading .cfa for .cfa's own value + let (init, additional) = build_cfi_rules(".cfa: .cfa .ra: 2", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Reading .ra for .cfa's value + let (init, additional) = build_cfi_rules(".cfa: .ra .ra: 2", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Reading .ra for .ra's value + let (init, additional) = build_cfi_rules(".cfa: 1 .ra: .ra", &[]); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + + // Malformed doc example shouldn't work (found while typoing docs) + // Note the first .cfa in the additional lines has no `:`! 
+ let (init, additional) = build_cfi_rules( + ".cfa: $rsp 8 + .ra: .cfa -8 + ^", + &[".cfa $rsp 16 + $rax: .cfa -16 + ^", ".cfa $rsp 24 +"], + ); + assert!(walk_with_stack_cfi(&init, &additional, &mut walker).is_none()); + } + + #[test] + fn test_stack_cfi_corners() { + // Checking various issues that we should bail on + let input = vec![("rsp", 32u64), ("rip", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + + // Just a value for each reg (no ops to execute) + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: 12 $rax: 16", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 3); + assert_eq!(walker.caller_regs["cfa"], 8); + assert_eq!(walker.caller_regs["ra"], 12); + assert_eq!(walker.caller_regs["rax"], 16); + + // Undef $REGs are ok, Undef in the middle of expr ok + walker.caller_regs.clear(); + let (init, additional) = + build_cfi_rules(".cfa: 8 .ra: 12 $rax: .undef $rbx: 1 .undef +", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 8); + assert_eq!(walker.caller_regs["ra"], 12); + + // Unknown $reg output is ok; evaluated but value discarded + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: 12 $kitties: 16", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 8); + assert_eq!(walker.caller_regs["ra"], 12); + + // Smooshed regs are garbage but we don't validate the string so it should work + // the same as an unknown reg (dubious behaviour but hey let's be aware of it). 
+ walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 12 .ra: 8 $rax:$rbx: 8", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 12); + assert_eq!(walker.caller_regs["ra"], 8); + + // Evaluation errors for $reg output ok; value is discarded + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 1 .ra: 8 $rax: 1 0 /", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 1); + assert_eq!(walker.caller_regs["ra"], 8); + + // Duplicate records are ok (use the later one) + walker.caller_regs.clear(); + let (init, additional) = + build_cfi_rules(".cfa: 1 .cfa: 2 .ra: 3 .ra: 4 $rax: 5 $rax: 6", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 3); + assert_eq!(walker.caller_regs["cfa"], 2); + assert_eq!(walker.caller_regs["ra"], 4); + assert_eq!(walker.caller_regs["rax"], 6); + + // Using .cfa works fine + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 7 .ra: .cfa 1 + $rax: .cfa 2 -", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 3); + assert_eq!(walker.caller_regs["cfa"], 7); + assert_eq!(walker.caller_regs["ra"], 8); + assert_eq!(walker.caller_regs["rax"], 5); + + // Reading .ra for $REG's value is ok; value is discarded + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 1 .ra: 2 $rax: .ra", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 1); + assert_eq!(walker.caller_regs["ra"], 2); + + // Undefined destination .reg is assumed to be an ARM-style register, is dropped + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: 
12 .kitties: 16", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 8); + assert_eq!(walker.caller_regs["ra"], 12); + + // Trying to write to .undef is assumed to be an ARM-style register, is dropped + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: 12 .undef: 16", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + assert_eq!(walker.caller_regs.len(), 2); + assert_eq!(walker.caller_regs["cfa"], 8); + assert_eq!(walker.caller_regs["ra"], 12); + } + + #[test] + fn test_stack_cfi_arm() { + // ARM doesn't prefix registers with $ + // Checking various issues that we should bail on + let input = vec![("pc", 32u64), ("x11", 1600)].into_iter().collect(); + let stack = vec![0; 1600]; + + let mut walker = TestFrameWalker::new(stack, input); + + // Just a value for each reg (no ops to execute) + walker.caller_regs.clear(); + let (init, additional) = build_cfi_rules(".cfa: 8 .ra: 12 x11: 16 x12: x11 .cfa +", &[]); + walk_with_stack_cfi(&init, &additional, &mut walker).unwrap(); + + assert_eq!(walker.caller_regs.len(), 4); + assert_eq!(walker.caller_regs["cfa"], 8); + assert_eq!(walker.caller_regs["ra"], 12); + assert_eq!(walker.caller_regs["x11"], 16); + assert_eq!(walker.caller_regs["x12"], 1608); + } +} diff --git a/third_party/rust/cachemap2/.cargo-checksum.json b/third_party/rust/cachemap2/.cargo-checksum.json new file mode 100644 index 000000000000..ee326a625ba6 --- /dev/null +++ b/third_party/rust/cachemap2/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"Cargo.toml":"04431cabfb5ac68f7bfc57cee9777a69d22c8375081a87a6957daf7d70c4bff7","ChangeLog.md":"7f2bf552c898421a93086be57074bbc3063ac25576eb7e56dc95035b97e31131","LICENSE":"b5b44ae2ab9e1ef50d9aeba9686c5cf3ec666b402420cb3abf98caf996755d6e","README.md":"6ebb8d035c049bac3ae17ece73378a75df7542b0177d04470199ecce178383be","src/dashmap_impl.rs":"8708bc2e0b3803ebb0a19ba3025c7d465d5a4dc7f1ee4a4fbc7411871c21b19d","src/hashmap_impl.rs":"3571e8c921a77a9b97edf4987000d8e5d7f2548208bc5b59d09ecf375ae101a9","src/lib.rs":"20b4db6ca813533c07717a25d19436b1c4a7d430691b8f5a6568f005909335f5"},"package":"68ccbd3153aa153b2f5eff557537ffce81e4dd6c50ae0eddc41dc8d0c388436f"} \ No newline at end of file diff --git a/third_party/rust/cachemap2/Cargo.toml b/third_party/rust/cachemap2/Cargo.toml new file mode 100644 index 000000000000..a822f71acf25 --- /dev/null +++ b/third_party/rust/cachemap2/Cargo.toml @@ -0,0 +1,39 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "cachemap2" +version = "0.3.0" +authors = ["Alex Franchuk "] +description = "A concurrent insert-only hashmap for caching values" +homepage = "https://github.com/afranchuk/cachemap2" +readme = "README.md" +keywords = [ + "sync", + "data-structure", + "cache", + "hash-map", +] +categories = [ + "concurrency", + "data-structures", + "caching", +] +license = "MIT" + +[dependencies.abi_stable] +version = ">=0.9" +optional = true + +[dependencies.dashmap] +version = "5.1" +optional = true diff --git a/third_party/rust/cachemap2/ChangeLog.md b/third_party/rust/cachemap2/ChangeLog.md new file mode 100644 index 000000000000..158b1a98a147 --- /dev/null +++ b/third_party/rust/cachemap2/ChangeLog.md @@ -0,0 +1,10 @@ +# cachemap2 + +## 0.3.0 -- 2024-01-19 +* Add support for custom hashers (#1), thanks @zohnannor. + +### Breaking changes +* The minimum version of dashmap is upped to 5.1. + +## 0.2.0 -- 2023-03-21 +* Initial forked release. diff --git a/third_party/rust/cachemap2/LICENSE b/third_party/rust/cachemap2/LICENSE new file mode 100644 index 000000000000..c95b98dd20ba --- /dev/null +++ b/third_party/rust/cachemap2/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 hclarke + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/cachemap2/README.md b/third_party/rust/cachemap2/README.md new file mode 100644 index 000000000000..a742933e2509 --- /dev/null +++ b/third_party/rust/cachemap2/README.md @@ -0,0 +1,36 @@ +# CacheMap + +CacheMap is a data structure for concurrently caching values. + +The `cache` function will look up a value in the map, or generate and store a new one using the +provided function. + +This is a updated and maintained fork of [hclarke/cachemap](https://github.com/hclarke/cachemap). + +## Example + +``` +use cachemap::CacheMap; + +let m = CacheMap::new(); + +let fst = m.cache("key", || 5u32); +let snd = m.cache("key", || 7u32); + +assert_eq!(*fst, *snd); +assert_eq!(*fst, 5u32); +``` + +## Features + +- Can cache values concurrently (using `&CacheMap` rather than `&mut CacheMap`). +- Returned references use the map's lifetime, so clients can avoid smart pointers. +- Clients can optionally enable the `dashmap` feature, which uses `dashmap` internally and allows: + - getting `Arc` pointers, in case values need to outlive the map, and + - adding `Arc` directly, allowing unsized values, and re-using `Arc`s from elsewhere. +- Clients can optionally enable the `abi_stable` feature which will derive `abi_stable::StableAbi` + on the type. + +## AntiFeatures + +- There is no cache invalidation: the only way to remove things from a CacheMap is to drop it. 
diff --git a/third_party/rust/cachemap2/src/dashmap_impl.rs b/third_party/rust/cachemap2/src/dashmap_impl.rs new file mode 100644 index 000000000000..1c1995f1cece --- /dev/null +++ b/third_party/rust/cachemap2/src/dashmap_impl.rs @@ -0,0 +1,248 @@ +use dashmap::DashMap; +use std::ops::Deref; +use std::sync::Arc; +use std::{collections::hash_map::RandomState, hash::Hash}; +use std::{hash::BuildHasher, marker::PhantomData}; + +/// An insert-only map for caching the result of functions +pub struct CacheMap { + inner: DashMap, S>, +} + +/// A handle that can be converted to a &T or an Arc +pub struct ArcRef<'a, T: ?Sized> { + // this pointer never gets dereferenced, but it has to be T, so that Ref is the right size for wide pointers + #[allow(dead_code)] + fake_ptr: *const T, + phantom: PhantomData<&'a T>, +} + +impl<'a, T: ?Sized> Clone for ArcRef<'a, T> { + fn clone(&self) -> Self { + *self + } +} +impl<'a, T: ?Sized> Copy for ArcRef<'a, T> {} + +impl Deref for ArcRef<'_, T> { + type Target = Arc; + fn deref(&self) -> &Self::Target { + unsafe { std::mem::transmute(self) } + } +} + +impl<'a, T: ?Sized> ArcRef<'a, T> { + /// Converts the ArcRef into an Arc + pub fn to_arc(self) -> Arc { + self.deref().clone() + } + + /// Converts the ArcRef into a &T + pub fn as_ref(self) -> &'a T { + let ptr = &**self as *const T; + unsafe { &*ptr } + } +} + +impl Default for CacheMap { + fn default() -> Self { + CacheMap { + inner: Default::default(), + } + } +} + +impl std::iter::FromIterator<(K, V)> + for CacheMap +{ + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + CacheMap { + inner: iter.into_iter().map(|(k, v)| (k, Arc::new(v))).collect(), + } + } +} + +pub struct IntoIter(dashmap::iter::OwningIter, S>); + +impl Iterator for IntoIter { + type Item = (K, Arc); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +impl IntoIterator for CacheMap { + type Item = (K, Arc); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + 
IntoIter(self.inner.into_iter()) + } +} + +impl CacheMap { + /// Fetch the value associated with the key, or run the provided function to insert one. + /// + /// # Example + /// + /// ``` + /// use cachemap2::CacheMap; + /// + /// let m = CacheMap::new(); + /// + /// let fst = m.cache("key", || 5u32).as_ref(); + /// let snd = m.cache("key", || 7u32).as_ref(); + /// + /// assert_eq!(*fst, *snd); + /// assert_eq!(*fst, 5u32); + /// ``` + pub fn cache V>(&self, key: K, f: F) -> ArcRef<'_, V> { + self.cache_arc(key, || Arc::new(f())) + } + + /// Fetch the value associated with the key, or insert a default value. + pub fn cache_default(&self, key: K) -> ArcRef<'_, V> + where + V: Default, + { + self.cache(key, || Default::default()) + } + + /// Return whether the map contains the given key. + pub fn contains_key(&self, key: &Q) -> bool + where + K: std::borrow::Borrow, + Q: Hash + Eq, + { + self.inner.contains_key(key) + } +} + +impl CacheMap { + /// Creates a new CacheMap + pub fn new() -> Self { + CacheMap { + inner: DashMap::new(), + } + } +} + +impl CacheMap { + /// Creates a new CacheMap with the provided hasher + pub fn with_hasher(hash_builder: S) -> Self { + Self { + inner: DashMap::with_hasher(hash_builder), + } + } + + /// Fetch the value associated with the key, or run the provided function to insert one. + /// With this version, the function returns an Arc, whch allows caching unsized types. 
+ /// + /// # Example + /// + /// ``` + /// use cachemap2::CacheMap; + /// + /// let m: CacheMap<_, [usize]> = CacheMap::new(); + /// + /// let a = m.cache_arc("a", || { + /// let a = &[1,2,3][..]; + /// a.into() + /// }).as_ref(); + /// + /// let b = m.cache_arc("b", || { + /// let b = &[9,9][..]; + /// b.into() + /// }).as_ref(); + /// + /// assert_eq!(a, &[1,2,3]); + /// assert_eq!(b, &[9,9]); + /// ``` + pub fn cache_arc Arc>(&self, key: K, f: F) -> ArcRef<'_, V> { + let val = self.inner.entry(key).or_insert_with(f); + let arc: &Arc = &*val; + let arc_ref: &ArcRef<'_, V> = unsafe { std::mem::transmute(arc) }; + *arc_ref + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn single_insert() { + let m = CacheMap::new(); + + let a = m.cache("key", || 21u32).as_ref(); + assert_eq!(21, *a); + } + + #[test] + fn contains_key() { + let m = CacheMap::new(); + + m.cache("key", || 21u32); + assert!(m.contains_key("key")); + assert!(!m.contains_key("other")); + } + + #[test] + fn double_insert() { + let m = CacheMap::new(); + + let a = m.cache("key", || 5u32).as_ref(); + let b = m.cache("key", || 7u32).as_ref(); + + assert_eq!(*a, *b); + assert_eq!(5, *a); + } + + #[test] + fn insert_two() { + let m = CacheMap::new(); + + let a = m.cache("a", || 5u32).as_ref(); + let b = m.cache("b", || 7u32).as_ref(); + + assert_eq!(5, *a); + assert_eq!(7, *b); + + let c = m.cache("a", || 9u32).as_ref(); + let d = m.cache("b", || 11u32).as_ref(); + + assert_eq!(*a, *c); + assert_eq!(*b, *d); + + assert_eq!(5, *a); + assert_eq!(7, *b); + } + + #[test] + fn use_after_drop() { + #[derive(Clone)] + struct Foo(usize); + impl Drop for Foo { + fn drop(&mut self) { + assert_eq!(33, self.0); + } + } + + { + let mut arc = { + let m = CacheMap::new(); + let a = m.cache("key", || Foo(99)).to_arc(); + assert_eq!(99, (*a).0); + a + }; + + Arc::make_mut(&mut arc).0 = 33; + } + + assert!(true); + } +} diff --git a/third_party/rust/cachemap2/src/hashmap_impl.rs 
b/third_party/rust/cachemap2/src/hashmap_impl.rs new file mode 100644 index 000000000000..d31001d3c267 --- /dev/null +++ b/third_party/rust/cachemap2/src/hashmap_impl.rs @@ -0,0 +1,276 @@ +use std::collections::hash_map::RandomState; +use std::hash::{BuildHasher, Hash}; + +#[cfg(not(feature = "abi_stable"))] +mod basic_impl { + pub type BoxImpl = Box; + pub type HashMapImpl = std::collections::HashMap; + pub type MutexImpl = std::sync::Mutex; + pub type MutexGuardImpl<'a, T> = std::sync::MutexGuard<'a, T>; + pub type IterImpl<'a, K, V> = std::collections::hash_map::Iter<'a, K, V>; + pub type IntoIterImpl = std::collections::hash_map::IntoIter; + + pub fn box_into_inner_impl(b: BoxImpl) -> T { + *b + } + + pub fn mutex_lock_impl<'a, T>(m: &'a MutexImpl) -> MutexGuardImpl<'a, T> { + m.lock().unwrap() + } + + pub fn mutex_into_inner_impl(m: MutexImpl) -> T { + m.into_inner().unwrap() + } +} + +#[cfg(not(feature = "abi_stable"))] +use basic_impl::*; + +#[cfg(feature = "abi_stable")] +mod abi_stable_impl { + use abi_stable::{ + external_types::RMutex, + std_types::{RBox, RHashMap}, + }; + pub type BoxImpl = RBox; + pub type HashMapImpl = RHashMap; + pub type MutexImpl = RMutex; + pub type MutexGuardImpl<'a, T> = + abi_stable::external_types::parking_lot::mutex::RMutexGuard<'a, T>; + pub type IterImpl<'a, K, V> = abi_stable::std_types::map::Iter<'a, K, V>; + pub type IntoIterImpl = abi_stable::std_types::map::IntoIter; + + pub fn box_into_inner_impl(b: BoxImpl) -> T { + RBox::into_inner(b) + } + + pub fn mutex_lock_impl<'a, T>(m: &'a MutexImpl) -> MutexGuardImpl<'a, T> { + m.lock() + } + + pub fn mutex_into_inner_impl(m: MutexImpl) -> T { + m.into_inner() + } +} + +#[cfg(feature = "abi_stable")] +use abi_stable_impl::*; + +/// An insert-only map for caching the result of functions +#[cfg_attr(feature = "abi_stable", derive(abi_stable::StableAbi))] +#[cfg_attr(feature = "abi_stable", repr(C))] +pub struct CacheMap { + inner: MutexImpl, S>>, +} + +impl Default for CacheMap 
{ + fn default() -> Self { + CacheMap { + inner: MutexImpl::new(Default::default()), + } + } +} + +impl std::iter::FromIterator<(K, V)> + for CacheMap +{ + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + CacheMap { + inner: MutexImpl::new( + iter.into_iter() + .map(|(k, v)| (k, BoxImpl::new(v))) + .collect(), + ), + } + } +} + +pub struct IntoIter(IntoIterImpl>); + +impl Iterator for IntoIter { + type Item = (K, V); + + fn next(&mut self) -> Option { + self.0.next().map(|t| (t.0, box_into_inner_impl(t.1))) + } +} + +impl IntoIterator for CacheMap { + type Item = (K, V); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter(mutex_into_inner_impl(self.inner).into_iter()) + } +} + +pub struct Iter<'a, K, V, S> { + iter: IterImpl<'a, K, BoxImpl>, + _guard: MutexGuardImpl<'a, HashMapImpl, S>>, +} + +impl<'a, K, V, S> Iterator for Iter<'a, K, V, S> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option { + self.iter.next().map(|t| (t.0, t.1.as_ref())) + } +} + +impl<'a, K, V, S> IntoIterator for &'a CacheMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V, S>; + + fn into_iter(self) -> Self::IntoIter { + let guard = mutex_lock_impl(&self.inner); + let iter = unsafe { + std::mem::transmute::>, IterImpl>>(guard.iter()) + }; + Iter { + iter, + _guard: guard, + } + } +} + +impl CacheMap { + /// Fetch the value associated with the key, or run the provided function to insert one. 
+ /// + /// # Example + /// + /// ``` + /// use cachemap2::CacheMap; + /// + /// let m = CacheMap::new(); + /// + /// let fst = m.cache("key", || 5u32); + /// let snd = m.cache("key", || 7u32); + /// + /// assert_eq!(*fst, *snd); + /// assert_eq!(*fst, 5u32); + /// ``` + pub fn cache V>(&self, key: K, f: F) -> &V { + let v = std::ptr::NonNull::from( + mutex_lock_impl(&self.inner) + .entry(key) + .or_insert_with(|| BoxImpl::new(f())) + .as_ref(), + ); + // Safety: We only support adding entries to the hashmap, and as long as a reference is + // maintained the value will be present. + unsafe { v.as_ref() } + } + + /// Fetch the value associated with the key, or insert a default value. + pub fn cache_default(&self, key: K) -> &V + where + V: Default, + { + self.cache(key, || Default::default()) + } + + /// Return whether the map contains the given key. + pub fn contains_key(&self, key: &Q) -> bool + where + K: std::borrow::Borrow, + Q: Hash + Eq, + { + mutex_lock_impl(&self.inner).contains_key(key) + } + + /// Return an iterator over the map. + /// + /// This iterator will lock the underlying map until it is dropped. 
+ pub fn iter(&self) -> Iter { + self.into_iter() + } +} + +impl CacheMap { + /// Creates a new CacheMap + pub fn new() -> Self { + Default::default() + } +} + +impl CacheMap { + /// Creates a new CacheMap with the provided hasher + pub fn with_hasher(hash_builder: S) -> Self { + Self { + inner: MutexImpl::new(HashMapImpl::with_hasher(hash_builder)), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn single_insert() { + let m = CacheMap::new(); + + let a = m.cache("key", || 21u32); + assert_eq!(21, *a); + } + + #[test] + fn contains_key() { + let m = CacheMap::new(); + + m.cache("key", || 21u32); + assert!(m.contains_key("key")); + assert!(!m.contains_key("other")); + } + + #[test] + fn double_insert() { + let m = CacheMap::new(); + + let a = m.cache("key", || 5u32); + let b = m.cache("key", || 7u32); + + assert_eq!(*a, *b); + assert_eq!(5, *a); + } + + #[test] + fn insert_two() { + let m = CacheMap::new(); + + let a = m.cache("a", || 5u32); + let b = m.cache("b", || 7u32); + + assert_eq!(5, *a); + assert_eq!(7, *b); + + let c = m.cache("a", || 9u32); + let d = m.cache("b", || 11u32); + + assert_eq!(*a, *c); + assert_eq!(*b, *d); + + assert_eq!(5, *a); + assert_eq!(7, *b); + } + + #[test] + fn iter() { + use std::collections::HashMap; + use std::iter::FromIterator; + let m = CacheMap::new(); + m.cache("a", || 5u32); + m.cache("b", || 7u32); + + let mut expected = HashMap::<&'static str, u32>::from_iter([("a", 5u32), ("b", 7u32)]); + + for (k, v) in &m { + assert!(expected.remove(k).expect("unexpected key") == *v); + } + + assert!(expected.is_empty()); + } +} diff --git a/third_party/rust/cachemap2/src/lib.rs b/third_party/rust/cachemap2/src/lib.rs new file mode 100644 index 000000000000..dc5691b2307a --- /dev/null +++ b/third_party/rust/cachemap2/src/lib.rs @@ -0,0 +1,14 @@ +#[cfg(feature = "dashmap")] +mod dashmap_impl; + +#[cfg(feature = "dashmap")] +pub use dashmap_impl::*; + +#[cfg(all(feature = "dashmap", feature = "abi_stable"))] 
+compile_error!("abi_stable and dashmap features cannot be used together"); + +#[cfg(not(feature = "dashmap"))] +mod hashmap_impl; + +#[cfg(not(feature = "dashmap"))] +pub use hashmap_impl::*; diff --git a/third_party/rust/circular/.cargo-checksum.json b/third_party/rust/circular/.cargo-checksum.json new file mode 100644 index 000000000000..f20a5de4a979 --- /dev/null +++ b/third_party/rust/circular/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"8b979e7165afbae65030082527ef11775cbf311e8406018260f60289695f2164","LICENSE":"cef6497a64266f9c392ee9134aceb82739dadd176422fead980102109bf46d10","README.md":"718abbc1f45007f3c8b54e922abfc3c6e15c2d9eb70163cd1233ee17d4035343","src/lib.rs":"44c514d6556c7a1b130a7fd30246d3d02c4fe3305c2bb222a45c3f4970a5bca4"},"package":"b0fc239e0f6cb375d2402d48afb92f76f5404fd1df208a41930ec81eda078bea"} \ No newline at end of file diff --git a/third_party/rust/circular/Cargo.toml b/third_party/rust/circular/Cargo.toml new file mode 100644 index 000000000000..d71fa437c0e4 --- /dev/null +++ b/third_party/rust/circular/Cargo.toml @@ -0,0 +1,23 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. 
If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +name = "circular" +version = "0.3.0" +authors = ["Geoffroy Couprie "] +include = ["LICENSE", "README.md", ".gitignore", ".travis.yml", "Cargo.toml", "src/*.rs"] +description = "A stream abstraction designed for use with nom" +readme = "README.md" +license = "MIT" +repository = "https://github.com/sozu-proxy/circular" + +[dependencies] diff --git a/third_party/rust/circular/LICENSE b/third_party/rust/circular/LICENSE new file mode 100644 index 000000000000..6980919be437 --- /dev/null +++ b/third_party/rust/circular/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2017 Geoffroy Couprie + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/third_party/rust/circular/README.md b/third_party/rust/circular/README.md new file mode 100644 index 000000000000..3b6ab9102dd4 --- /dev/null +++ b/third_party/rust/circular/README.md @@ -0,0 +1,5 @@ +# Circular + +Circular is a stream abstraction designed for use with nom. It can expose the +available data, a mutable slice of the available space, and it separates +reading data from actually consuming it from the buffer. diff --git a/third_party/rust/circular/src/lib.rs b/third_party/rust/circular/src/lib.rs new file mode 100644 index 000000000000..1b0e13ddd308 --- /dev/null +++ b/third_party/rust/circular/src/lib.rs @@ -0,0 +1,415 @@ +//! Circular, a stream abstraction designed for use with nom +//! +//! Circular provides a `Buffer` type that wraps a `Vec` with a position +//! and end. Compared to a stream abstraction that would use `std::io::Read`, +//! it separates the reading and consuming phases. `Read` is designed to write +//! the data in a mutable slice and consume it from the stream as it does that. +//! +//! When used in streaming mode, nom will try to parse a slice, then tell you +//! how much it consumed. So you don't know how much data was actually used +//! until the parser returns. `Circular::Buffer` exposes a `data()` method +//! that gives an immutable slice of all the currently readable data, +//! and a `consume()` method to advance the position in the stream. +//! The `space()` and `fill()` methods are the write counterparts to those methods. +//! +//! ``` +//! extern crate circular; +//! +//! use circular::Buffer; +//! use std::io::Write; +//! +//! fn main() { +//! +//! // allocate a new Buffer +//! let mut b = Buffer::with_capacity(10); +//! assert_eq!(b.available_data(), 0); +//! assert_eq!(b.available_space(), 10); +//! +//! let res = b.write(&b"abcd"[..]); +//! assert_eq!(res.ok(), Some(4)); +//! assert_eq!(b.available_data(), 4); +//! assert_eq!(b.available_space(), 6); +//! +//! 
//the 4 bytes we wrote are immediately available and usable for parsing +//! assert_eq!(b.data(), &b"abcd"[..]); +//! +//! // this will advance the position from 0 to 2. it does not modify the underlying Vec +//! b.consume(2); +//! assert_eq!(b.available_data(), 2); +//! assert_eq!(b.available_space(), 6); +//! assert_eq!(b.data(), &b"cd"[..]); +//! +//! // shift moves the available data at the beginning of the buffer. +//! // the position is now 0 +//! b.shift(); +//! assert_eq!(b.available_data(), 2); +//! assert_eq!(b.available_space(), 8); +//! assert_eq!(b.data(), &b"cd"[..]); +//! } +//! +use std::{cmp, ptr}; +use std::io::{self,Write,Read}; +use std::iter::repeat; + +/// the Buffer contains the underlying memory and data positions +/// +/// In all cases, `0 ≤ position ≤ end ≤ capacity` should be true +#[derive(Debug,PartialEq,Clone)] +pub struct Buffer { + /// the Vec containing the data + memory: Vec, + /// the current capacity of the Buffer + capacity: usize, + /// the current beginning of the available data + position: usize, + /// the current end of the available data + /// and beginning of the available space + end: usize +} + +impl Buffer { + /// allocates a new buffer of maximum size `capacity` + pub fn with_capacity(capacity: usize) -> Buffer { + let mut v = Vec::with_capacity(capacity); + v.extend(repeat(0).take(capacity)); + Buffer { + memory: v, + capacity: capacity, + position: 0, + end: 0 + } + } + + /// allocates a new buffer containing the slice `data` + /// + /// the buffer starts full, its available data size is exactly `data.len()` + pub fn from_slice(data: &[u8]) -> Buffer { + Buffer { + memory: Vec::from(data), + capacity: data.len(), + position: 0, + end: data.len() + } + } + + /// increases the size of the buffer + /// + /// this does nothing if the buffer is already large enough + pub fn grow(&mut self, new_size: usize) -> bool { + if self.capacity >= new_size { + return false; + } + + self.memory.resize(new_size, 0); + self.capacity = 
new_size; + true + } + + /// returns how much data can be read from the buffer + pub fn available_data(&self) -> usize { + self.end - self.position + } + + /// returns how much free space is available to write to + pub fn available_space(&self) -> usize { + self.capacity - self.end + } + + /// returns the underlying vector's size + pub fn capacity(&self) -> usize { + self.capacity + } + + /// returns true if there is no more data to read + pub fn empty(&self) -> bool { + self.position == self.end + } + + /// advances the position tracker + /// + /// if the position gets past the buffer's half, + /// this will call `shift()` to move the remaining data + /// to the beginning of the buffer + pub fn consume(&mut self, count: usize) -> usize { + let cnt = cmp::min(count, self.available_data()); + self.position += cnt; + if self.position > self.capacity / 2 { + //trace!("consume shift: pos {}, end {}", self.position, self.end); + self.shift(); + } + cnt + } + + /// advances the position tracker + /// + /// This method is similar to `consume()` but will not move data + /// to the beginning of the buffer + pub fn consume_noshift(&mut self, count: usize) -> usize { + let cnt = cmp::min(count, self.available_data()); + self.position += cnt; + cnt + } + + /// after having written data to the buffer, use this function + /// to indicate how many bytes were written + /// + /// if there is not enough available space, this function can call + /// `shift()` to move the remaining data to the beginning of the + /// buffer + pub fn fill(&mut self, count: usize) -> usize { + let cnt = cmp::min(count, self.available_space()); + self.end += cnt; + if self.available_space() < self.available_data() + cnt { + //trace!("fill shift: pos {}, end {}", self.position, self.end); + self.shift(); + } + + cnt + } + + /// Get the current position + /// + /// # Examples + /// ``` + /// use circular::Buffer; + /// use std::io::{Read,Write}; + /// + /// let mut output = [0;5]; + /// + /// let mut b = 
Buffer::with_capacity(10); + /// + /// let res = b.write(&b"abcdefgh"[..]); + /// + /// b.read(&mut output); + /// + /// // Position must be 5 + /// assert_eq!(b.position(), 5); + /// assert_eq!(b.available_data(), 3); + /// ``` + pub fn position(&self) -> usize { + self.position + } + + /// moves the position and end trackers to the beginning + /// this function does not modify the data + pub fn reset(&mut self) { + self.position = 0; + self.end = 0; + } + + /// returns a slice with all the available data + pub fn data(&self) -> &[u8] { + &self.memory[self.position..self.end] + } + + /// returns a mutable slice with all the available space to + /// write to + pub fn space(&mut self) -> &mut[u8] { + &mut self.memory[self.end..self.capacity] + } + + /// moves the data at the beginning of the buffer + /// + /// if the position was more than 0, it is now 0 + pub fn shift(&mut self) { + if self.position > 0 { + unsafe { + let length = self.end - self.position; + ptr::copy( (&self.memory[self.position..self.end]).as_ptr(), (&mut self.memory[..length]).as_mut_ptr(), length); + self.position = 0; + self.end = length; + } + } + } + + //FIXME: this should probably be rewritten, and tested extensively + #[doc(hidden)] + pub fn delete_slice(&mut self, start: usize, length: usize) -> Option { + if start + length >= self.available_data() { + return None + } + + unsafe { + let begin = self.position + start; + let next_end = self.end - length; + ptr::copy( + (&self.memory[begin+length..self.end]).as_ptr(), + (&mut self.memory[begin..next_end]).as_mut_ptr(), + self.end - (begin+length) + ); + self.end = next_end; + } + Some(self.available_data()) + } + + //FIXME: this should probably be rewritten, and tested extensively + #[doc(hidden)] + pub fn replace_slice(&mut self, data: &[u8], start: usize, length: usize) -> Option { + let data_len = data.len(); + if start + length > self.available_data() || + self.position + start + data_len > self.capacity { + return None + } + + unsafe { 
+ let begin = self.position + start; + let slice_end = begin + data_len; + // we reduced the data size + if data_len < length { + ptr::copy(data.as_ptr(), (&mut self.memory[begin..slice_end]).as_mut_ptr(), data_len); + + ptr::copy((&self.memory[start+length..self.end]).as_ptr(), (&mut self.memory[slice_end..]).as_mut_ptr(), self.end - (start + length)); + self.end = self.end - (length - data_len); + + // we put more data in the buffer + } else { + ptr::copy((&self.memory[start+length..self.end]).as_ptr(), (&mut self.memory[start+data_len..]).as_mut_ptr(), self.end - (start + length)); + ptr::copy(data.as_ptr(), (&mut self.memory[begin..slice_end]).as_mut_ptr(), data_len); + self.end = self.end + data_len - length; + } + } + Some(self.available_data()) + } + + //FIXME: this should probably be rewritten, and tested extensively + #[doc(hidden)] + pub fn insert_slice(&mut self, data: &[u8], start: usize) -> Option { + let data_len = data.len(); + if start > self.available_data() || + self.position + self.end + data_len > self.capacity { + return None + } + + unsafe { + let begin = self.position + start; + let slice_end = begin + data_len; + ptr::copy((&self.memory[start..self.end]).as_ptr(), (&mut self.memory[start+data_len..]).as_mut_ptr(), self.end - start); + ptr::copy(data.as_ptr(), (&mut self.memory[begin..slice_end]).as_mut_ptr(), data_len); + self.end = self.end + data_len; + } + Some(self.available_data()) + } +} + +impl Write for Buffer { + fn write(&mut self, buf: &[u8]) -> io::Result { + match self.space().write(buf) { + Ok(size) => { self.fill(size); Ok(size) }, + err => err + } + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +impl Read for Buffer { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let len = cmp::min(self.available_data(), buf.len()); + unsafe { + ptr::copy((&self.memory[self.position..self.position+len]).as_ptr(), buf.as_mut_ptr(), len); + self.position += len; + } + Ok(len) + } +} + +#[cfg(test)] +mod tests { + use 
super::*; + use std::io::Write; + + #[test] + fn fill_and_consume() { + let mut b = Buffer::with_capacity(10); + assert_eq!(b.available_data(), 0); + assert_eq!(b.available_space(), 10); + let res = b.write(&b"abcd"[..]); + assert_eq!(res.ok(), Some(4)); + assert_eq!(b.available_data(), 4); + assert_eq!(b.available_space(), 6); + + assert_eq!(b.data(), &b"abcd"[..]); + + b.consume(2); + assert_eq!(b.available_data(), 2); + assert_eq!(b.available_space(), 6); + assert_eq!(b.data(), &b"cd"[..]); + + b.shift(); + assert_eq!(b.available_data(), 2); + assert_eq!(b.available_space(), 8); + assert_eq!(b.data(), &b"cd"[..]); + + assert_eq!(b.write(&b"efghijklmnop"[..]).ok(), Some(8)); + assert_eq!(b.available_data(), 10); + assert_eq!(b.available_space(), 0); + assert_eq!(b.data(), &b"cdefghijkl"[..]); + b.shift(); + assert_eq!(b.available_data(), 10); + assert_eq!(b.available_space(), 0); + assert_eq!(b.data(), &b"cdefghijkl"[..]); + } + + #[test] + fn delete() { + let mut b = Buffer::with_capacity(10); + let _ = b.write(&b"abcdefgh"[..]); + assert_eq!(b.available_data(), 8); + assert_eq!(b.available_space(), 2); + + assert_eq!(b.delete_slice(2, 3), Some(5)); + assert_eq!(b.available_data(), 5); + assert_eq!(b.available_space(), 5); + assert_eq!(b.data(), &b"abfgh"[..]); + + assert_eq!(b.delete_slice(5, 2), None); + assert_eq!(b.delete_slice(4, 2), None); + } + + #[test] + fn replace() { + let mut b = Buffer::with_capacity(10); + let _ = b.write(&b"abcdefgh"[..]); + assert_eq!(b.available_data(), 8); + assert_eq!(b.available_space(), 2); + + assert_eq!(b.replace_slice(&b"ABC"[..], 2, 3), Some(8)); + assert_eq!(b.available_data(), 8); + assert_eq!(b.available_space(), 2); + assert_eq!(b.data(), &b"abABCfgh"[..]); + + assert_eq!(b.replace_slice(&b"XYZ"[..], 8, 3), None); + assert_eq!(b.replace_slice(&b"XYZ"[..], 6, 3), None); + + assert_eq!(b.replace_slice(&b"XYZ"[..], 2, 4), Some(7)); + assert_eq!(b.available_data(), 7); + assert_eq!(b.available_space(), 3); + 
assert_eq!(b.data(), &b"abXYZgh"[..]); + + assert_eq!(b.replace_slice(&b"123"[..], 2, 2), Some(8)); + assert_eq!(b.available_data(), 8); + assert_eq!(b.available_space(), 2); + assert_eq!(b.data(), &b"ab123Zgh"[..]); + } + + use std::str; + #[test] + fn set_position() { + let mut output = [0;5]; + let mut b = Buffer::with_capacity(10); + let _ = b.write(&b"abcdefgh"[..]); + let _ = b.read(&mut output); + assert_eq!(b.available_data(), 3); + println!("{:?}", b.position()); + } + + #[test] + fn consume_without_shift() { + let mut b = Buffer::with_capacity(10); + let _ = b.write(&b"abcdefgh"[..]); + b.consume_noshift(6); + assert_eq!(b.position(), 6); + } +} diff --git a/third_party/rust/framehop/.cargo-checksum.json b/third_party/rust/framehop/.cargo-checksum.json new file mode 100644 index 000000000000..7c7cc70ada49 --- /dev/null +++ b/third_party/rust/framehop/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"6b9b3906662b434710edcd87739806a1b4d1312794f969b50e50705025c9d611","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"9ec2734f45b0d65192b9fee2307e05176b805a19efa994a553dcc5b2d3219a1e","Readme.md":"f1c7d3ed5b9ec8dbc9f87d742b213573d0667ca4b4d148cf1e72681115093e67","src/aarch64/arch.rs":"894d1d66ba487363cdf5f2dd66c20ff6560e3906e7c6ecd80e5fc11a682fa5d9","src/aarch64/cache.rs":"88bfc7ee6d38bd0a0fb5532f29fbcec88fd6a222db0e1a3398e0168d7c05d82c","src/aarch64/dwarf.rs":"800bb304e8d746fb1857c2da5486f278242103ea7eac45b1cdc135b1eb3b92f1","src/aarch64/instruction_analysis/epilogue.rs":"fe45d3fbb92dc7224526fb1e613019ebf36a9f2aa3f8fb52217d20744fbda3ae","src/aarch64/instruction_analysis/mod.rs":"6298b1c5b96a5ac8b1ca39a9764b1b71af8ca4a60b6a435e7404e0b02e700f6a","src/aarch64/instruction_analysis/prologue.rs":"065172ee6e2cb868c76dad0d704f0da15397e94424cb0c5522e4bffcae1b0f19","src/aarch64/macho.rs":"ec88fb0c02707d3d96a41f22bb2f698909af26b41ac9cca6b0244e837e240504","src/aarch64/mod.rs":"a94c4c0b1d3e08bce5b0baf9a6ba1b59f42da28
09ce970b8a9050b9c3c46e00a","src/aarch64/pe.rs":"6800dfee18cb8eb96d8802c4a175cfca511d9503a7b6c09d0ce7e84c28d8a1a8","src/aarch64/unwind_rule.rs":"1119387590f16f4582672095f6c9462a94e3d4eaf448baa19c432c5e57fa055d","src/aarch64/unwinder.rs":"1dd24b21a49cf1b2fdcb5fada2afb54b2df269d3560be1e1f0063604593f26f1","src/aarch64/unwindregs.rs":"19e5fd82d62eac135c9075e75c0b031f3037a4b670060b3bc6746ef6d71685f8","src/add_signed.rs":"8c52b1d7c7dfc5cbdd477ff9dcce2e888f663a19e8ef6b89c209c06f7a532203","src/arch.rs":"f7dff12cdc2cf91986a5cb3c8d492f608264bd789841a0cfab1c7042233f0488","src/cache.rs":"90569eba164d72c3d20a0465d05a92bc35ceba38c21b944ced3c759f61be3268","src/code_address.rs":"1e2bd03a5813c0100171c7020dc05d8457e2670c7ef58c0c4e3627bf1d82f1b1","src/display_utils.rs":"2f874fd4778e7e304365d8b65d60dc4a5a8fa5ee2715740dc64e334989a1276d","src/dwarf.rs":"79689d0d16a5ccdb5a6c90d690602d1b9bb0100543c2922b47a4c5715004c581","src/error.rs":"bbcaa2ede65b465bff515e19c50f4a8b76c4fcb481297a50427fd21689121294","src/instruction_analysis.rs":"1023577c008a71805338cd45b8582774dd8c69c7bb349990992733297761743e","src/lib.rs":"f57770c147c5de29b4a3600675b459ce26925ad8c5be19ab0c9545883a7a9320","src/macho.rs":"472cd64d0ef4c4d7b91f3d19307875f61db67de52273fef186da9ede89016982","src/pe.rs":"d50f13dd153d124c3b76df7930295e2310381e7490d45563382775422a858bfe","src/rule_cache.rs":"d764fe5e9202314b77e536a7ebe7cb4d574494eeaeb76d2e7a13ff6b0770cf3b","src/unwind_result.rs":"ec6898d9e66b455978880884199d5570fd220c7d3d1101c6b854b9a2b6cea88d","src/unwind_rule.rs":"3335e0d2af34961ba4eff2d89db6bdde5950909f352539e96852c42b3ca16139","src/unwinder.rs":"1ccd6b02770ed54f8837615cd0da02be75e92da9db304e17a14b6cf8f36dd3e0","src/x86_64/arch.rs":"12ea62c70058eac1c2aa698594cc83fafc5d8ec7205596c4b6f6ff325bd1ed8d","src/x86_64/cache.rs":"57eecbc7a0eea21269ba87e80efd985b13d420b2546722ae1b7c73e2e1731169","src/x86_64/dwarf.rs":"6643cc16ac524c325c02ae3a980dd95da38f660328d7b75c1081454b85e24925","src/x86_64/instruction_analysis/epilogue.rs":"21b98f79
4ec11d501497904b352017d678ea57a2a1f1617a625b1044de1c79c5","src/x86_64/instruction_analysis/mod.rs":"df9089f73861574607dab07fda68b8c5bf1ff426401840a6c35503bda9996143","src/x86_64/instruction_analysis/prologue.rs":"57f2a9376a70ca708c0d9c85bd324edff8062f73102aa57a9c6319627d8189ad","src/x86_64/macho.rs":"1b8eb6622d36115ac664c54d2a8768cbadd17bdcf252e368cf5ea8a35339d5b9","src/x86_64/mod.rs":"160ad03cce68b6263028fa9eaf460a89fee57795a81adac8bed9c7d4fdf0ebad","src/x86_64/pe.rs":"25d850fc896e635831c90c1e4073900b32b02fff88858aa579a986aa4052d54e","src/x86_64/register_ordering.rs":"e4e01b5506eaf1d448874e10930035d4a67b8077803e2ceee305d9c5aa88cd2f","src/x86_64/unwind_rule.rs":"f5be036172ac469cbc8248047411cbd43931a1def52cc9fcacce54210abc9824","src/x86_64/unwinder.rs":"2d7228655cc427266e31f1405f44b9e81bb119b9eb0f4abb9a29b39697db2b44","src/x86_64/unwindregs.rs":"63b358fe31b613d456982360ff659927d540b502f9c1a3145c4ba66beb4afdfc"},"package":"0fd28d2036d4fd99e3629487baca659e5af1c5d554e320168613be79028610fc"} \ No newline at end of file diff --git a/third_party/rust/framehop/Cargo.toml b/third_party/rust/framehop/Cargo.toml new file mode 100644 index 000000000000..0901d8bbe0d7 --- /dev/null +++ b/third_party/rust/framehop/Cargo.toml @@ -0,0 +1,89 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "framehop" +version = "0.12.1" +authors = ["Markus Stange "] +exclude = [ + "/.github", + "/.vscode", + "/tests", + "/fixtures", + "/big-fixtures", +] +description = "Stack frame unwinding support for various formats" +documentation = "https://docs.rs/framehop/" +readme = "Readme.md" +keywords = [ + "unwind", + "stackwalk", + "profiling", + "debug", +] +categories = ["development-tools::debugging"] +license = "MIT/Apache-2.0" +repository = "https://github.com/mstange/framehop/" + +[profile.release] +debug = 2 + +[dependencies.arrayvec] +version = "0.7.4" +default-features = false + +[dependencies.cfg-if] +version = "1.0.0" + +[dependencies.fallible-iterator] +version = "0.3.0" + +[dependencies.gimli] +version = "0.30" +features = ["read"] +default-features = false + +[dependencies.macho-unwind-info] +version = "0.4.0" +optional = true + +[dependencies.object] +version = "0.36" +features = ["read_core"] +optional = true +default-features = false + +[dependencies.pe-unwind-info] +version = "0.2.1" +optional = true + +[dev-dependencies.flate2] +version = "1.0.28" + +[dev-dependencies.itertools] +version = "0.13" + +[dev-dependencies.object] +version = "0.36" + +[features] +default = [ + "std", + "macho", + "pe", +] +macho = ["macho-unwind-info"] +pe = ["pe-unwind-info"] +std = [ + "arrayvec/std", + "gimli/std", +] diff --git a/third_party/rust/framehop/LICENSE-APACHE b/third_party/rust/framehop/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/third_party/rust/framehop/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/third_party/rust/framehop/LICENSE-MIT b/third_party/rust/framehop/LICENSE-MIT new file mode 100644 index 000000000000..e9485b4d39f4 --- /dev/null +++ b/third_party/rust/framehop/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Markus Stange + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/framehop/Readme.md b/third_party/rust/framehop/Readme.md new file mode 100644 index 000000000000..32724a98a169 --- /dev/null +++ b/third_party/rust/framehop/Readme.md @@ -0,0 +1,157 @@ +[![crates.io page](https://img.shields.io/crates/v/framehop.svg)](https://crates.io/crates/framehop) +[![docs.rs page](https://docs.rs/framehop/badge.svg)](https://docs.rs/framehop/) + +# framehop + +Framehop is a stack frame unwinder written in 100% Rust. It produces high quality stacks at high speed, on multiple platforms and architectures, without an expensive pre-processing step for unwind information. This makes it suitable for sampling profilers. + +It currently supports unwinding x86_64 and aarch64, with unwind information formats commonly used on Windows, macOS, Linux and Android. + +You give framehop register values, stack memory and unwind data, and framehop produces a list of return addresses. + +Framehop can be used in the following scenarios: + + - Live unwinding of a remote process. This is how [`samply`](https://github.com/mstange/samply/) uses it. + - Offline unwinding from saved registers and stack bytes, even on a different machine, a different OS, or a different CPU architecture. + - Live unwinding inside the same process. This is currently unproven, but should work as long as you can do heap allocation before sampling, in order to allocate a cache and to update the list of modules. 
The actual unwinding does not require any heap allocation and should work even inside a signal handler, as long as you use `MustNotAllocateDuringUnwind`. + +As a user of framehop, your responsibilities are the following: + + - You need to enumerate the modules (libraries) that are loaded in the sampled process ahead of time, or ideally maintain a live list which is updated whenever modules are loaded / unloaded. + - You need to provide address ranges and unwind section data for those modules. + - When sampling, you provide the register values and a callback to read arbitrary stack memory without segfaulting. + - On aarch64, picking the right bitmask to strip pointer authentication bits from return addresses is up to you. + - You will need to do symbol resolution yourself, if you want function names. Framehop only produces addresses, it does not do any symbolication. + +In turn, framehop solves the following problems: + + - It parses a number of different unwind information formats. At the moment, it supports the following: + - Apple's Compact Unwinding Format, in `__unwind_info` (macOS) + - DWARF CFI in `.eh_frame` (using `.eh_frame_hdr` as an index, if available) + - DWARF CFI in `.debug_frame` + - PE unwind info in `.pdata`, `.rdata` and `.xdata` (for Windows x86_64) + - It supports correct unwinding even when the program is interrupted inside a function prologue or epilogue. On macOS, it has to analyze assembly instructions in order to do this. + - On x86_64 and aarch64, it falls back to frame pointer unwinding if it cannot find unwind information for an address. + - It caches the unwind rule for each address in a fixed-size cache, so that repeated unwinding from the same address is even faster. + - It generates binary search indexes for unwind information formats which don't have them. Specifically, for `.debug_frame` and for `.eh_frame` without `.eh_frame_hdr`. 
+ - It does a reasonable job of detecting the end of the stack, so that you can differentiate between properly terminated stacks and prematurely truncated stacks. + +Framehop is not suitable for debuggers or to implement exception handling. Debuggers usually need to recover all register values for every frame whereas framehop only cares about return addresses. And exception handling needs the ability to call destructors, which is also a non-goal for framehop. + +## Speed + +Framehop is so fast that stack walking is a minuscule part of sampling in both scenarios where I've tried it. + +In [this samply example](https://share.firefox.dev/3s6mQKl) of profiling a single-threaded Rust application, walking the stack takes a quarter of the time it takes to query macOS for the thread's register values. In [another samply example](https://share.firefox.dev/3ksWaPt) of profiling a Firefox build without frame pointers, the DWARF unwinding takes 4x as long as the querying of the register values, but is still overall cheaper than the cost of thread_suspend + thread_get_state + thread_resume. + +In [this example of processing a `perf.data` file](https://share.firefox.dev/3vSQOTb), the bottleneck is reading the bytes from disk, rather than stackwalking. [With a warm file cache](https://share.firefox.dev/3Kt6sK1), the cost of stack walking is still comparable to the cost of copying the bytes from the file cache, and most of the stack walking time is spent reading return addresses from the stack bytes. + +Framehop achieves this speed in the following ways: + + 1. It only recovers registers which are needed for computing return addresses. On x86_64 that's `rip`, `rsp` and `rbp`, and on aarch64 that's `lr`, `sp` and `fp`. All other registers are not needed - in theory they could be used as inputs to DWARF CFI expressions, but in practice they are not. + 2. It uses zero-copy parsing wherever possible. 
For example, the bytes in `__unwind_info` are only accessed during unwinding, and the binary search happens right inside the original `__unwind_info` memory. For DWARF unwinding, framehop uses the excellent [`gimli` crate](https://github.com/gimli-rs/gimli/), which was written with performance in mind. + 3. It uses binary search to find the correct unwind rule in all supported unwind information formats. For formats without a built-in index, it creates an index when the module is added. + 4. It caches unwind rules based on address. In practice, the 509-slot cache achieves a hit rate of around 80% on complicated code like Firefox (with the cache being shared across all Firefox processes). When profiling simpler applications, the hit rate is likely much higher. + +Furthermore, adding a module is fast too because framehop only does minimal up-front parsing and processing - really, the only thing it does is to create the index of FDE offsets for `.eh_frame` / `.debug_frame`. + +## Current State and Roadmap + +Framehop is still a work in progress. Its API is subject to change. The API churn probably won't quieten down at least until we have one or two 32 bit architectures implemented. + +That said, framehop works remarkably well on the supported platforms, and is definitely worth a try if you can stomach the frequent API breakages. Please file issues if you run into any trouble or have suggestions. + +Eventually I'd like to use framehop as a replacement for Lul in the Gecko profiler (Firefox's built-in profiler). For that we'll also want to add x86 support (for 32 bit Windows and Linux) and EHABI / EXIDX support (for 32 bit ARM Android). 
+ +## Example + +```rust +use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; +use framehop::{ExplicitModuleSectionInfo, FrameAddress, Module}; + +let mut cache = CacheAarch64::<_>::new(); +let mut unwinder = UnwinderAarch64::new(); + +let module = Module::new( + "mybinary".to_string(), + 0x1003fc000..0x100634000, + 0x1003fc000, + ExplicitModuleSectionInfo { + base_svma: 0x100000000, + text_svma: Some(0x100000b64..0x1001d2d18), + text: Some(vec![/* __text */]), + stubs_svma: Some(0x1001d2d18..0x1001d309c), + stub_helper_svma: Some(0x1001d309c..0x1001d3438), + got_svma: Some(0x100238000..0x100238010), + unwind_info: Some(vec![/* __unwind_info */]), + eh_frame_svma: Some(0x100237f80..0x100237ffc), + eh_frame: Some(vec![/* __eh_frame */]), + text_segment_svma: Some(0x1003fc000..0x100634000), + text_segment: Some(vec![/* __TEXT */]), + ..Default::default() + }, +); +unwinder.add_module(module); + +let pc = 0x1003fc000 + 0x1292c0; +let lr = 0x1003fc000 + 0xe4830; +let sp = 0x10; +let fp = 0x20; +let stack = [ + 1, 2, 3, 4, 0x40, 0x1003fc000 + 0x100dc4, + 5, 6, 0x70, 0x1003fc000 + 0x12ca28, + 7, 8, 9, 10, 0x0, 0x0, +]; +let mut read_stack = |addr| stack.get((addr / 8) as usize).cloned().ok_or(()); + +use framehop::Unwinder; +let mut iter = unwinder.iter_frames( + pc, + UnwindRegsAarch64::new(lr, sp, fp), + &mut cache, + &mut read_stack, +); + +let mut frames = Vec::new(); +while let Ok(Some(frame)) = iter.next() { + frames.push(frame); +} + +assert_eq!( + frames, + vec![ + FrameAddress::from_instruction_pointer(0x1003fc000 + 0x1292c0), + FrameAddress::from_return_address(0x1003fc000 + 0x100dc4).unwrap(), + FrameAddress::from_return_address(0x1003fc000 + 0x12ca28).unwrap() + ] +); +``` + +## Recommended Reading and Tools + +Here's a list of articles I found useful during development: + + - [Reliable and Fast DWARF-Based Stack Unwinding](https://hal.inria.fr/hal-02297690/document), also available [as a 
presentation](https://deepspec.org/events/dsw18/zappa-nardelli-deepspec18.pdf). This is **the** unwinding reference document. If you want to read just one thing, read this. This article explains the background super clearly, and is very approachable. It shows how assembly and unwind information correspond to each other and has lots of examples that are easy to understand. + - [How fast can CFI/EXIDX-based stack unwinding be?](https://blog.mozilla.org/jseward/2013/08/29/how-fast-can-cfiexidx-based-stack-unwinding-be/), by Julian Seward + - [Unwinding a Stack by Hand with Frame Pointers and ORC](https://blogs.oracle.com/linux/post/unwinding-stack-frame-pointers-and-orc), by Stephen Brennan + - [Aarch64 DWARF register names](https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst#dwarf-register-names) + +I used these tools very frequently: + + - [Hopper Disassembler](https://www.hopperapp.com/), to look at assembly code. + - `llvm-dwarfdump --eh-frame mylib.so` to display DWARF unwind information. + - `llvm-objdump --section-headers mylib.so` to display section information. + - `unwindinfodump mylib.dylib` to display compact unwind information. (Install using `cargo install --examples macho-unwind-info`, see [macho-unwind-info](https://github.com/mstange/macho-unwind-info/blob/main/examples/unwindinfodump.rs).) + +## License + +Licensed under either of + + * Apache License, Version 2.0 ([`LICENSE-APACHE`](./LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([`LICENSE-MIT`](./LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. 
diff --git a/third_party/rust/framehop/src/aarch64/arch.rs b/third_party/rust/framehop/src/aarch64/arch.rs new file mode 100644 index 000000000000..910d85851d69 --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/arch.rs @@ -0,0 +1,10 @@ +use super::unwind_rule::UnwindRuleAarch64; +use super::unwindregs::UnwindRegsAarch64; +use crate::arch::Arch; + +/// The Aarch64 CPU architecture. +pub struct ArchAarch64; +impl Arch for ArchAarch64 { + type UnwindRule = UnwindRuleAarch64; + type UnwindRegs = UnwindRegsAarch64; +} diff --git a/third_party/rust/framehop/src/aarch64/cache.rs b/third_party/rust/framehop/src/aarch64/cache.rs new file mode 100644 index 000000000000..4b207eacbcc3 --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/cache.rs @@ -0,0 +1,32 @@ +use super::unwind_rule::*; +use crate::cache::*; + +/// The unwinder cache type for [`UnwinderAarch64`](super::UnwinderAarch64). +pub struct CacheAarch64( + pub Cache, +); + +impl CacheAarch64 { + /// Create a new cache. + pub fn new() -> Self { + Self(Cache::new()) + } +} + +impl CacheAarch64

{ + /// Create a new cache. + pub fn new_in() -> Self { + Self(Cache::new()) + } + + /// Returns a snapshot of the cache usage statistics. + pub fn stats(&self) -> CacheStats { + self.0.rule_cache.stats() + } +} + +impl Default for CacheAarch64

{ + fn default() -> Self { + Self::new_in() + } +} diff --git a/third_party/rust/framehop/src/aarch64/dwarf.rs b/third_party/rust/framehop/src/aarch64/dwarf.rs new file mode 100644 index 000000000000..c3ea10aa0fb3 --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/dwarf.rs @@ -0,0 +1,195 @@ +use gimli::{ + AArch64, CfaRule, Encoding, EvaluationStorage, Reader, ReaderOffset, Register, RegisterRule, + UnwindContextStorage, UnwindSection, UnwindTableRow, +}; + +use super::{arch::ArchAarch64, unwind_rule::UnwindRuleAarch64, unwindregs::UnwindRegsAarch64}; + +use crate::unwind_result::UnwindResult; + +use crate::dwarf::{ + eval_cfa_rule, eval_register_rule, ConversionError, DwarfUnwindRegs, DwarfUnwinderError, + DwarfUnwinding, +}; + +impl DwarfUnwindRegs for UnwindRegsAarch64 { + fn get(&self, register: Register) -> Option { + match register { + AArch64::SP => Some(self.sp()), + AArch64::X29 => Some(self.fp()), + AArch64::X30 => Some(self.lr()), + _ => None, + } + } +} + +impl DwarfUnwinding for ArchAarch64 { + fn unwind_frame( + section: &impl UnwindSection, + unwind_info: &UnwindTableRow, + encoding: Encoding, + regs: &mut Self::UnwindRegs, + is_first_frame: bool, + read_stack: &mut F, + ) -> Result, DwarfUnwinderError> + where + F: FnMut(u64) -> Result, + R: Reader, + UCS: UnwindContextStorage, + ES: EvaluationStorage, + { + let cfa_rule = unwind_info.cfa(); + let fp_rule = unwind_info.register(AArch64::X29); + let lr_rule = unwind_info.register(AArch64::X30); + + match translate_into_unwind_rule(cfa_rule, &fp_rule, &lr_rule) { + Ok(unwind_rule) => return Ok(UnwindResult::ExecRule(unwind_rule)), + Err(_err) => { + // Could not translate into a cacheable unwind rule. Fall back to the generic path. 
+ // eprintln!("Unwind rule translation failed: {:?}", err); + } + } + + let cfa = eval_cfa_rule::(section, cfa_rule, encoding, regs) + .ok_or(DwarfUnwinderError::CouldNotRecoverCfa)?; + + let lr = regs.lr(); + let fp = regs.fp(); + let sp = regs.sp(); + + let (fp, lr) = if !is_first_frame { + if cfa <= sp { + return Err(DwarfUnwinderError::StackPointerMovedBackwards); + } + let fp = eval_register_rule::( + section, fp_rule, cfa, encoding, fp, regs, read_stack, + ) + .ok_or(DwarfUnwinderError::CouldNotRecoverFramePointer)?; + let lr = eval_register_rule::( + section, lr_rule, cfa, encoding, lr, regs, read_stack, + ) + .ok_or(DwarfUnwinderError::CouldNotRecoverReturnAddress)?; + (fp, lr) + } else { + // For the first frame, be more lenient when encountering errors. + // TODO: Find evidence of what this gives us. I think on macOS the prologue often has Unknown register rules + // and we only encounter prologues for the first frame. + let fp = eval_register_rule::( + section, fp_rule, cfa, encoding, fp, regs, read_stack, + ) + .unwrap_or(fp); + let lr = eval_register_rule::( + section, lr_rule, cfa, encoding, lr, regs, read_stack, + ) + .unwrap_or(lr); + (fp, lr) + }; + + regs.set_fp(fp); + regs.set_sp(cfa); + regs.set_lr(lr); + + Ok(UnwindResult::Uncacheable(lr)) + } + + fn rule_if_uncovered_by_fde() -> Self::UnwindRule { + UnwindRuleAarch64::NoOpIfFirstFrameOtherwiseFp + } +} + +fn register_rule_to_cfa_offset( + rule: &RegisterRule, +) -> Result, ConversionError> { + match *rule { + RegisterRule::Undefined | RegisterRule::SameValue => Ok(None), + RegisterRule::Offset(offset) => Ok(Some(offset)), + _ => Err(ConversionError::RegisterNotStoredRelativeToCfa), + } +} + +fn translate_into_unwind_rule( + cfa_rule: &CfaRule, + fp_rule: &RegisterRule, + lr_rule: &RegisterRule, +) -> Result { + match cfa_rule { + CfaRule::RegisterAndOffset { register, offset } => match *register { + AArch64::SP => { + let sp_offset_by_16 = + u16::try_from(offset / 16).map_err(|_| 
ConversionError::SpOffsetDoesNotFit)?; + let lr_cfa_offset = register_rule_to_cfa_offset(lr_rule)?; + let fp_cfa_offset = register_rule_to_cfa_offset(fp_rule)?; + match (lr_cfa_offset, fp_cfa_offset) { + (None, Some(_)) => Err(ConversionError::RestoringFpButNotLr), + (None, None) => { + if let RegisterRule::Undefined = lr_rule { + // If the return address is undefined, this could have two reasons: + // - The column for the return address may have been manually set to "undefined" + // using DW_CFA_undefined. This usually means that the function never returns + // and can be treated as the root of the stack. + // - The column for the return may have been omitted from the DWARF CFI table. + // Per spec (at least as of DWARF >= 3), this means that it should be treated + // as undefined. But it seems that compilers often do this when they really mean + // "same value". + // Gimli follows DWARF 3 and does not differentiate between "omitted" and "undefined". + Ok( + UnwindRuleAarch64::OffsetSpIfFirstFrameOtherwiseStackEndsHere { + sp_offset_by_16, + }, + ) + } else { + Ok(UnwindRuleAarch64::OffsetSp { sp_offset_by_16 }) + } + } + (Some(lr_cfa_offset), None) => { + let lr_storage_offset_from_sp_by_8 = + i16::try_from((offset + lr_cfa_offset) / 8) + .map_err(|_| ConversionError::LrStorageOffsetDoesNotFit)?; + Ok(UnwindRuleAarch64::OffsetSpAndRestoreLr { + sp_offset_by_16, + lr_storage_offset_from_sp_by_8, + }) + } + (Some(lr_cfa_offset), Some(fp_cfa_offset)) => { + let lr_storage_offset_from_sp_by_8 = + i16::try_from((offset + lr_cfa_offset) / 8) + .map_err(|_| ConversionError::LrStorageOffsetDoesNotFit)?; + let fp_storage_offset_from_sp_by_8 = + i16::try_from((offset + fp_cfa_offset) / 8) + .map_err(|_| ConversionError::FpStorageOffsetDoesNotFit)?; + Ok(UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16, + fp_storage_offset_from_sp_by_8, + lr_storage_offset_from_sp_by_8, + }) + } + } + } + AArch64::X29 => { + let lr_cfa_offset = 
register_rule_to_cfa_offset(lr_rule)? + .ok_or(ConversionError::FramePointerRuleDoesNotRestoreLr)?; + let fp_cfa_offset = register_rule_to_cfa_offset(fp_rule)? + .ok_or(ConversionError::FramePointerRuleDoesNotRestoreFp)?; + if *offset == 16 && fp_cfa_offset == -16 && lr_cfa_offset == -8 { + Ok(UnwindRuleAarch64::UseFramePointer) + } else { + let sp_offset_from_fp_by_8 = u16::try_from(offset / 8) + .map_err(|_| ConversionError::SpOffsetFromFpDoesNotFit)?; + let lr_storage_offset_from_fp_by_8 = + i16::try_from((offset + lr_cfa_offset) / 8) + .map_err(|_| ConversionError::LrStorageOffsetDoesNotFit)?; + let fp_storage_offset_from_fp_by_8 = + i16::try_from((offset + fp_cfa_offset) / 8) + .map_err(|_| ConversionError::FpStorageOffsetDoesNotFit)?; + Ok(UnwindRuleAarch64::UseFramepointerWithOffsets { + sp_offset_from_fp_by_8, + fp_storage_offset_from_fp_by_8, + lr_storage_offset_from_fp_by_8, + }) + } + } + _ => Err(ConversionError::CfaIsOffsetFromUnknownRegister), + }, + CfaRule::Expression(_) => Err(ConversionError::CfaIsExpression), + } +} diff --git a/third_party/rust/framehop/src/aarch64/instruction_analysis/epilogue.rs b/third_party/rust/framehop/src/aarch64/instruction_analysis/epilogue.rs new file mode 100644 index 000000000000..b9a82f27f4fc --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/instruction_analysis/epilogue.rs @@ -0,0 +1,702 @@ +use super::super::unwind_rule::UnwindRuleAarch64; + +struct EpilogueDetectorAarch64 { + sp_offset: i32, + fp_offset_from_initial_sp: Option, + lr_offset_from_initial_sp: Option, +} + +enum EpilogueStepResult { + NeedMore, + FoundBodyInstruction(UnexpectedInstructionType), + FoundReturn, + FoundTailCall, + CouldBeAuthTailCall, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum EpilogueResult { + ProbablyStillInBody(UnexpectedInstructionType), + ReachedFunctionEndWithoutReturn, + FoundReturnOrTailCall { + sp_offset: i32, + fp_offset_from_initial_sp: Option, + lr_offset_from_initial_sp: Option, + }, +} + +#[derive(Clone, 
Debug, PartialEq, Eq)] +enum UnexpectedInstructionType { + LoadOfWrongSize, + LoadReferenceRegisterNotSp, + AddSubNotOperatingOnSp, + AutibspNotFollowedByExpectedTailCall, + BranchWithUnadjustedStackPointer, + Unknown, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum EpilogueInstructionType { + NotExpectedInEpilogue, + CouldBeTailCall { + /// If auth tail call, the offset in bytes where the autibsp would be. + /// If regular tail call, we just check if the previous instruction + /// adjusts the stack pointer. + offset_of_expected_autibsp: u8, + }, + CouldBePartOfAuthTailCall { + /// In bytes + offset_of_expected_autibsp: u8, + }, + VeryLikelyPartOfEpilogue, +} + +impl EpilogueDetectorAarch64 { + pub fn new() -> Self { + Self { + sp_offset: 0, + fp_offset_from_initial_sp: None, + lr_offset_from_initial_sp: None, + } + } + + pub fn analyze_slice(&mut self, function_bytes: &[u8], pc_offset: usize) -> EpilogueResult { + let mut bytes = &function_bytes[pc_offset..]; + if bytes.len() < 4 { + return EpilogueResult::ReachedFunctionEndWithoutReturn; + } + let mut word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + bytes = &bytes[4..]; + match Self::analyze_instruction(word) { + EpilogueInstructionType::NotExpectedInEpilogue => { + return EpilogueResult::ProbablyStillInBody(UnexpectedInstructionType::Unknown) + } + EpilogueInstructionType::CouldBeTailCall { + offset_of_expected_autibsp, + } => { + if pc_offset >= offset_of_expected_autibsp as usize { + let auth_tail_call_bytes = + &function_bytes[pc_offset - offset_of_expected_autibsp as usize..]; + if auth_tail_call_bytes[0..4] == [0xff, 0x23, 0x03, 0xd5] + && Self::is_auth_tail_call(&auth_tail_call_bytes[4..]) + { + return EpilogueResult::FoundReturnOrTailCall { + sp_offset: 0, + fp_offset_from_initial_sp: None, + lr_offset_from_initial_sp: None, + }; + } + } + if pc_offset >= 4 { + let prev_b = &function_bytes[pc_offset - 4..pc_offset]; + let prev_word = + u32::from_le_bytes([prev_b[0], prev_b[1], 
prev_b[2], prev_b[3]]); + if Self::instruction_adjusts_stack_pointer(prev_word) { + return EpilogueResult::FoundReturnOrTailCall { + sp_offset: 0, + fp_offset_from_initial_sp: None, + lr_offset_from_initial_sp: None, + }; + } + } + return EpilogueResult::ProbablyStillInBody(UnexpectedInstructionType::Unknown); + } + EpilogueInstructionType::CouldBePartOfAuthTailCall { + offset_of_expected_autibsp, + } => { + if pc_offset >= offset_of_expected_autibsp as usize { + let auth_tail_call_bytes = + &function_bytes[pc_offset - offset_of_expected_autibsp as usize..]; + if auth_tail_call_bytes[0..4] == [0xff, 0x23, 0x03, 0xd5] + && Self::is_auth_tail_call(&auth_tail_call_bytes[4..]) + { + return EpilogueResult::FoundReturnOrTailCall { + sp_offset: 0, + fp_offset_from_initial_sp: None, + lr_offset_from_initial_sp: None, + }; + } + } + return EpilogueResult::ProbablyStillInBody(UnexpectedInstructionType::Unknown); + } + EpilogueInstructionType::VeryLikelyPartOfEpilogue => {} + } + + loop { + match self.step_instruction(word) { + EpilogueStepResult::NeedMore => { + if bytes.len() < 4 { + return EpilogueResult::ReachedFunctionEndWithoutReturn; + } + word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + bytes = &bytes[4..]; + continue; + } + EpilogueStepResult::FoundBodyInstruction(uit) => { + return EpilogueResult::ProbablyStillInBody(uit); + } + EpilogueStepResult::FoundReturn | EpilogueStepResult::FoundTailCall => {} + EpilogueStepResult::CouldBeAuthTailCall => { + if !Self::is_auth_tail_call(bytes) { + return EpilogueResult::ProbablyStillInBody( + UnexpectedInstructionType::AutibspNotFollowedByExpectedTailCall, + ); + } + } + } + return EpilogueResult::FoundReturnOrTailCall { + sp_offset: self.sp_offset, + fp_offset_from_initial_sp: self.fp_offset_from_initial_sp, + lr_offset_from_initial_sp: self.lr_offset_from_initial_sp, + }; + } + } + + fn instruction_adjusts_stack_pointer(word: u32) -> bool { + // Detect load from sp-relative offset with writeback. 
+ if (word >> 22) & 0b1011111011 == 0b1010100011 && (word >> 5) & 0b11111 == 31 { + return true; + } + // Detect sub sp, sp, 0xXXXX + if (word >> 23) & 0b111111111 == 0b100100010 + && word & 0b11111 == 31 + && (word >> 5) & 0b11111 == 31 + { + return true; + } + false + } + + fn is_auth_tail_call(bytes_after_autibsp: &[u8]) -> bool { + // libsystem_malloc.dylib contains over a hundred of these. + // At the end of the function, after restoring the registers from the stack, + // there's an autibsp instruction, followed by some check (not sure what it + // does), and then a tail call. These instructions should all be counted as + // part of the epilogue; returning at this point is just "follow lr" instead + // of "use the frame pointer". + // + // 180139058 ff 23 03 d5 autibsp + // + // 18013905c d0 07 1e ca eor x16, lr, lr, lsl #1 + // 180139060 50 00 f0 b6 tbz x16, 0x3e, $+0x8 + // 180139064 20 8e 38 d4 brk #0xc471 ; "breakpoint trap" + // + // and then a tail call, of one of these forms: + // + // 180139068 13 00 00 14 b some_outside_function + // + // 18013a364 f0 36 88 d2 mov x16, #0xXXXX + // 18013a368 70 08 1f d7 braa xX, x16 + // + + if bytes_after_autibsp.len() < 16 { + return false; + } + let eor_tbz_brk = &bytes_after_autibsp[..12]; + if eor_tbz_brk + != [ + 0xd0, 0x07, 0x1e, 0xca, 0x50, 0x00, 0xf0, 0xb6, 0x20, 0x8e, 0x38, 0xd4, + ] + { + return false; + } + + let first_tail_call_instruction_opcode = u32::from_le_bytes([ + bytes_after_autibsp[12], + bytes_after_autibsp[13], + bytes_after_autibsp[14], + bytes_after_autibsp[15], + ]); + let bits_26_to_32 = first_tail_call_instruction_opcode >> 26; + if bits_26_to_32 == 0b000101 { + // This is a `b` instruction. We've found the tail call. + return true; + } + + // If we get here, it's either not a recognized instruction sequence, + // or the tail call is of the form `mov x16, #0xXXXX`, `braa xX, x16`. 
+ if bytes_after_autibsp.len() < 20 { + return false; + } + + let bits_23_to_32 = first_tail_call_instruction_opcode >> 23; + let is_64_mov = (bits_23_to_32 & 0b111000111) == 0b110000101; + let result_reg = first_tail_call_instruction_opcode & 0b11111; + if !is_64_mov || result_reg != 16 { + return false; + } + + let braa_opcode = u32::from_le_bytes([ + bytes_after_autibsp[16], + bytes_after_autibsp[17], + bytes_after_autibsp[18], + bytes_after_autibsp[19], + ]); + (braa_opcode & 0xff_ff_fc_00) == 0xd7_1f_08_00 && (braa_opcode & 0b11111) == 16 + } + + pub fn analyze_instruction(word: u32) -> EpilogueInstructionType { + // Detect ret and retab + if word == 0xd65f03c0 || word == 0xd65f0fff { + return EpilogueInstructionType::VeryLikelyPartOfEpilogue; + } + // Detect autibsp + if word == 0xd50323ff { + return EpilogueInstructionType::CouldBePartOfAuthTailCall { + offset_of_expected_autibsp: 0, + }; + } + // Detect `eor x16, lr, lr, lsl #1` + if word == 0xca1e07d0 { + return EpilogueInstructionType::CouldBePartOfAuthTailCall { + offset_of_expected_autibsp: 4, + }; + } + // Detect `tbz x16, 0x3e, $+0x8` + if word == 0xb6f00050 { + return EpilogueInstructionType::CouldBePartOfAuthTailCall { + offset_of_expected_autibsp: 8, + }; + } + // Detect `brk #0xc471` + if word == 0xd4388e20 { + return EpilogueInstructionType::CouldBePartOfAuthTailCall { + offset_of_expected_autibsp: 12, + }; + } + // Detect `b` and `br xX` + if (word >> 26) == 0b000101 || word & 0xff_ff_fc_1f == 0xd6_1f_00_00 { + // This could be a branch with a target inside this function, or + // a tail call outside of this function. 
+ return EpilogueInstructionType::CouldBeTailCall { + offset_of_expected_autibsp: 16, + }; + } + // Detect `mov x16, #0xXXXX` + if (word >> 23) & 0b111000111 == 0b110000101 && word & 0b11111 == 16 { + return EpilogueInstructionType::CouldBePartOfAuthTailCall { + offset_of_expected_autibsp: 16, + }; + } + // Detect `braa xX, x16` + if word & 0xff_ff_fc_00 == 0xd7_1f_08_00 && word & 0b11111 == 16 { + return EpilogueInstructionType::CouldBePartOfAuthTailCall { + offset_of_expected_autibsp: 20, + }; + } + if (word >> 22) & 0b1011111001 == 0b1010100001 { + // Section C3.3, Loads and stores. + // but only loads that are commonly seen in prologues / epilogues (bits 29 and 31 are set) + let writeback_bits = (word >> 23) & 0b11; + if writeback_bits == 0b00 { + // Not 64-bit load. + return EpilogueInstructionType::NotExpectedInEpilogue; + } + let reference_reg = ((word >> 5) & 0b11111) as u16; + if reference_reg != 31 { + return EpilogueInstructionType::NotExpectedInEpilogue; + } + return EpilogueInstructionType::VeryLikelyPartOfEpilogue; + } + if (word >> 23) & 0b111111111 == 0b100100010 { + // Section C3.4, Data processing - immediate + // unsigned add imm, size class X (8 bytes) + let result_reg = (word & 0b11111) as u16; + let input_reg = ((word >> 5) & 0b11111) as u16; + if result_reg != 31 || input_reg != 31 { + return EpilogueInstructionType::NotExpectedInEpilogue; + } + return EpilogueInstructionType::VeryLikelyPartOfEpilogue; + } + EpilogueInstructionType::NotExpectedInEpilogue + } + + pub fn step_instruction(&mut self, word: u32) -> EpilogueStepResult { + // Detect ret and retab + if word == 0xd65f03c0 || word == 0xd65f0fff { + return EpilogueStepResult::FoundReturn; + } + // Detect autibsp + if word == 0xd50323ff { + return EpilogueStepResult::CouldBeAuthTailCall; + } + // Detect b + if (word >> 26) == 0b000101 { + // This could be a branch with a target inside this function, or + // a tail call outside of this function. 
+ // Let's use the following heuristic: If this instruction is followed + // by valid epilogue instructions which adjusted the stack pointer, then + // we treat it as a tail call. + if self.sp_offset != 0 { + return EpilogueStepResult::FoundTailCall; + } + return EpilogueStepResult::FoundBodyInstruction( + UnexpectedInstructionType::BranchWithUnadjustedStackPointer, + ); + } + if (word >> 22) & 0b1011111001 == 0b1010100001 { + // Section C3.3, Loads and stores. + // but only those that are commonly seen in prologues / epilogues (bits 29 and 31 are set) + let writeback_bits = (word >> 23) & 0b11; + if writeback_bits == 0b00 { + // Not 64-bit load/store. + return EpilogueStepResult::FoundBodyInstruction( + UnexpectedInstructionType::LoadOfWrongSize, + ); + } + let reference_reg = ((word >> 5) & 0b11111) as u16; + if reference_reg != 31 { + return EpilogueStepResult::FoundBodyInstruction( + UnexpectedInstructionType::LoadReferenceRegisterNotSp, + ); + } + let is_preindexed_writeback = writeback_bits == 0b11; // TODO: are there preindexed loads? What do they mean? 
+ let is_postindexed_writeback = writeback_bits == 0b01; + let imm7 = (((((word >> 15) & 0b1111111) as i16) << 9) >> 6) as i32; + let reg_loc = if is_postindexed_writeback { + self.sp_offset + } else { + self.sp_offset + imm7 + }; + let pair_reg_1 = (word & 0b11111) as u16; + if pair_reg_1 == 29 { + self.fp_offset_from_initial_sp = Some(reg_loc); + } else if pair_reg_1 == 30 { + self.lr_offset_from_initial_sp = Some(reg_loc); + } + let pair_reg_2 = ((word >> 10) & 0b11111) as u16; + if pair_reg_2 == 29 { + self.fp_offset_from_initial_sp = Some(reg_loc + 8); + } else if pair_reg_2 == 30 { + self.lr_offset_from_initial_sp = Some(reg_loc + 8); + } + if is_preindexed_writeback || is_postindexed_writeback { + self.sp_offset += imm7; + } + return EpilogueStepResult::NeedMore; + } + if (word >> 23) & 0b111111111 == 0b100100010 { + // Section C3.4, Data processing - immediate + // unsigned add imm, size class X (8 bytes) + let result_reg = (word & 0b11111) as u16; + let input_reg = ((word >> 5) & 0b11111) as u16; + if result_reg != 31 || input_reg != 31 { + return EpilogueStepResult::FoundBodyInstruction( + UnexpectedInstructionType::AddSubNotOperatingOnSp, + ); + } + let mut imm12 = ((word >> 10) & 0b111111111111) as i32; + let shift_immediate_by_12 = ((word >> 22) & 0b1) == 0b1; + if shift_immediate_by_12 { + imm12 <<= 12 + } + self.sp_offset += imm12; + return EpilogueStepResult::NeedMore; + } + EpilogueStepResult::FoundBodyInstruction(UnexpectedInstructionType::Unknown) + } +} + +pub fn unwind_rule_from_detected_epilogue( + bytes: &[u8], + pc_offset: usize, +) -> Option { + let mut detector = EpilogueDetectorAarch64::new(); + match detector.analyze_slice(bytes, pc_offset) { + EpilogueResult::ProbablyStillInBody(_) + | EpilogueResult::ReachedFunctionEndWithoutReturn => None, + EpilogueResult::FoundReturnOrTailCall { + sp_offset, + fp_offset_from_initial_sp, + lr_offset_from_initial_sp, + } => { + let sp_offset_by_16 = u16::try_from(sp_offset / 16).ok()?; + let rule = 
match (fp_offset_from_initial_sp, lr_offset_from_initial_sp) { + (None, None) if sp_offset_by_16 == 0 => UnwindRuleAarch64::NoOp, + (None, None) => UnwindRuleAarch64::OffsetSp { sp_offset_by_16 }, + (None, Some(lr_offset)) => UnwindRuleAarch64::OffsetSpAndRestoreLr { + sp_offset_by_16, + lr_storage_offset_from_sp_by_8: i16::try_from(lr_offset / 8).ok()?, + }, + (Some(_), None) => return None, + (Some(fp_offset), Some(lr_offset)) => { + UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16, + fp_storage_offset_from_sp_by_8: i16::try_from(fp_offset / 8).ok()?, + lr_storage_offset_from_sp_by_8: i16::try_from(lr_offset / 8).ok()?, + } + } + }; + Some(rule) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_epilogue_1() { + // 1000e0d18 fd 7b 44 a9 ldp fp, lr, [sp, #0x40] + // 1000e0d1c f4 4f 43 a9 ldp x20, x19, [sp, #0x30] + // 1000e0d20 f6 57 42 a9 ldp x22, x21, [sp, #0x20] + // 1000e0d24 ff 43 01 91 add sp, sp, #0x50 + // 1000e0d28 c0 03 5f d6 ret + + let bytes = &[ + 0xfd, 0x7b, 0x44, 0xa9, 0xf4, 0x4f, 0x43, 0xa9, 0xf6, 0x57, 0x42, 0xa9, 0xff, 0x43, + 0x01, 0x91, 0xc0, 0x03, 0x5f, 0xd6, + ]; + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 0), + Some(UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16: 5, + fp_storage_offset_from_sp_by_8: 8, + lr_storage_offset_from_sp_by_8: 9, + }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 4), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 5 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 8), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 5 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 12), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 5 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 16), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 20), None); + } + + #[test] + fn test_epilogue_with_retab() { + // _malloc_zone_realloc epilogue + 
// 18012466c e0 03 16 aa mov x0,x22 + // 180124670 fd 7b 43 a9 ldp x29=>local_10,x30,[sp, #0x30] + // 180124674 f4 4f 42 a9 ldp x20,x19,[sp, #local_20] + // 180124678 f6 57 41 a9 ldp x22,x21,[sp, #local_30] + // 18012467c f8 5f c4 a8 ldp x24,x23,[sp], #0x40 + // 180124680 ff 0f 5f d6 retab + // 180124684 a0 01 80 52 mov w0,#0xd + // 180124688 20 60 a6 72 movk w0,#0x3301, LSL #16 + + let bytes = &[ + 0xe0, 0x03, 0x16, 0xaa, 0xfd, 0x7b, 0x43, 0xa9, 0xf4, 0x4f, 0x42, 0xa9, 0xf6, 0x57, + 0x41, 0xa9, 0xf8, 0x5f, 0xc4, 0xa8, 0xff, 0x0f, 0x5f, 0xd6, 0xa0, 0x01, 0x80, 0x52, + 0x20, 0x60, 0xa6, 0x72, + ]; + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 0), None); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 4), + Some(UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16: 4, + fp_storage_offset_from_sp_by_8: 6, + lr_storage_offset_from_sp_by_8: 7 + }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 8), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 4 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 12), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 4 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 16), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 4 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 20), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 24), None); + } + + #[test] + fn test_epilogue_with_retab_2() { + // _tiny_free_list_add_ptr: + // ... + // 18013e114 28 01 00 79 strh w8, [x9] + // 18013e118 fd 7b c1 a8 ldp fp, lr, [sp], #0x10 + // 18013e11c ff 0f 5f d6 retab + // 18013e120 e2 03 08 aa mov x2, x8 + // 18013e124 38 76 00 94 bl _free_list_checksum_botch + // ... 
+ + let bytes = &[ + 0x28, 0x01, 0x00, 0x79, 0xfd, 0x7b, 0xc1, 0xa8, 0xff, 0x0f, 0x5f, 0xd6, 0xe2, 0x03, + 0x08, 0xaa, 0x38, 0x76, 0x00, 0x94, + ]; + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 0), None); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 4), + Some(UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16: 1, + fp_storage_offset_from_sp_by_8: 0, + lr_storage_offset_from_sp_by_8: 1 + }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 8), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 12), None); + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 16), None); + } + + #[test] + fn test_epilogue_with_regular_tail_call() { + // (in rustup) __ZN126_$LT$$LT$toml..value..Value$u20$as$u20$serde..de..Deserialize$GT$..deserialize..ValueVisitor$u20$as$u20$serde..de..Visitor$GT$9visit_map17h0afd4b269ef00eebE + // ... + // 1002566b4 fc 6f c6 a8 ldp x28, x27, [sp], #0x60 + // 1002566b8 bc ba ff 17 b __ZN4core3ptr41drop_in_place$LT$toml..de..MapVisitor$GT$17hd4556de1a4edab42E + // ... + let bytes = &[0xfc, 0x6f, 0xc6, 0xa8, 0xbc, 0xba, 0xff, 0x17]; + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 0), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 6 }) + ); + } + + // This test fails at the moment. + #[test] + fn test_epilogue_with_register_tail_call() { + // This test requires lookbehind in the epilogue detection. + // We want to detect the `br` as a tail call. We should do this + // based on the fact that the previous instruction adjusted the + // stack pointer. + // + // (in rustup) __ZN4core3fmt9Formatter3pad17h3f40041e7f99f180E + // ... + // 1000500bc fa 67 c5 a8 ldp x26, x25, [sp], #0x50 + // 1000500c0 60 00 1f d6 br x3 + // ... 
+ let bytes = &[0xfa, 0x67, 0xc5, 0xa8, 0x60, 0x00, 0x1f, 0xd6]; + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 4), + Some(UnwindRuleAarch64::NoOp) + ); + } + + #[test] + fn test_epilogue_with_auth_tail_call() { + // _nanov2_free_definite_size + // ... + // 180139048 e1 03 13 aa mov x1, x19 + // 18013904c fd 7b 42 a9 ldp fp, lr, [sp, #0x20] + // 180139050 f4 4f 41 a9 ldp x20, x19, [sp, #0x10] + // 180139054 f6 57 c3 a8 ldp x22, x21, [sp], #0x30 + // 180139058 ff 23 03 d5 autibsp + // 18013905c d0 07 1e ca eor x16, lr, lr, lsl #1 + // 180139060 50 00 f0 b6 tbz x16, 0x3e, loc_180139068 + // 180139064 20 8e 38 d4 brk #0xc471 + // loc_180139068: + // 180139068 13 00 00 14 b _nanov2_free_to_block + // loc_18013906c: + // 18013906c a0 16 78 f9 ldr x0, [x21, #0x7028] + // 180139070 03 3c 40 f9 ldr x3, [x0, #0x78] + // ... + let bytes = &[ + 0xe1, 0x03, 0x13, 0xaa, 0xfd, 0x7b, 0x42, 0xa9, 0xf4, 0x4f, 0x41, 0xa9, 0xf6, 0x57, + 0xc3, 0xa8, 0xff, 0x23, 0x03, 0xd5, 0xd0, 0x07, 0x1e, 0xca, 0x50, 0x00, 0xf0, 0xb6, + 0x20, 0x8e, 0x38, 0xd4, 0x13, 0x00, 0x00, 0x14, 0xa0, 0x16, 0x78, 0xf9, 0x03, 0x3c, + 0x40, 0xf9, + ]; + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 0), None); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 4), + Some(UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16: 3, + fp_storage_offset_from_sp_by_8: 4, + lr_storage_offset_from_sp_by_8: 5 + }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 8), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 3 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 12), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 3 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 16), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 20), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 24), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + 
unwind_rule_from_detected_epilogue(bytes, 28), + Some(UnwindRuleAarch64::NoOp) + ); + } + + #[test] + fn test_epilogue_with_auth_tail_call_2() { + // _malloc_zone_claimed_addres + // ... + // 1801457ac e1 03 13 aa mov x1, x19 + // 1801457b0 fd 7b 41 a9 ldp fp, lr, [sp, #0x10] + // 1801457b4 f4 4f c2 a8 ldp x20, x19, [sp], #0x20 + // 1801457b8 ff 23 03 d5 autibsp + // 1801457bc d0 07 1e ca eor x16, lr, lr, lsl #1 + // 1801457c0 50 00 f0 b6 tbz x16, 0x3e, loc_1801457c8 + // 1801457c4 20 8e 38 d4 brk #0xc471 + // loc_1801457c8: + // 1801457c8 f0 77 9c d2 mov x16, #0xe3bf + // 1801457cc 50 08 1f d7 braa x2, x16 + // ... + let bytes = &[ + 0xe1, 0x03, 0x13, 0xaa, 0xfd, 0x7b, 0x41, 0xa9, 0xf4, 0x4f, 0xc2, 0xa8, 0xff, 0x23, + 0x03, 0xd5, 0xd0, 0x07, 0x1e, 0xca, 0x50, 0x00, 0xf0, 0xb6, 0x20, 0x8e, 0x38, 0xd4, + 0xf0, 0x77, 0x9c, 0xd2, 0x50, 0x08, 0x1f, 0xd7, + ]; + assert_eq!(unwind_rule_from_detected_epilogue(bytes, 0), None); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 4), + Some(UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16: 2, + fp_storage_offset_from_sp_by_8: 2, + lr_storage_offset_from_sp_by_8: 3 + }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 8), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 2 }) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 12), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 16), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 20), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 24), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_epilogue(bytes, 28), + Some(UnwindRuleAarch64::NoOp) + ); + } +} diff --git a/third_party/rust/framehop/src/aarch64/instruction_analysis/mod.rs b/third_party/rust/framehop/src/aarch64/instruction_analysis/mod.rs new file mode 100644 index 000000000000..2e006fc0057b --- 
/dev/null +++ b/third_party/rust/framehop/src/aarch64/instruction_analysis/mod.rs @@ -0,0 +1,25 @@ +use super::arch::ArchAarch64; +use crate::instruction_analysis::InstructionAnalysis; + +mod epilogue; +mod prologue; + +use epilogue::unwind_rule_from_detected_epilogue; +use prologue::unwind_rule_from_detected_prologue; + +impl InstructionAnalysis for ArchAarch64 { + fn rule_from_prologue_analysis( + text_bytes: &[u8], + pc_offset: usize, + ) -> Option { + let (slice_from_start, slice_to_end) = text_bytes.split_at(pc_offset); + unwind_rule_from_detected_prologue(slice_from_start, slice_to_end) + } + + fn rule_from_epilogue_analysis( + text_bytes: &[u8], + pc_offset: usize, + ) -> Option { + unwind_rule_from_detected_epilogue(text_bytes, pc_offset) + } +} diff --git a/third_party/rust/framehop/src/aarch64/instruction_analysis/prologue.rs b/third_party/rust/framehop/src/aarch64/instruction_analysis/prologue.rs new file mode 100644 index 000000000000..0e5d4cbf639f --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/instruction_analysis/prologue.rs @@ -0,0 +1,400 @@ +use super::super::unwind_rule::UnwindRuleAarch64; + +struct PrologueDetectorAarch64 { + sp_offset: i32, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum PrologueStepResult { + UnexpectedInstruction(UnexpectedInstructionType), + ValidPrologueInstruction, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum PrologueResult { + ProbablyAlreadyInBody(UnexpectedInstructionType), + FoundFunctionStart { sp_offset: i32 }, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum PrologueInstructionType { + NotExpectedInPrologue, + CouldBePartOfPrologueIfThereIsAlsoAStackPointerSub, + VeryLikelyPartOfPrologue, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum UnexpectedInstructionType { + StoreOfWrongSize, + StoreReferenceRegisterNotSp, + AddSubNotOperatingOnSp, + NoNextInstruction, + NoStackPointerSubBeforeStore, + Unknown, +} + +impl PrologueDetectorAarch64 { + pub fn new() -> Self { + Self { sp_offset: 0 } + } + + 
pub fn analyze_slices( + &mut self, + slice_from_start: &[u8], + slice_to_end: &[u8], + ) -> PrologueResult { + // There are at least two options of what we could do here: + // - We could walk forwards from the function start to the instruction pointer. + // - We could walk backwards from the instruction pointer to the function start. + // Walking backwards is fine on arm64 because instructions are fixed size. + // Walking forwards requires that we have a useful function start address. + // + // Unfortunately, we can't rely on having a useful function start address. + // We get the function start address from the __unwind_info, which often collapses + // consecutive functions with the same unwind rules into a single entry, discarding + // the original function start addresses. + // Concretely, this means that `slice_from_start` may start much earlier than the + // current function. + // + // So we walk backwards. We first check the next instruction, and then + // go backwards from the instruction pointer to the function start. + // If the instruction we're about to execute is one that we'd expect to find in a prologue, + // then we assume that we're in a prologue. Then we single-step backwards until we + // either run out of instructions (which means we've definitely hit the start of the + // function), or until we find an instruction that we would not expect in a prologue. + // At that point we guess that this instruction must belong to the previous + // function, and that we've successfully found the start of the current function. 
+ if slice_to_end.len() < 4 { + return PrologueResult::ProbablyAlreadyInBody( + UnexpectedInstructionType::NoNextInstruction, + ); + } + let next_instruction = u32::from_le_bytes([ + slice_to_end[0], + slice_to_end[1], + slice_to_end[2], + slice_to_end[3], + ]); + let next_instruction_type = Self::analyze_prologue_instruction_type(next_instruction); + if next_instruction_type == PrologueInstructionType::NotExpectedInPrologue { + return PrologueResult::ProbablyAlreadyInBody(UnexpectedInstructionType::Unknown); + } + let instructions = slice_from_start + .chunks_exact(4) + .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]])) + .rev(); + for instruction in instructions { + if let PrologueStepResult::UnexpectedInstruction(_) = + self.reverse_step_instruction(instruction) + { + break; + } + } + if next_instruction_type + == PrologueInstructionType::CouldBePartOfPrologueIfThereIsAlsoAStackPointerSub + && self.sp_offset == 0 + { + return PrologueResult::ProbablyAlreadyInBody( + UnexpectedInstructionType::NoStackPointerSubBeforeStore, + ); + } + PrologueResult::FoundFunctionStart { + sp_offset: self.sp_offset, + } + } + + /// Check if the instruction indicates that we're likely in a prologue. + pub fn analyze_prologue_instruction_type(word: u32) -> PrologueInstructionType { + // Detect pacibsp (verify stack pointer authentication) and `mov x29, sp`. + if word == 0xd503237f || word == 0x910003fd { + return PrologueInstructionType::VeryLikelyPartOfPrologue; + } + + let bits_22_to_32 = word >> 22; + + // Detect stores of register pairs to the stack. + if bits_22_to_32 & 0b1011111001 == 0b1010100000 { + // Section C3.3, Loads and stores. 
// Only stores that are commonly seen in prologues (bit 22 is clear; bits 29 and 31 are set) + let writeback_bits = bits_22_to_32 & 0b110; + let reference_reg = ((word >> 5) & 0b11111) as u16; + if writeback_bits == 0b000 || reference_reg != 31 { + return PrologueInstructionType::NotExpectedInPrologue; + } + // We are storing a register pair to the stack. This is something that + // can happen in a prologue but it can also happen in the body of a + // function. + if writeback_bits == 0b100 { + // No writeback. + return PrologueInstructionType::CouldBePartOfPrologueIfThereIsAlsoAStackPointerSub; + } + return PrologueInstructionType::VeryLikelyPartOfPrologue; + } + // Detect sub instructions operating on the stack pointer. + // Detect `add fp, sp, #0xXX` instructions + if bits_22_to_32 & 0b1011111110 == 0b1001000100 { + // Section C3.4, Data processing - immediate + // unsigned add / sub imm, size class X (8 bytes) + let result_reg = (word & 0b11111) as u16; + let input_reg = ((word >> 5) & 0b11111) as u16; + let is_sub = ((word >> 30) & 0b1) == 0b1; + let expected_result_reg = if is_sub { 31 } else { 29 }; + if input_reg != 31 || result_reg != expected_result_reg { + return PrologueInstructionType::NotExpectedInPrologue; + } + return PrologueInstructionType::VeryLikelyPartOfPrologue; + } + PrologueInstructionType::NotExpectedInPrologue + } + + /// Step backwards over one (already executed) instruction. + pub fn reverse_step_instruction(&mut self, word: u32) -> PrologueStepResult { + // Detect pacibsp (verify stack pointer authentication) + if word == 0xd503237f { + return PrologueStepResult::ValidPrologueInstruction; + } + + // Detect stores of register pairs to the stack. + if (word >> 22) & 0b1011111001 == 0b1010100000 { + // Section C3.3, Loads and stores. + // but only those that are commonly seen in prologues (bits 29 and 31 are set) + let writeback_bits = (word >> 23) & 0b11; + if writeback_bits == 0b00 { + // Not 64-bit load/store. 
+ return PrologueStepResult::UnexpectedInstruction( + UnexpectedInstructionType::StoreOfWrongSize, + ); + } + let reference_reg = ((word >> 5) & 0b11111) as u16; + if reference_reg != 31 { + return PrologueStepResult::UnexpectedInstruction( + UnexpectedInstructionType::StoreReferenceRegisterNotSp, + ); + } + let is_preindexed_writeback = writeback_bits == 0b11; + let is_postindexed_writeback = writeback_bits == 0b01; // TODO: are there postindexed stores? What do they mean? + if is_preindexed_writeback || is_postindexed_writeback { + let imm7 = (((((word >> 15) & 0b1111111) as i16) << 9) >> 6) as i32; + self.sp_offset -= imm7; // - to undo the instruction + } + return PrologueStepResult::ValidPrologueInstruction; + } + // Detect sub instructions operating on the stack pointer. + if (word >> 23) & 0b111111111 == 0b110100010 { + // Section C3.4, Data processing - immediate + // unsigned sub imm, size class X (8 bytes) + let result_reg = (word & 0b11111) as u16; + let input_reg = ((word >> 5) & 0b11111) as u16; + if result_reg != 31 || input_reg != 31 { + return PrologueStepResult::UnexpectedInstruction( + UnexpectedInstructionType::AddSubNotOperatingOnSp, + ); + } + let mut imm12 = ((word >> 10) & 0b111111111111) as i32; + let shift_immediate_by_12 = ((word >> 22) & 0b1) == 0b1; + if shift_immediate_by_12 { + imm12 <<= 12 + } + self.sp_offset += imm12; // + to undo the sub instruction + return PrologueStepResult::ValidPrologueInstruction; + } + PrologueStepResult::UnexpectedInstruction(UnexpectedInstructionType::Unknown) + } +} + +pub fn unwind_rule_from_detected_prologue( + slice_from_start: &[u8], + slice_to_end: &[u8], +) -> Option { + let mut detector = PrologueDetectorAarch64::new(); + match detector.analyze_slices(slice_from_start, slice_to_end) { + PrologueResult::ProbablyAlreadyInBody(_) => None, + PrologueResult::FoundFunctionStart { sp_offset } => { + let sp_offset_by_16 = u16::try_from(sp_offset / 16).ok()?; + let rule = if sp_offset_by_16 == 0 { + 
UnwindRuleAarch64::NoOp + } else { + UnwindRuleAarch64::OffsetSp { sp_offset_by_16 } + }; + Some(rule) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_prologue_1() { + // gimli::read::unit::parse_attribute + // 1000dfeb8 ff 43 01 d1 sub sp, sp, #0x50 + // 1000dfebc f6 57 02 a9 stp x22, x21, [sp, #local_30] + // 1000dfec0 f4 4f 03 a9 stp x20, x19, [sp, #local_20] + // 1000dfec4 fd 7b 04 a9 stp x29, x30, [sp, #local_10] + // 1000dfec8 fd 03 01 91 add x29, sp, #0x40 + // 1000dfecc f4 03 04 aa mov x20, x4 + // 1000dfed0 f5 03 01 aa mov x21, x1 + + let bytes = &[ + 0xff, 0x43, 0x01, 0xd1, 0xf6, 0x57, 0x02, 0xa9, 0xf4, 0x4f, 0x03, 0xa9, 0xfd, 0x7b, + 0x04, 0xa9, 0xfd, 0x03, 0x01, 0x91, 0xf4, 0x03, 0x04, 0xaa, 0xf5, 0x03, 0x01, 0xaa, + ]; + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..0], &bytes[0..]), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..4], &bytes[4..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 5 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..8], &bytes[8..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 5 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..12], &bytes[12..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 5 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..16], &bytes[16..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 5 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..20], &bytes[20..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..24], &bytes[24..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..28], &bytes[28..]), + None + ); + } + + #[test] + fn test_prologue_with_pacibsp() { + // 1801245c4 08 58 29 b8 str w8,[x0, w9, UXTW #0x2] + // 1801245c8 c0 03 5f d6 ret + // _malloc_zone_realloc + // 1801245cc 7f 23 03 d5 pacibsp + // 1801245d0 f8 5f bc a9 stp x24,x23,[sp, #local_40]! 
+ // 1801245d4 f6 57 01 a9 stp x22,x21,[sp, #local_30] + // 1801245d8 f4 4f 02 a9 stp x20,x19,[sp, #local_20] + // 1801245dc fd 7b 03 a9 stp x29,x30,[sp, #local_10] + // 1801245e0 fd c3 00 91 add x29,sp,#0x30 + // 1801245e4 f3 03 02 aa mov x19,x2 + // 1801245e8 f4 03 01 aa mov x20,x1 + + let bytes = &[ + 0x08, 0x58, 0x29, 0xb8, 0xc0, 0x03, 0x5f, 0xd6, 0x7f, 0x23, 0x03, 0xd5, 0xf8, 0x5f, + 0xbc, 0xa9, 0xf6, 0x57, 0x01, 0xa9, 0xf4, 0x4f, 0x02, 0xa9, 0xfd, 0x7b, 0x03, 0xa9, + 0xfd, 0xc3, 0x00, 0x91, 0xf3, 0x03, 0x02, 0xaa, 0xf4, 0x03, 0x01, 0xaa, + ]; + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..0], &bytes[0..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..4], &bytes[4..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..8], &bytes[8..]), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..12], &bytes[12..]), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..16], &bytes[16..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 4 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..20], &bytes[20..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 4 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..24], &bytes[24..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 4 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..28], &bytes[28..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 4 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..32], &bytes[32..]), + None + ); + } + + #[test] + fn test_prologue_with_mov_fp_sp() { + // _tiny_free_list_add_ptr + // 180126e94 7f 23 03 d5 pacibsp + // 180126e98 fd 7b bf a9 stp x29,x30,[sp, #local_10]! 
+ // 180126e9c fd 03 00 91 mov x29,sp + // 180126ea0 68 04 00 51 sub w8,w3,#0x1 + + let bytes = &[ + 0x7f, 0x23, 0x03, 0xd5, 0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x68, 0x04, + 0x00, 0x51, + ]; + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..0], &bytes[0..]), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..4], &bytes[4..]), + Some(UnwindRuleAarch64::NoOp) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..8], &bytes[8..]), + Some(UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 1 }) + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..12], &bytes[12..]), + None + ); + } + + #[test] + fn test_no_prologue_despite_stack_store() { + // We're in the middle of a function and are storing something to the stack. + // But this is not a prologue, so it shouldn't be detected as one. + // + // 1004073d0 e8 17 00 f9 str x8,[sp, #0x28] + // 1004073d4 03 00 00 14 b LAB_1004073e0 + // 1004073d8 ff ff 01 a9 stp xzr,xzr,[sp, #0x18] ; <-- stores the pair xzr, xzr on the stack + // 1004073dc ff 17 00 f9 str xzr,[sp, #0x28] + // 1004073e0 e0 03 00 91 mov x0,sp + + let bytes = &[ + 0xe8, 0x17, 0x00, 0xf9, 0x03, 0x00, 0x00, 0x14, 0xff, 0xff, 0x01, 0xa9, 0xff, 0x17, + 0x00, 0xf9, 0xe0, 0x03, 0x00, 0x91, + ]; + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..0], &bytes[0..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..4], &bytes[4..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..8], &bytes[8..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..12], &bytes[12..]), + None + ); + assert_eq!( + unwind_rule_from_detected_prologue(&bytes[..16], &bytes[16..]), + None + ); + } +} diff --git a/third_party/rust/framehop/src/aarch64/macho.rs b/third_party/rust/framehop/src/aarch64/macho.rs new file mode 100644 index 000000000000..84deb80ae25c --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/macho.rs @@ -0,0 
+1,96 @@ +use super::arch::ArchAarch64; +use super::unwind_rule::UnwindRuleAarch64; +use crate::instruction_analysis::InstructionAnalysis; +use crate::macho::{CompactUnwindInfoUnwinderError, CompactUnwindInfoUnwinding, CuiUnwindResult}; +use macho_unwind_info::opcodes::OpcodeArm64; +use macho_unwind_info::Function; + +impl CompactUnwindInfoUnwinding for ArchAarch64 { + fn unwind_frame( + function: Function, + is_first_frame: bool, + address_offset_within_function: usize, + function_bytes: Option<&[u8]>, + ) -> Result, CompactUnwindInfoUnwinderError> { + let opcode = OpcodeArm64::parse(function.opcode); + if is_first_frame { + if opcode == OpcodeArm64::Null { + return Ok(CuiUnwindResult::ExecRule(UnwindRuleAarch64::NoOp)); + } + // The pc might be in a prologue or an epilogue. The compact unwind info format ignores + // prologues and epilogues; the opcodes only describe the function body. So we do some + // instruction analysis to check for prologues and epilogues. + if let Some(function_bytes) = function_bytes { + if let Some(rule) = Self::rule_from_instruction_analysis( + function_bytes, + address_offset_within_function, + ) { + // We are inside a prologue / epilogue. Ignore the opcode and use the rule from + // instruction analysis. + return Ok(CuiUnwindResult::ExecRule(rule)); + } + } + } + + // At this point we know with high certainty that we are in a function body. 
+ let r = match opcode { + OpcodeArm64::Null => { + return Err(CompactUnwindInfoUnwinderError::FunctionHasNoInfo); + } + OpcodeArm64::Frameless { + stack_size_in_bytes, + } => { + if is_first_frame { + if stack_size_in_bytes == 0 { + CuiUnwindResult::ExecRule(UnwindRuleAarch64::NoOp) + } else { + CuiUnwindResult::ExecRule(UnwindRuleAarch64::OffsetSp { + sp_offset_by_16: stack_size_in_bytes / 16, + }) + } + } else { + return Err(CompactUnwindInfoUnwinderError::CallerCannotBeFrameless); + } + } + OpcodeArm64::Dwarf { eh_frame_fde } => CuiUnwindResult::NeedDwarf(eh_frame_fde), + OpcodeArm64::FrameBased { .. } => { + CuiUnwindResult::ExecRule(UnwindRuleAarch64::UseFramePointer) + } + OpcodeArm64::UnrecognizedKind(kind) => { + return Err(CompactUnwindInfoUnwinderError::BadOpcodeKind(kind)) + } + }; + Ok(r) + } + + fn rule_for_stub_helper( + offset: u32, + ) -> Result, CompactUnwindInfoUnwinderError> { + // shared: + // +0x0 1d309c B1 94 48 10 adr x17, #0x100264330 + // +0x4 1d30a0 1F 20 03 D5 nop + // +0x8 1d30a4 F0 47 BF A9 stp x16, x17, [sp, #-0x10]! 
+ // +0xc 1d30a8 1F 20 03 D5 nop + // +0x10 1d30ac F0 7A 32 58 ldr x16, #dyld_stub_binder_100238008 + // +0x14 1d30b0 00 02 1F D6 br x16 + // first stub: + // +0x18 1d30b4 50 00 00 18 ldr w16, =0x1800005000000000 + // +0x1c 1d30b8 F9 FF FF 17 b 0x1001d309c + // +0x20 1d30bc 00 00 00 00 (padding) + // second stub: + // +0x24 1d30c0 50 00 00 18 ldr w16, =0x1800005000000012 + // +0x28 1d30c4 F6 FF FF 17 b 0x1001d309c + // +0x2c 1d30c8 00 00 00 00 (padding) + let rule = if offset < 0xc { + // Stack pointer hasn't been touched, just follow lr + UnwindRuleAarch64::NoOp + } else if offset < 0x18 { + // Add 0x10 to the stack pointer and follow lr + UnwindRuleAarch64::OffsetSp { sp_offset_by_16: 1 } + } else { + // Stack pointer hasn't been touched, just follow lr + UnwindRuleAarch64::NoOp + }; + Ok(CuiUnwindResult::ExecRule(rule)) + } +} diff --git a/third_party/rust/framehop/src/aarch64/mod.rs b/third_party/rust/framehop/src/aarch64/mod.rs new file mode 100644 index 000000000000..470f814b4ce8 --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/mod.rs @@ -0,0 +1,17 @@ +mod arch; +mod cache; +mod dwarf; +mod instruction_analysis; +#[cfg(feature = "macho")] +mod macho; +#[cfg(feature = "pe")] +mod pe; +mod unwind_rule; +mod unwinder; +mod unwindregs; + +pub use arch::*; +pub use cache::*; +pub use unwind_rule::*; +pub use unwinder::*; +pub use unwindregs::*; diff --git a/third_party/rust/framehop/src/aarch64/pe.rs b/third_party/rust/framehop/src/aarch64/pe.rs new file mode 100644 index 000000000000..8f9245152515 --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/pe.rs @@ -0,0 +1,19 @@ +use super::arch::ArchAarch64; +use crate::pe::{PeSections, PeUnwinderError, PeUnwinding}; +use crate::unwind_result::UnwindResult; + +impl PeUnwinding for ArchAarch64 { + fn unwind_frame( + _sections: PeSections, + _address: u32, + _regs: &mut Self::UnwindRegs, + _is_first_frame: bool, + _read_stack: &mut F, + ) -> Result, PeUnwinderError> + where + F: FnMut(u64) -> Result, + D: 
core::ops::Deref, + { + Err(PeUnwinderError::Aarch64Unsupported) + } +} diff --git a/third_party/rust/framehop/src/aarch64/unwind_rule.rs b/third_party/rust/framehop/src/aarch64/unwind_rule.rs new file mode 100644 index 000000000000..0310258686b3 --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/unwind_rule.rs @@ -0,0 +1,264 @@ +use super::unwindregs::UnwindRegsAarch64; +use crate::add_signed::checked_add_signed; +use crate::error::Error; + +use crate::unwind_rule::UnwindRule; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UnwindRuleAarch64 { + /// (sp, fp, lr) = (sp, fp, lr) + /// Only possible for the first frame. Subsequent frames must get the + /// return address from somewhere other than the lr register to avoid + /// infinite loops. + NoOp, + /// (sp, fp, lr) = if is_first_frame (sp, fp, lr) else (fp + 16, *fp, *(fp + 8)) + /// Used as a fallback rule. + NoOpIfFirstFrameOtherwiseFp, + /// (sp, fp, lr) = (sp + 16x, fp, lr) + /// Only possible for the first frame. Subsequent frames must get the + /// return address from somewhere other than the lr register to avoid + /// infinite loops. + OffsetSp { sp_offset_by_16: u16 }, + /// (sp, fp, lr) = (sp + 16x, fp, lr) if is_first_frame + /// This rule reflects an ambiguity in DWARF CFI information. When the + /// return address is "undefined" because it was omitted, it could mean + /// "same value", but this is only allowed for the first frame. 
+ OffsetSpIfFirstFrameOtherwiseStackEndsHere { sp_offset_by_16: u16 }, + /// (sp, fp, lr) = (sp + 16x, fp, *(sp + 8y)) + OffsetSpAndRestoreLr { + sp_offset_by_16: u16, + lr_storage_offset_from_sp_by_8: i16, + }, + /// (sp, fp, lr) = (sp + 16x, *(sp + 8y), *(sp + 8z)) + OffsetSpAndRestoreFpAndLr { + sp_offset_by_16: u16, + fp_storage_offset_from_sp_by_8: i16, + lr_storage_offset_from_sp_by_8: i16, + }, + /// (sp, fp, lr) = (fp + 16, *fp, *(fp + 8)) + UseFramePointer, + /// (sp, fp, lr) = (fp + 8x, *(fp + 8y), *(fp + 8z)) + UseFramepointerWithOffsets { + sp_offset_from_fp_by_8: u16, + fp_storage_offset_from_fp_by_8: i16, + lr_storage_offset_from_fp_by_8: i16, + }, +} + +impl UnwindRule for UnwindRuleAarch64 { + type UnwindRegs = UnwindRegsAarch64; + + fn rule_for_stub_functions() -> Self { + UnwindRuleAarch64::NoOp + } + fn rule_for_function_start() -> Self { + UnwindRuleAarch64::NoOp + } + fn fallback_rule() -> Self { + UnwindRuleAarch64::UseFramePointer + } + + fn exec( + self, + is_first_frame: bool, + regs: &mut UnwindRegsAarch64, + read_stack: &mut F, + ) -> Result, Error> + where + F: FnMut(u64) -> Result, + { + let lr = regs.lr(); + let sp = regs.sp(); + let fp = regs.fp(); + + let (new_lr, new_sp, new_fp) = match self { + UnwindRuleAarch64::NoOp => { + if !is_first_frame { + return Err(Error::DidNotAdvance); + } + (lr, sp, fp) + } + UnwindRuleAarch64::NoOpIfFirstFrameOtherwiseFp => { + if is_first_frame { + (lr, sp, fp) + } else { + let fp = regs.fp(); + let new_sp = fp.checked_add(16).ok_or(Error::IntegerOverflow)?; + let new_lr = + read_stack(fp + 8).map_err(|_| Error::CouldNotReadStack(fp + 8))?; + let new_fp = read_stack(fp).map_err(|_| Error::CouldNotReadStack(fp))?; + if new_sp <= sp { + return Err(Error::FramepointerUnwindingMovedBackwards); + } + (new_lr, new_sp, new_fp) + } + } + UnwindRuleAarch64::OffsetSpIfFirstFrameOtherwiseStackEndsHere { sp_offset_by_16 } => { + if !is_first_frame { + return Ok(None); + } + let sp_offset = 
u64::from(sp_offset_by_16) * 16; + let new_sp = sp.checked_add(sp_offset).ok_or(Error::IntegerOverflow)?; + (lr, new_sp, fp) + } + UnwindRuleAarch64::OffsetSp { sp_offset_by_16 } => { + if !is_first_frame { + return Err(Error::DidNotAdvance); + } + let sp_offset = u64::from(sp_offset_by_16) * 16; + let new_sp = sp.checked_add(sp_offset).ok_or(Error::IntegerOverflow)?; + (lr, new_sp, fp) + } + UnwindRuleAarch64::OffsetSpAndRestoreLr { + sp_offset_by_16, + lr_storage_offset_from_sp_by_8, + } => { + let sp_offset = u64::from(sp_offset_by_16) * 16; + let new_sp = sp.checked_add(sp_offset).ok_or(Error::IntegerOverflow)?; + let lr_storage_offset = i64::from(lr_storage_offset_from_sp_by_8) * 8; + let lr_location = + checked_add_signed(sp, lr_storage_offset).ok_or(Error::IntegerOverflow)?; + let new_lr = + read_stack(lr_location).map_err(|_| Error::CouldNotReadStack(lr_location))?; + (new_lr, new_sp, fp) + } + UnwindRuleAarch64::OffsetSpAndRestoreFpAndLr { + sp_offset_by_16, + fp_storage_offset_from_sp_by_8, + lr_storage_offset_from_sp_by_8, + } => { + let sp_offset = u64::from(sp_offset_by_16) * 16; + let new_sp = sp.checked_add(sp_offset).ok_or(Error::IntegerOverflow)?; + let lr_storage_offset = i64::from(lr_storage_offset_from_sp_by_8) * 8; + let lr_location = + checked_add_signed(sp, lr_storage_offset).ok_or(Error::IntegerOverflow)?; + let new_lr = + read_stack(lr_location).map_err(|_| Error::CouldNotReadStack(lr_location))?; + let fp_storage_offset = i64::from(fp_storage_offset_from_sp_by_8) * 8; + let fp_location = + checked_add_signed(sp, fp_storage_offset).ok_or(Error::IntegerOverflow)?; + let new_fp = + read_stack(fp_location).map_err(|_| Error::CouldNotReadStack(fp_location))?; + (new_lr, new_sp, new_fp) + } + UnwindRuleAarch64::UseFramePointer => { + // Do a frame pointer stack walk. Frame-based aarch64 functions store the caller's fp and lr + // on the stack and then set fp to the address where the caller's fp is stored. 
+ // + // Function prologue example (this one also stores x19, x20, x21 and x22): + // stp x22, x21, [sp, #-0x30]! ; subtracts 0x30 from sp, and then stores (x22, x21) at sp + // stp x20, x19, [sp, #0x10] ; stores (x20, x19) at sp + 0x10 (== original sp - 0x20) + // stp fp, lr, [sp, #0x20] ; stores (fp, lr) at sp + 0x20 (== original sp - 0x10) + // add fp, sp, #0x20 ; sets fp to the address where the old fp is stored on the stack + // + // Function epilogue: + // ldp fp, lr, [sp, #0x20] ; restores fp and lr from the stack + // ldp x20, x19, [sp, #0x10] ; restores x20 and x19 + // ldp x22, x21, [sp], #0x30 ; restores x22 and x21, and then adds 0x30 to sp + // ret ; follows lr to jump back to the caller + // + // Functions are called with bl ("branch with link"); bl puts the return address into the lr register. + // When a function reaches its end, ret reads the return address from lr and jumps to it. + // On aarch64, the stack pointer is always aligned to 16 bytes, and registers are usually written + // to and read from the stack in pairs. + // In frame-based functions, fp and lr are placed next to each other on the stack. + // So when a function is called, we have the following stack layout: + // + // [... rest of the stack] + // ^ sp ^ fp + // bl some_function ; jumps to the function and sets lr = return address + // [... rest of the stack] + // ^ sp ^ fp + // adjust stack ptr, write some registers, and write fp and lr + // [more saved regs] [caller's frame pointer] [return address] [... rest of the stack] + // ^ sp ^ fp + // add fp, sp, #0x20 ; sets fp to where the caller's fp is now stored + // [more saved regs] [caller's frame pointer] [return address] [... rest of the stack] + // ^ sp ^ fp + // ; can execute bl and overwrite lr with a new value + // ... [more saved regs] [caller's frame pointer] [return address] [... rest of the stack] + // ^ sp ^ fp + // + // So: *fp is the caller's frame pointer, and *(fp + 8) is the return address. 
+ let fp = regs.fp(); + let new_sp = fp.checked_add(16).ok_or(Error::IntegerOverflow)?; + let new_lr = read_stack(fp + 8).map_err(|_| Error::CouldNotReadStack(fp + 8))?; + let new_fp = read_stack(fp).map_err(|_| Error::CouldNotReadStack(fp))?; + if new_fp == 0 { + return Ok(None); + } + if new_fp <= fp || new_sp <= sp { + return Err(Error::FramepointerUnwindingMovedBackwards); + } + (new_lr, new_sp, new_fp) + } + UnwindRuleAarch64::UseFramepointerWithOffsets { + sp_offset_from_fp_by_8, + fp_storage_offset_from_fp_by_8, + lr_storage_offset_from_fp_by_8, + } => { + let sp_offset_from_fp = u64::from(sp_offset_from_fp_by_8) * 8; + let new_sp = fp + .checked_add(sp_offset_from_fp) + .ok_or(Error::IntegerOverflow)?; + let lr_storage_offset = i64::from(lr_storage_offset_from_fp_by_8) * 8; + let lr_location = + checked_add_signed(fp, lr_storage_offset).ok_or(Error::IntegerOverflow)?; + let new_lr = + read_stack(lr_location).map_err(|_| Error::CouldNotReadStack(lr_location))?; + let fp_storage_offset = i64::from(fp_storage_offset_from_fp_by_8) * 8; + let fp_location = + checked_add_signed(fp, fp_storage_offset).ok_or(Error::IntegerOverflow)?; + let new_fp = + read_stack(fp_location).map_err(|_| Error::CouldNotReadStack(fp_location))?; + + if new_fp == 0 { + return Ok(None); + } + if new_fp <= fp || new_sp <= sp { + return Err(Error::FramepointerUnwindingMovedBackwards); + } + (new_lr, new_sp, new_fp) + } + }; + let return_address = regs.lr_mask().strip_ptr_auth(new_lr); + if return_address == 0 { + return Ok(None); + } + if !is_first_frame && new_sp == sp { + return Err(Error::DidNotAdvance); + } + regs.set_lr(new_lr); + regs.set_sp(new_sp); + regs.set_fp(new_fp); + + Ok(Some(return_address)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_basic() { + let stack = [ + 1, 2, 3, 4, 0x40, 0x100200, 5, 6, 0x70, 0x100100, 7, 8, 9, 10, 0x0, 0x0, + ]; + let mut read_stack = |addr| Ok(stack[(addr / 8) as usize]); + let mut regs = 
UnwindRegsAarch64::new(0x100300, 0x10, 0x20); + let res = UnwindRuleAarch64::NoOp.exec(true, &mut regs, &mut read_stack); + assert_eq!(res, Ok(Some(0x100300))); + assert_eq!(regs.sp(), 0x10); + let res = UnwindRuleAarch64::UseFramePointer.exec(false, &mut regs, &mut read_stack); + assert_eq!(res, Ok(Some(0x100200))); + assert_eq!(regs.sp(), 0x30); + assert_eq!(regs.fp(), 0x40); + let res = UnwindRuleAarch64::UseFramePointer.exec(false, &mut regs, &mut read_stack); + assert_eq!(res, Ok(Some(0x100100))); + assert_eq!(regs.sp(), 0x50); + assert_eq!(regs.fp(), 0x70); + let res = UnwindRuleAarch64::UseFramePointer.exec(false, &mut regs, &mut read_stack); + assert_eq!(res, Ok(None)); + } +} diff --git a/third_party/rust/framehop/src/aarch64/unwinder.rs b/third_party/rust/framehop/src/aarch64/unwinder.rs new file mode 100644 index 000000000000..e8fafb6808ae --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/unwinder.rs @@ -0,0 +1,66 @@ +use core::ops::Deref; + +use crate::{ + unwinder::UnwinderInternal, AllocationPolicy, Error, FrameAddress, MayAllocateDuringUnwind, + Module, Unwinder, +}; + +use super::{ArchAarch64, CacheAarch64, UnwindRegsAarch64}; + +/// The unwinder for the Aarch64 CPU architecture. Use the [`Unwinder`] trait for unwinding. +/// +/// Type arguments: +/// +/// - `D`: The type for unwind section data in the modules. See [`Module`]. +/// - `P`: The [`AllocationPolicy`]. +pub struct UnwinderAarch64(UnwinderInternal); + +impl Default for UnwinderAarch64 { + fn default() -> Self { + Self::new() + } +} + +impl Clone for UnwinderAarch64 { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl UnwinderAarch64 { + /// Create an unwinder for a process. + pub fn new() -> Self { + Self(UnwinderInternal::new()) + } +} + +impl, P: AllocationPolicy> Unwinder for UnwinderAarch64 { + type UnwindRegs = UnwindRegsAarch64; + type Cache = CacheAarch64

; + type Module = Module; + + fn add_module(&mut self, module: Module) { + self.0.add_module(module); + } + + fn remove_module(&mut self, module_address_range_start: u64) { + self.0.remove_module(module_address_range_start); + } + + fn max_known_code_address(&self) -> u64 { + self.0.max_known_code_address() + } + + fn unwind_frame( + &self, + address: FrameAddress, + regs: &mut UnwindRegsAarch64, + cache: &mut CacheAarch64

, + read_stack: &mut F, + ) -> Result, Error> + where + F: FnMut(u64) -> Result, + { + self.0.unwind_frame(address, regs, &mut cache.0, read_stack) + } +} diff --git a/third_party/rust/framehop/src/aarch64/unwindregs.rs b/third_party/rust/framehop/src/aarch64/unwindregs.rs new file mode 100644 index 000000000000..3d0c11d03a48 --- /dev/null +++ b/third_party/rust/framehop/src/aarch64/unwindregs.rs @@ -0,0 +1,182 @@ +use core::fmt::Debug; + +use crate::display_utils::HexNum; + +/// The registers used for unwinding on Aarch64. We only need lr (x30), sp (x31), +/// and fp (x29). +/// +/// We also have a [`PtrAuthMask`] which allows stripping off the pointer authentication +/// hash bits from the return address when unwinding through libraries which use pointer +/// authentication, e.g. in system libraries on macOS. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct UnwindRegsAarch64 { + lr_mask: PtrAuthMask, + lr: u64, + sp: u64, + fp: u64, +} + +/// Aarch64 CPUs support special instructions which interpret pointers as pair +/// of the pointer address and an encrypted hash: The address is stored in the +/// lower bits and the hash in the high bits. These are called "authenticated" +/// pointers. Special instructions exist to verify pointers before dereferencing +/// them. +/// +/// Return address can be such authenticated pointers. To return to an +/// authenticated return address, the "retab" instruction is used instead of +/// the regular "ret" instruction. +/// +/// Stack walkers need to strip the encrypted hash from return addresses because +/// they need the raw code address. +/// +/// On macOS arm64, system libraries compiled with the arm64e target use pointer +/// pointer authentication for return addresses. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct PtrAuthMask(pub u64); + +impl PtrAuthMask { + /// Create a no-op mask which treats all bits of the pointer as address bits, + /// so no bits are stripped. 
+ pub fn new_no_strip() -> Self { + Self(u64::MAX) + } + + /// Create a mask for 24 bits hash + 40 bits pointer. This appears to be + /// what macOS arm64e uses. It is unclear whether we can rely on this or + /// whether it can change. + /// + /// On macOS arm64, this mask can be applied to both authenticated pointers + /// and to non-authenticated pointers without data loss; non-authenticated + /// don't appear to use the top 24 bits (they're always zero). + pub fn new_24_40() -> Self { + Self(u64::MAX >> 24) + } + + /// Deduce a mask based on the highest known address. The leading zero bits + /// in this address will be reserved for the hash. + pub fn from_max_known_address(address: u64) -> Self { + Self(u64::MAX >> address.leading_zeros()) + } + + /// Apply the mask to the given pointer. + #[inline(always)] + pub fn strip_ptr_auth(&self, ptr: u64) -> u64 { + ptr & self.0 + } +} + +impl UnwindRegsAarch64 { + /// Create a set of unwind register values and do not apply any pointer + /// authentication stripping. + pub fn new(lr: u64, sp: u64, fp: u64) -> Self { + Self { + lr_mask: PtrAuthMask::new_no_strip(), + lr, + sp, + fp, + } + } + + /// Create a set of unwind register values with the given mask for return + /// address pointer authentication stripping. + pub fn new_with_ptr_auth_mask( + code_ptr_auth_mask: PtrAuthMask, + lr: u64, + sp: u64, + fp: u64, + ) -> Self { + Self { + lr_mask: code_ptr_auth_mask, + lr: code_ptr_auth_mask.strip_ptr_auth(lr), + sp, + fp, + } + } + + /// Get the [`PtrAuthMask`] which we apply to the `lr` value. + #[inline(always)] + pub fn lr_mask(&self) -> PtrAuthMask { + self.lr_mask + } + + /// Get the stack pointer value. + #[inline(always)] + pub fn sp(&self) -> u64 { + self.sp + } + + /// Set the stack pointer value. + #[inline(always)] + pub fn set_sp(&mut self, sp: u64) { + self.sp = sp + } + + /// Get the frame pointer value (x29). 
+ #[inline(always)] + pub fn fp(&self) -> u64 { + self.fp + } + + /// Set the frame pointer value (x29). + #[inline(always)] + pub fn set_fp(&mut self, fp: u64) { + self.fp = fp + } + + /// Get the lr register value. + #[inline(always)] + pub fn lr(&self) -> u64 { + self.lr + } + + /// Set the lr register value. + #[inline(always)] + pub fn set_lr(&mut self, lr: u64) { + self.lr = self.lr_mask.strip_ptr_auth(lr) + } +} + +impl Debug for UnwindRegsAarch64 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("UnwindRegsAarch64") + .field("lr", &HexNum(self.lr)) + .field("sp", &HexNum(self.sp)) + .field("fp", &HexNum(self.fp)) + .finish() + } +} + +#[cfg(test)] +mod test { + use crate::aarch64::PtrAuthMask; + + #[test] + fn test() { + assert_eq!(PtrAuthMask::new_24_40().0, u64::MAX >> 24); + assert_eq!(PtrAuthMask::new_24_40().0, (1 << 40) - 1); + assert_eq!( + PtrAuthMask::from_max_known_address(0x0000aaaab54f7000).0, + 0x0000ffffffffffff + ); + assert_eq!( + PtrAuthMask::from_max_known_address(0x0000ffffa3206000).0, + 0x0000ffffffffffff + ); + assert_eq!( + PtrAuthMask::from_max_known_address(0xffffffffc05a9000).0, + 0xffffffffffffffff + ); + assert_eq!( + PtrAuthMask::from_max_known_address(0x000055ba9f07e000).0, + 0x00007fffffffffff + ); + assert_eq!( + PtrAuthMask::from_max_known_address(0x00007f76b8019000).0, + 0x00007fffffffffff + ); + assert_eq!( + PtrAuthMask::from_max_known_address(0x000000022a3ccff7).0, + 0x00000003ffffffff + ); + } +} diff --git a/third_party/rust/framehop/src/add_signed.rs b/third_party/rust/framehop/src/add_signed.rs new file mode 100644 index 000000000000..9e924d7bbb7a --- /dev/null +++ b/third_party/rust/framehop/src/add_signed.rs @@ -0,0 +1,99 @@ +/// Add a signed integer to this unsigned integer, with wrapping. 
+#[allow(unused)] +pub fn wrapping_add_signed(lhs: T, rhs: T::Signed) -> T { + lhs.wrapping_add_signed(rhs) +} + +/// Add a signed integer to this unsigned integer, but only if doing so +/// does not cause underflow / overflow. +pub fn checked_add_signed(lhs: T, rhs: T::Signed) -> Option { + lhs.checked_add_signed(rhs) +} + +/// A trait which adds method to unsigned integers which allow checked and +/// wrapping addition of the corresponding signed integer type. +/// Unfortunately, these methods conflict with the proposed standard rust +/// methods, so this trait isn't actually usable without risking build +/// errors once these methods are stabilized. +/// https://github.com/rust-lang/rust/issues/87840 +pub trait AddSigned: Sized { + type Signed; + + /// Add a signed integer to this unsigned integer, with wrapping. + fn wrapping_add_signed(self, rhs: Self::Signed) -> Self; + + /// Add a signed integer to this unsigned integer, but only if doing so + /// does not cause underflow / overflow. 
+ fn checked_add_signed(self, rhs: Self::Signed) -> Option; +} + +impl AddSigned for u64 { + type Signed = i64; + + fn wrapping_add_signed(self, rhs: i64) -> u64 { + self.wrapping_add(rhs as u64) + } + + fn checked_add_signed(self, rhs: i64) -> Option { + let res = AddSigned::wrapping_add_signed(self, rhs); + if (rhs >= 0 && res >= self) || (rhs < 0 && res < self) { + Some(res) + } else { + None + } + } +} + +impl AddSigned for u32 { + type Signed = i32; + + fn wrapping_add_signed(self, rhs: i32) -> u32 { + self.wrapping_add(rhs as u32) + } + + fn checked_add_signed(self, rhs: i32) -> Option { + let res = AddSigned::wrapping_add_signed(self, rhs); + if (rhs >= 0 && res >= self) || (rhs < 0 && res < self) { + Some(res) + } else { + None + } + } +} + +#[cfg(test)] +mod test { + use super::{checked_add_signed, wrapping_add_signed}; + + #[test] + fn test_wrapping() { + assert_eq!(wrapping_add_signed(1, 2), 3u64); + assert_eq!(wrapping_add_signed(2, 1), 3u64); + assert_eq!(wrapping_add_signed(5, -4), 1u64); + assert_eq!(wrapping_add_signed(5, -5), 0u64); + assert_eq!(wrapping_add_signed(u64::MAX - 5, 3), u64::MAX - 2); + assert_eq!(wrapping_add_signed(u64::MAX - 5, 5), u64::MAX); + assert_eq!(wrapping_add_signed(u64::MAX - 5, -5), u64::MAX - 10); + assert_eq!(wrapping_add_signed(1, -2), u64::MAX); + assert_eq!(wrapping_add_signed(2, -4), u64::MAX - 1); + assert_eq!(wrapping_add_signed(u64::MAX, 1), 0); + assert_eq!(wrapping_add_signed(u64::MAX - 5, 6), 0); + assert_eq!(wrapping_add_signed(u64::MAX - 5, 9), 3); + } + + #[test] + fn test_checked() { + assert_eq!(checked_add_signed(1, 2), Some(3u64)); + assert_eq!(checked_add_signed(2, 1), Some(3u64)); + assert_eq!(checked_add_signed(5, -4), Some(1u64)); + assert_eq!(checked_add_signed(5, -5), Some(0u64)); + assert_eq!(checked_add_signed(u64::MAX - 5, 3), Some(u64::MAX - 2)); + assert_eq!(checked_add_signed(u64::MAX - 5, 5), Some(u64::MAX)); + assert_eq!(checked_add_signed(u64::MAX - 5, -5), Some(u64::MAX - 10)); + 
assert_eq!(checked_add_signed(1u64, -2), None); + assert_eq!(checked_add_signed(2u64, -4), None); + assert_eq!(checked_add_signed(u64::MAX, 1), None); + assert_eq!(checked_add_signed(u64::MAX - 5, 6), None); + assert_eq!(checked_add_signed(u64::MAX - 5, 9), None); + } +} diff --git a/third_party/rust/framehop/src/arch.rs b/third_party/rust/framehop/src/arch.rs new file mode 100644 index 000000000000..b08c3fd31670 --- /dev/null +++ b/third_party/rust/framehop/src/arch.rs @@ -0,0 +1,6 @@ +use crate::unwind_rule::UnwindRule; + +pub trait Arch { + type UnwindRegs; + type UnwindRule: UnwindRule; +} diff --git a/third_party/rust/framehop/src/cache.rs b/third_party/rust/framehop/src/cache.rs new file mode 100644 index 000000000000..1c7a61f6e102 --- /dev/null +++ b/third_party/rust/framehop/src/cache.rs @@ -0,0 +1,81 @@ +use alloc::boxed::Box; + +use crate::{rule_cache::RuleCache, unwind_rule::UnwindRule}; + +pub use crate::rule_cache::CacheStats; + +/// A trait which lets you opt into allocation-free unwinding. The two implementations of +/// this trait are [`MustNotAllocateDuringUnwind`] and [`MayAllocateDuringUnwind`]. +pub trait AllocationPolicy { + type GimliUnwindContextStorage: gimli::UnwindContextStorage; + type GimliEvaluationStorage: gimli::EvaluationStorage; +} + +/// Require allocation-free unwinding. This is one of the two [`AllocationPolicy`] +/// implementations. +/// +/// Using this means that the unwinder cache takes up more memory, because it preallocates +/// space for DWARF CFI unwind table row evaluation and for DWARF CFI expression evaluation. +/// And because those preallocations are of a fixed size, it is possible that this fixed +/// size is not large enough for certain DWARF unwinding tasks. +pub struct MustNotAllocateDuringUnwind; + +/// This is only used in the implementation of [MustNotAllocateDuringUnwind] and +/// is not intended to be used by the outside world. 
+#[doc(hidden)] +pub struct StoreOnStack; + +impl gimli::UnwindContextStorage for StoreOnStack { + type Rules = [(gimli::Register, gimli::RegisterRule); 192]; + type Stack = [gimli::UnwindTableRow; 4]; +} + +impl gimli::EvaluationStorage for StoreOnStack { + type Stack = [gimli::Value; 64]; + type ExpressionStack = [(R, R); 4]; + type Result = [gimli::Piece; 1]; +} + +impl AllocationPolicy for MustNotAllocateDuringUnwind { + type GimliUnwindContextStorage = StoreOnStack; + type GimliEvaluationStorage = StoreOnStack; +} + +/// Allow allocation during unwinding. This is one of the two [`AllocationPolicy`] +/// implementations. +/// +/// This is the preferred policy because it saves memory and places no limitations on +/// DWARF CFI evaluation. +pub struct MayAllocateDuringUnwind; +impl AllocationPolicy for MayAllocateDuringUnwind { + type GimliUnwindContextStorage = gimli::StoreOnHeap; + type GimliEvaluationStorage = gimli::StoreOnHeap; +} + +/// The unwinder cache. This needs to be created upfront before unwinding. During +/// unwinding, the unwinder needs exclusive access to this cache. +/// +/// A single unwinder cache can be used with multiple unwinders alternatingly. +/// +/// The cache stores unwind rules for addresses it has seen before, and it stores the +/// unwind context which gimli needs for DWARF CFI evaluation. 
+pub struct Cache { + pub(crate) gimli_unwind_context: + Box>>, + pub(crate) rule_cache: RuleCache, +} + +impl Cache { + pub fn new() -> Self { + Self { + gimli_unwind_context: Box::new(gimli::UnwindContext::new_in()), + rule_cache: RuleCache::new(), + } + } +} + +impl Default for Cache { + fn default() -> Self { + Self::new() + } +} diff --git a/third_party/rust/framehop/src/code_address.rs b/third_party/rust/framehop/src/code_address.rs new file mode 100644 index 000000000000..892823608d7e --- /dev/null +++ b/third_party/rust/framehop/src/code_address.rs @@ -0,0 +1,75 @@ +use core::num::NonZeroU64; + +/// An absolute code address for a stack frame. Can either be taken directly from the +/// instruction pointer ("program counter"), or from a return address. +/// +/// These addresses are "AVMAs", i.e. Actual Virtual Memory Addresses, i.e. addresses +/// in the virtual memory of the profiled process. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum FrameAddress { + /// This address is the instruction pointer / program counter. This is what unwinding + /// starts with. + InstructionPointer(u64), + + /// This is a return address, i.e. the address to which the CPU will jump to when + /// returning from a function. This is the address of the instruction *after* the + /// call instruction. + /// + /// Unwinding produces a list of return addresses. + ReturnAddress(NonZeroU64), +} + +impl FrameAddress { + /// Create a [`FrameAddress::InstructionPointer`]. + pub fn from_instruction_pointer(ip: u64) -> Self { + FrameAddress::InstructionPointer(ip) + } + + /// Create a [`FrameAddress::ReturnAddress`]. This returns `None` if the given + /// address is zero. + pub fn from_return_address(return_address: u64) -> Option { + Some(FrameAddress::ReturnAddress(NonZeroU64::new( + return_address, + )?)) + } + + /// The raw address (AVMA). 
+ pub fn address(self) -> u64 { + match self { + FrameAddress::InstructionPointer(address) => address, + FrameAddress::ReturnAddress(address) => address.into(), + } + } + + /// The address (AVMA) that should be used for lookup. + /// + /// If this address is taken directly from the instruction pointer, then the lookup + /// address is just the raw address. + /// + /// If this address is a return address, then the lookup address is that address **minus + /// one byte**. This adjusted address will point inside the call instruction. This + /// subtraction of one byte is needed if you want to look up unwind information or + /// debug information, because you usually want the information for the call, not for + /// the next instruction after the call. + /// + /// Furthermore, this distinction matters if a function calls a noreturn function as + /// the last thing it does: If the call is the final instruction of the function, then + /// the return address will point *after* the function, into the *next* function. + /// If, during unwinding, you look up unwind information for that next function, you'd + /// get incorrect unwinding. + /// This has been observed in practice with `+[NSThread exit]`. + pub fn address_for_lookup(self) -> u64 { + match self { + FrameAddress::InstructionPointer(address) => address, + FrameAddress::ReturnAddress(address) => u64::from(address) - 1, + } + } + + /// Returns whether this address is a return address. 
+ pub fn is_return_address(self) -> bool { + match self { + FrameAddress::InstructionPointer(_) => false, + FrameAddress::ReturnAddress(_) => true, + } + } +} diff --git a/third_party/rust/framehop/src/display_utils.rs b/third_party/rust/framehop/src/display_utils.rs new file mode 100644 index 000000000000..ccd5004ee3ad --- /dev/null +++ b/third_party/rust/framehop/src/display_utils.rs @@ -0,0 +1,17 @@ +use core::fmt::{Binary, Debug, LowerHex}; + +pub struct HexNum(pub N); + +impl Debug for HexNum { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + LowerHex::fmt(&self.0, f) + } +} + +pub struct BinNum(pub N); + +impl Debug for BinNum { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + Binary::fmt(&self.0, f) + } +} diff --git a/third_party/rust/framehop/src/dwarf.rs b/third_party/rust/framehop/src/dwarf.rs new file mode 100644 index 000000000000..6dd2778926c4 --- /dev/null +++ b/third_party/rust/framehop/src/dwarf.rs @@ -0,0 +1,478 @@ +use core::marker::PhantomData; + +use alloc::vec::Vec; +use gimli::{ + CfaRule, CieOrFde, DebugFrame, EhFrame, EhFrameHdr, Encoding, EndianSlice, Evaluation, + EvaluationResult, EvaluationStorage, Expression, LittleEndian, Location, ParsedEhFrameHdr, + Reader, ReaderOffset, Register, RegisterRule, UnwindContext, UnwindContextStorage, + UnwindOffset, UnwindSection, UnwindTableRow, Value, +}; + +pub(crate) use gimli::BaseAddresses; + +use crate::{arch::Arch, unwind_result::UnwindResult, ModuleSectionInfo}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DwarfUnwinderError { + FdeFromOffsetFailed(gimli::Error), + UnwindInfoForAddressFailed(gimli::Error), + StackPointerMovedBackwards, + DidNotAdvance, + CouldNotRecoverCfa, + CouldNotRecoverReturnAddress, + CouldNotRecoverFramePointer, +} + +impl core::fmt::Display for DwarfUnwinderError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::FdeFromOffsetFailed(err) => { + write!(f, 
"Could not get the FDE for the supplied offset: {err}") + } + Self::UnwindInfoForAddressFailed(err) => write!( + f, + "Could not find DWARF unwind info for the requested address: {err}" + ), + Self::StackPointerMovedBackwards => write!(f, "Stack pointer moved backwards"), + Self::DidNotAdvance => write!(f, "Did not advance"), + Self::CouldNotRecoverCfa => write!(f, "Could not recover the CFA"), + Self::CouldNotRecoverReturnAddress => write!(f, "Could not recover the return address"), + Self::CouldNotRecoverFramePointer => write!(f, "Could not recover the frame pointer"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DwarfUnwinderError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::FdeFromOffsetFailed(e) => Some(e), + Self::UnwindInfoForAddressFailed(e) => Some(e), + _ => None, + } + } +} + +#[derive(Clone, Debug)] +pub enum ConversionError { + CfaIsExpression, + CfaIsOffsetFromUnknownRegister, + ReturnAddressRuleWithUnexpectedOffset, + ReturnAddressRuleWasWeird, + SpOffsetDoesNotFit, + RegisterNotStoredRelativeToCfa, + RestoringFpButNotLr, + LrStorageOffsetDoesNotFit, + FpStorageOffsetDoesNotFit, + SpOffsetFromFpDoesNotFit, + FramePointerRuleDoesNotRestoreLr, + FramePointerRuleDoesNotRestoreFp, + FramePointerRuleDoesNotRestoreBp, + FramePointerRuleHasStrangeBpOffset, +} + +pub trait DwarfUnwinding: Arch { + fn unwind_frame( + section: &impl UnwindSection, + unwind_info: &UnwindTableRow, + encoding: Encoding, + regs: &mut Self::UnwindRegs, + is_first_frame: bool, + read_stack: &mut F, + ) -> Result, DwarfUnwinderError> + where + F: FnMut(u64) -> Result, + R: Reader, + UCS: UnwindContextStorage, + ES: EvaluationStorage; + + fn rule_if_uncovered_by_fde() -> Self::UnwindRule; +} + +pub enum UnwindSectionType { + EhFrame, + DebugFrame, +} + +pub struct DwarfUnwinder<'a, R, A, UCS> +where + R: Reader, + A: DwarfUnwinding, + UCS: UnwindContextStorage, +{ + unwind_section_data: R, + unwind_section_type: 
UnwindSectionType, + eh_frame_hdr: Option>>, + unwind_context: &'a mut UnwindContext, + base_svma: u64, + bases: BaseAddresses, + _arch: PhantomData, +} + +impl<'a, R, A, UCS> DwarfUnwinder<'a, R, A, UCS> +where + R: Reader, + A: DwarfUnwinding, + UCS: UnwindContextStorage, +{ + pub fn new( + unwind_section_data: R, + unwind_section_type: UnwindSectionType, + eh_frame_hdr_data: Option<&'a [u8]>, + unwind_context: &'a mut UnwindContext, + bases: BaseAddresses, + base_svma: u64, + ) -> Self { + let eh_frame_hdr = match eh_frame_hdr_data { + Some(eh_frame_hdr_data) => { + let hdr = EhFrameHdr::new(eh_frame_hdr_data, unwind_section_data.endian()); + match hdr.parse(&bases, 8) { + Ok(hdr) => Some(hdr), + Err(_) => None, + } + } + None => None, + }; + Self { + unwind_section_data, + unwind_section_type, + eh_frame_hdr, + unwind_context, + bases, + base_svma, + _arch: PhantomData, + } + } + + pub fn get_fde_offset_for_relative_address(&self, rel_lookup_address: u32) -> Option { + let lookup_svma = self.base_svma + rel_lookup_address as u64; + let eh_frame_hdr = self.eh_frame_hdr.as_ref()?; + let table = eh_frame_hdr.table()?; + let fde_ptr = table.lookup(lookup_svma, &self.bases).ok()?; + let fde_offset = table.pointer_to_offset(fde_ptr).ok()?; + fde_offset.0.into_u64().try_into().ok() + } + + pub fn unwind_frame_with_fde( + &mut self, + regs: &mut A::UnwindRegs, + is_first_frame: bool, + rel_lookup_address: u32, + fde_offset: u32, + read_stack: &mut F, + ) -> Result, DwarfUnwinderError> + where + F: FnMut(u64) -> Result, + ES: EvaluationStorage, + { + let lookup_svma = self.base_svma + rel_lookup_address as u64; + let unwind_section_data = self.unwind_section_data.clone(); + match self.unwind_section_type { + UnwindSectionType::EhFrame => { + let mut eh_frame = EhFrame::from(unwind_section_data); + eh_frame.set_address_size(8); + let unwind_info = self.unwind_info_for_fde(&eh_frame, lookup_svma, fde_offset); + if let Err(DwarfUnwinderError::UnwindInfoForAddressFailed(_)) 
= unwind_info { + return Ok(UnwindResult::ExecRule(A::rule_if_uncovered_by_fde())); + } + let (unwind_info, encoding) = unwind_info?; + A::unwind_frame::( + &eh_frame, + unwind_info, + encoding, + regs, + is_first_frame, + read_stack, + ) + } + UnwindSectionType::DebugFrame => { + let mut debug_frame = DebugFrame::from(unwind_section_data); + debug_frame.set_address_size(8); + let unwind_info = self.unwind_info_for_fde(&debug_frame, lookup_svma, fde_offset); + if let Err(DwarfUnwinderError::UnwindInfoForAddressFailed(_)) = unwind_info { + return Ok(UnwindResult::ExecRule(A::rule_if_uncovered_by_fde())); + } + let (unwind_info, encoding) = unwind_info?; + A::unwind_frame::( + &debug_frame, + unwind_info, + encoding, + regs, + is_first_frame, + read_stack, + ) + } + } + } + + fn unwind_info_for_fde>( + &mut self, + unwind_section: &US, + lookup_svma: u64, + fde_offset: u32, + ) -> Result<(&UnwindTableRow, Encoding), DwarfUnwinderError> { + let fde = unwind_section.fde_from_offset( + &self.bases, + US::Offset::from(R::Offset::from_u32(fde_offset)), + US::cie_from_offset, + ); + let fde = fde.map_err(DwarfUnwinderError::FdeFromOffsetFailed)?; + let encoding = fde.cie().encoding(); + let unwind_info: &UnwindTableRow<_, _> = fde + .unwind_info_for_address( + unwind_section, + &self.bases, + self.unwind_context, + lookup_svma, + ) + .map_err(DwarfUnwinderError::UnwindInfoForAddressFailed)?; + Ok((unwind_info, encoding)) + } +} + +pub(crate) fn base_addresses_for_sections( + section_info: &mut impl ModuleSectionInfo, +) -> BaseAddresses { + let mut start_addr = |names: &[&[u8]]| -> u64 { + names + .iter() + .find_map(|name| section_info.section_svma_range(name)) + .map(|r| r.start) + .unwrap_or_default() + }; + BaseAddresses::default() + .set_eh_frame(start_addr(&[b"__eh_frame", b".eh_frame"])) + .set_eh_frame_hdr(start_addr(&[b"__eh_frame_hdr", b".eh_frame_hdr"])) + .set_text(start_addr(&[b"__text", b".text"])) + .set_got(start_addr(&[b"__got", b".got"])) +} + 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DwarfCfiIndexError { + Gimli(gimli::Error), + CouldNotSubtractBaseAddress, + RelativeAddressTooBig, + FdeOffsetTooBig, +} + +impl core::fmt::Display for DwarfCfiIndexError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::Gimli(e) => write!(f, "EhFrame processing failed: {e}"), + Self::CouldNotSubtractBaseAddress => { + write!(f, "Could not subtract base address to create relative pc") + } + Self::RelativeAddressTooBig => write!(f, "Relative address did not fit into u32"), + Self::FdeOffsetTooBig => write!(f, "FDE offset did not fit into u32"), + } + } +} + +impl From for DwarfCfiIndexError { + fn from(e: gimli::Error) -> Self { + Self::Gimli(e) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DwarfCfiIndexError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Gimli(e) => Some(e), + _ => None, + } + } +} + +/// A binary search table for eh_frame FDEs. We generate this whenever a module +/// without eh_frame_hdr is added. +pub struct DwarfCfiIndex { + /// Contains the initial address for every FDE, relative to the base address. + /// This vector is sorted so that it can be used for binary search. + /// It has the same length as `fde_offsets`. + sorted_fde_pc_starts: Vec, + /// Contains the FDE offset for every FDE. The FDE at offset `fde_offsets[i]` + /// has a PC range which starts at `sorted_fde_pc_starts[i]`. + fde_offsets: Vec, +} + +impl DwarfCfiIndex { + pub fn try_new( + unwind_section: US, + bases: BaseAddresses, + base_svma: u64, + ) -> Result + where + R: Reader, + R::Offset: TryInto, + US: UnwindSection, + { + let mut fde_pc_and_offset = Vec::new(); + + let mut cur_cie = None; + let mut entries_iter = unwind_section.entries(&bases); + while let Some(entry) = entries_iter.next()? 
{ + let fde = match entry { + CieOrFde::Cie(cie) => { + cur_cie = Some(cie); + continue; + } + CieOrFde::Fde(partial_fde) => { + partial_fde.parse(|unwind_section, bases, cie_offset| { + if let Some(cie) = &cur_cie { + if cie.offset() + == >::into(cie_offset) + { + return Ok(cie.clone()); + } + } + let cie = unwind_section.cie_from_offset(bases, cie_offset); + if let Ok(cie) = &cie { + cur_cie = Some(cie.clone()); + } + cie + })? + } + }; + let pc = fde.initial_address(); + let relative_pc = pc + .checked_sub(base_svma) + .ok_or(DwarfCfiIndexError::CouldNotSubtractBaseAddress)?; + let relative_pc = u32::try_from(relative_pc) + .map_err(|_| DwarfCfiIndexError::RelativeAddressTooBig)?; + let fde_offset = >::try_into(fde.offset()) + .map_err(|_| DwarfCfiIndexError::FdeOffsetTooBig)?; + fde_pc_and_offset.push((relative_pc, fde_offset)); + } + fde_pc_and_offset.sort_by_key(|(pc, _)| *pc); + let sorted_fde_pc_starts = fde_pc_and_offset.iter().map(|(pc, _)| *pc).collect(); + let fde_offsets = fde_pc_and_offset.into_iter().map(|(_, fde)| fde).collect(); + Ok(Self { + sorted_fde_pc_starts, + fde_offsets, + }) + } + + pub fn try_new_eh_frame( + eh_frame_data: &[u8], + section_info: &mut impl ModuleSectionInfo, + ) -> Result { + let bases = base_addresses_for_sections(section_info); + let mut eh_frame = EhFrame::from(EndianSlice::new(eh_frame_data, LittleEndian)); + eh_frame.set_address_size(8); + + Self::try_new(eh_frame, bases, section_info.base_svma()) + } + + pub fn try_new_debug_frame( + debug_frame_data: &[u8], + section_info: &mut impl ModuleSectionInfo, + ) -> Result { + let bases = base_addresses_for_sections(section_info); + let mut debug_frame = DebugFrame::from(EndianSlice::new(debug_frame_data, LittleEndian)); + debug_frame.set_address_size(8); + + Self::try_new(debug_frame, bases, section_info.base_svma()) + } + + pub fn fde_offset_for_relative_address(&self, rel_lookup_address: u32) -> Option { + let i = match 
self.sorted_fde_pc_starts.binary_search(&rel_lookup_address) { + Err(0) => return None, + Ok(i) => i, + Err(i) => i - 1, + }; + Some(self.fde_offsets[i]) + } +} + +pub trait DwarfUnwindRegs { + fn get(&self, register: Register) -> Option; +} + +pub fn eval_cfa_rule>( + section: &impl UnwindSection, + rule: &CfaRule, + encoding: Encoding, + regs: &UR, +) -> Option { + match rule { + CfaRule::RegisterAndOffset { register, offset } => { + let val = regs.get(*register)?; + u64::try_from(i64::try_from(val).ok()?.checked_add(*offset)?).ok() + } + CfaRule::Expression(expr) => { + let expr = expr.get(section).ok()?; + eval_expr::(expr, encoding, regs) + } + } +} + +fn eval_expr>( + expr: Expression, + encoding: Encoding, + regs: &UR, +) -> Option { + let mut eval = Evaluation::::new_in(expr.0, encoding); + let mut result = eval.evaluate().ok()?; + loop { + match result { + EvaluationResult::Complete => break, + EvaluationResult::RequiresRegister { register, .. } => { + let value = regs.get(register)?; + result = eval.resume_with_register(Value::Generic(value as _)).ok()?; + } + _ => return None, + } + } + let x = &eval.as_result().last()?.location; + if let Location::Address { address } = x { + Some(*address) + } else { + None + } +} + +pub fn eval_register_rule( + section: &impl UnwindSection, + rule: RegisterRule, + cfa: u64, + encoding: Encoding, + val: u64, + regs: &UR, + read_stack: &mut F, +) -> Option +where + R: Reader, + F: FnMut(u64) -> Result, + UR: DwarfUnwindRegs, + S: EvaluationStorage, +{ + match rule { + RegisterRule::Undefined => None, + RegisterRule::SameValue => Some(val), + RegisterRule::Offset(offset) => { + let cfa_plus_offset = + u64::try_from(i64::try_from(cfa).ok()?.checked_add(offset)?).ok()?; + read_stack(cfa_plus_offset).ok() + } + RegisterRule::ValOffset(offset) => { + u64::try_from(i64::try_from(cfa).ok()?.checked_add(offset)?).ok() + } + RegisterRule::Register(register) => regs.get(register), + RegisterRule::Expression(expr) => { + let expr = 
expr.get(section).ok()?; + let val = eval_expr::(expr, encoding, regs)?; + read_stack(val).ok() + } + RegisterRule::ValExpression(expr) => { + let expr = expr.get(section).ok()?; + eval_expr::(expr, encoding, regs) + } + RegisterRule::Architectural => { + // Unimplemented + // TODO: Find out what the architectural rules for x86_64 and for aarch64 are, if any. + None + } + _ => None, + } +} diff --git a/third_party/rust/framehop/src/error.rs b/third_party/rust/framehop/src/error.rs new file mode 100644 index 000000000000..df58947e680d --- /dev/null +++ b/third_party/rust/framehop/src/error.rs @@ -0,0 +1,116 @@ +use crate::dwarf::DwarfUnwinderError; +#[cfg(feature = "macho")] +use crate::macho::CompactUnwindInfoUnwinderError; +#[cfg(feature = "pe")] +use crate::pe::PeUnwinderError; + +/// The error type used in this crate. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Error { + CouldNotReadStack(u64), + FramepointerUnwindingMovedBackwards, + DidNotAdvance, + IntegerOverflow, + ReturnAddressIsNull, +} + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::CouldNotReadStack(addr) => write!(f, "Could not read stack memory at 0x{addr:x}"), + Self::FramepointerUnwindingMovedBackwards => { + write!(f, "Frame pointer unwinding moved backwards") + } + Self::DidNotAdvance => write!( + f, + "Neither the code address nor the stack pointer changed, would loop" + ), + Self::IntegerOverflow => write!(f, "Unwinding caused integer overflow"), + Self::ReturnAddressIsNull => write!(f, "Return address is null"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UnwinderError { + #[cfg(feature = "macho")] + CompactUnwindInfo(CompactUnwindInfoUnwinderError), + Dwarf(DwarfUnwinderError), + #[cfg(feature = "pe")] + Pe(PeUnwinderError), + #[cfg(feature = "macho")] + NoDwarfData, + NoModuleUnwindData, + 
EhFrameHdrCouldNotFindAddress, + DwarfCfiIndexCouldNotFindAddress, +} + +impl core::fmt::Display for UnwinderError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + #[cfg(feature = "macho")] + Self::CompactUnwindInfo(err) => { + write!(f, "Compact Unwind Info unwinding failed: {err}") + } + Self::Dwarf(err) => write!(f, "DWARF unwinding failed: {err}"), + #[cfg(feature = "pe")] + Self::Pe(err) => write!(f, "PE unwinding failed: {err}"), + #[cfg(feature = "macho")] + Self::NoDwarfData => write!( + f, + "__unwind_info referred to DWARF FDE but we do not have __eh_frame data" + ), + Self::NoModuleUnwindData => { + write!(f, "No unwind data for the module containing the address") + } + Self::EhFrameHdrCouldNotFindAddress => write!( + f, + ".eh_frame_hdr was not successful in looking up the address in the table" + ), + Self::DwarfCfiIndexCouldNotFindAddress => write!( + f, + "Failed to look up the address in the DwarfCfiIndex search table" + ), + } + } +} + +impl From for UnwinderError { + fn from(e: DwarfUnwinderError) -> Self { + Self::Dwarf(e) + } +} + +#[cfg(feature = "pe")] +impl From for UnwinderError { + fn from(e: PeUnwinderError) -> Self { + Self::Pe(e) + } +} + +#[cfg(feature = "macho")] +impl From for UnwinderError { + fn from(e: CompactUnwindInfoUnwinderError) -> Self { + match e { + CompactUnwindInfoUnwinderError::BadDwarfUnwinding(e) => UnwinderError::Dwarf(e), + e => UnwinderError::CompactUnwindInfo(e), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for UnwinderError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + #[cfg(feature = "macho")] + Self::CompactUnwindInfo(e) => Some(e), + Self::Dwarf(e) => Some(e), + #[cfg(feature = "pe")] + Self::Pe(e) => Some(e), + _ => None, + } + } +} diff --git a/third_party/rust/framehop/src/instruction_analysis.rs b/third_party/rust/framehop/src/instruction_analysis.rs new file mode 100644 index 000000000000..47c0eeac3143 --- 
/dev/null +++ b/third_party/rust/framehop/src/instruction_analysis.rs @@ -0,0 +1,20 @@ +use crate::arch::Arch; + +pub trait InstructionAnalysis: Arch { + /// Caller guarantees pc_offset <= text_bytes.len() + fn rule_from_prologue_analysis(text_bytes: &[u8], pc_offset: usize) + -> Option; + + /// Caller guarantees pc_offset <= text_bytes.len() + fn rule_from_epilogue_analysis(text_bytes: &[u8], pc_offset: usize) + -> Option; + + /// Caller guarantees pc_offset <= text_bytes.len() + fn rule_from_instruction_analysis( + text_bytes: &[u8], + pc_offset: usize, + ) -> Option { + Self::rule_from_prologue_analysis(text_bytes, pc_offset) + .or_else(|| Self::rule_from_epilogue_analysis(text_bytes, pc_offset)) + } +} diff --git a/third_party/rust/framehop/src/lib.rs b/third_party/rust/framehop/src/lib.rs new file mode 100644 index 000000000000..737235c07157 --- /dev/null +++ b/third_party/rust/framehop/src/lib.rs @@ -0,0 +1,166 @@ +//! # framehop +//! +//! Framehop is a stack frame unwinder written in 100% Rust. It produces high quality stacks at high speed, on multiple platforms and architectures, without an expensive pre-processing step for unwind information. This makes it suitable for sampling profilers. +//! +//! It currently supports unwinding x86_64 and aarch64, with unwind information formats commonly used on Windows, macOS, Linux and Android. +//! +//! You give framehop register values, stack memory and unwind data, and framehop produces a list of return addresses. +//! +//! Framehop can be used in the following scenarios: +//! +//! - Live unwinding of a remote process. This is how [`samply`](https://github.com/mstange/samply/) uses it. +//! - Offline unwinding from saved registers and stack bytes, even on a different machine, a different OS, or a different CPU architecture. +//! - Live unwinding inside the same process. 
This is currently unproven, but should work as long as you can do heap allocation before sampling, in order to allocate a cache and to update the list of modules. The actual unwinding does not require any heap allocation and should work even inside a signal handler, as long as you use `MustNotAllocateDuringUnwind`. +//! +//! As a user of framehop, your responsibilities are the following: +//! +//! - You need to enumerate the modules (libraries) that are loaded in the sampled process ahead of time, or ideally maintain a live list which is updated whenever modules are loaded / unloaded. +//! - You need to provide address ranges and unwind section data for those modules. +//! - When sampling, you provide the register values and a callback to read arbitrary stack memory without segfaulting. +//! - On aarch64, picking the right bitmask to strip pointer authentication bits from return addresses is up to you. +//! - You will need to do symbol resolution yourself, if you want function names. Framehop only produces addresses, it does not do any symbolication. +//! +//! In turn, framehop solves the following problems: +//! +//! - It parses a number of different unwind information formats. At the moment, it supports the following: +//! - Apple's Compact Unwinding Format, in `__unwind_info` (macOS) +//! - DWARF CFI in `.eh_frame` (using `.eh_frame_hdr` as an index, if available) +//! - DWARF CFI in `.debug_frame` +//! - PE unwind info in `.pdata`, `.rdata` and `.xdata` (for Windows x86_64) +//! - It supports correct unwinding even when the program is interrupted inside a function prologue or epilogue. On macOS, it has to analyze assembly instructions in order to do this. +//! - On x86_64 and aarch64, it falls back to frame pointer unwinding if it cannot find unwind information for an address. +//! - It caches the unwind rule for each address in a fixed-size cache, so that repeated unwinding from the same address is even faster. +//! 
- It generates binary search indexes for unwind information formats which don't have them. Specifically, for `.debug_frame` and for `.eh_frame` without `.eh_frame_hdr`. +//! - It does a reasonable job of detecting the end of the stack, so that you can differentiate between properly terminated stacks and prematurely truncated stacks. +//! +//! Framehop is not suitable for debuggers or to implement exception handling. Debuggers usually need to recover all register values for every frame whereas framehop only cares about return addresses. And exception handling needs the ability to call destructors, which is also a non-goal for framehop. +//! +//! ## Speed +//! +//! Framehop achieves high speed in the following ways: +//! +//! 1. It only recovers registers which are needed for computing return addresses. On x86_64 that's `rip`, `rsp` and `rbp`, and on aarch64 that's `lr`, `sp` and `fp`. All other registers are not needed - in theory they could be used as inputs to DWARF CFI expressions, but in practice they are not. +//! 2. It uses zero-copy parsing wherever possible. For example, the bytes in `__unwind_info` are only accessed during unwinding, and the binary search happens right inside the original `__unwind_info` memory. For DWARF unwinding, framehop uses the excellent [`gimli` crate](https://github.com/gimli-rs/gimli/), which was written with performance in mind. +//! 3. It uses binary search to find the correct unwind rule in all supported unwind information formats. For formats without a built-in index, it creates an index when the module is added. +//! 4. It caches unwind rules based on address. In practice, the 509-slot cache achieves a hit rate of around 80% on complicated code like Firefox (with the cache being shared across all Firefox processes). When profiling simpler applications, the hit rate is likely much higher. +//! +//! 
Furthermore, adding a module is fast too because framehop only does minimal up-front parsing and processing - really, the only thing it does is to create the index of FDE offsets for `.eh_frame` / `.debug_frame`. +//! +//! ## Example +//! +//! ``` +//! use core::ops::Range; +//! use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; +//! use framehop::{ExplicitModuleSectionInfo, FrameAddress, Module}; +//! +//! let mut cache = CacheAarch64::<_>::new(); +//! let mut unwinder = UnwinderAarch64::new(); +//! +//! let module = Module::new( +//! "mybinary".to_string(), +//! 0x1003fc000..0x100634000, +//! 0x1003fc000, +//! ExplicitModuleSectionInfo { +//! base_svma: 0x100000000, +//! text_svma: Some(0x100000b64..0x1001d2d18), +//! text: Some(vec![/* __text */]), +//! stubs_svma: Some(0x1001d2d18..0x1001d309c), +//! stub_helper_svma: Some(0x1001d309c..0x1001d3438), +//! got_svma: Some(0x100238000..0x100238010), +//! unwind_info: Some(vec![/* __unwind_info */]), +//! eh_frame_svma: Some(0x100237f80..0x100237ffc), +//! eh_frame: Some(vec![/* __eh_frame */]), +//! text_segment_svma: Some(0x1003fc000..0x100634000), +//! text_segment: Some(vec![/* __TEXT */]), +//! ..Default::default() +//! }, +//! ); +//! unwinder.add_module(module); +//! +//! let pc = 0x1003fc000 + 0x1292c0; +//! let lr = 0x1003fc000 + 0xe4830; +//! let sp = 0x10; +//! let fp = 0x20; +//! let stack = [ +//! 1, 2, 3, 4, 0x40, 0x1003fc000 + 0x100dc4, +//! 5, 6, 0x70, 0x1003fc000 + 0x12ca28, +//! 7, 8, 9, 10, 0x0, 0x0, +//! ]; +//! let mut read_stack = |addr| stack.get((addr / 8) as usize).cloned().ok_or(()); +//! +//! use framehop::Unwinder; +//! let mut iter = unwinder.iter_frames( +//! pc, +//! UnwindRegsAarch64::new(lr, sp, fp), +//! &mut cache, +//! &mut read_stack, +//! ); +//! +//! let mut frames = Vec::new(); +//! while let Ok(Some(frame)) = iter.next() { +//! frames.push(frame); +//! } +//! +//! assert_eq!( +//! frames, +//! vec![ +//! 
FrameAddress::from_instruction_pointer(0x1003fc000 + 0x1292c0), +//! FrameAddress::from_return_address(0x1003fc000 + 0x100dc4).unwrap(), +//! FrameAddress::from_return_address(0x1003fc000 + 0x12ca28).unwrap() +//! ] +//! ); +//! ``` + +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; + +mod add_signed; +mod arch; +mod cache; +mod code_address; +mod display_utils; +mod dwarf; +mod error; +mod instruction_analysis; +#[cfg(feature = "macho")] +mod macho; +#[cfg(feature = "pe")] +mod pe; +mod rule_cache; +mod unwind_result; +mod unwind_rule; +mod unwinder; + +/// Types for unwinding on the aarch64 CPU architecture. +pub mod aarch64; +/// Types for unwinding on the x86_64 CPU architecture. +pub mod x86_64; + +pub use cache::{AllocationPolicy, MayAllocateDuringUnwind, MustNotAllocateDuringUnwind}; +pub use code_address::FrameAddress; +pub use error::Error; +pub use rule_cache::CacheStats; +pub use unwinder::{ + ExplicitModuleSectionInfo, Module, ModuleSectionInfo, UnwindIterator, Unwinder, +}; + +/// The unwinder cache for the native CPU architecture. +#[cfg(target_arch = "aarch64")] +pub type CacheNative

= aarch64::CacheAarch64

; +/// The unwind registers type for the native CPU architecture. +#[cfg(target_arch = "aarch64")] +pub type UnwindRegsNative = aarch64::UnwindRegsAarch64; +/// The unwinder type for the native CPU architecture. +#[cfg(target_arch = "aarch64")] +pub type UnwinderNative = aarch64::UnwinderAarch64; + +/// The unwinder cache for the native CPU architecture. +#[cfg(target_arch = "x86_64")] +pub type CacheNative

= x86_64::CacheX86_64

; +/// The unwind registers type for the native CPU architecture. +#[cfg(target_arch = "x86_64")] +pub type UnwindRegsNative = x86_64::UnwindRegsX86_64; +/// The unwinder type for the native CPU architecture. +#[cfg(target_arch = "x86_64")] +pub type UnwinderNative = x86_64::UnwinderX86_64; diff --git a/third_party/rust/framehop/src/macho.rs b/third_party/rust/framehop/src/macho.rs new file mode 100644 index 000000000000..ef60c6e193c0 --- /dev/null +++ b/third_party/rust/framehop/src/macho.rs @@ -0,0 +1,206 @@ +use core::marker::PhantomData; + +use crate::dwarf::DwarfUnwinderError; +use crate::{arch::Arch, unwind_rule::UnwindRule}; +use macho_unwind_info::UnwindInfo; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompactUnwindInfoUnwinderError { + BadFormat(macho_unwind_info::Error), + AddressOutsideRange(u32), + CallerCannotBeFrameless, + FunctionHasNoInfo, + BpOffsetDoesNotFit, + BadOpcodeKind(u8), + BadDwarfUnwinding(DwarfUnwinderError), + NoTextBytesToLookUpIndirectStackOffset, + IndirectStackOffsetOutOfBounds, + StackAdjustOverflow, + StackSizeDoesNotFit, + StubFunctionCannotBeCaller, + InvalidFrameless, +} + +impl core::fmt::Display for CompactUnwindInfoUnwinderError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::BadFormat(err) => write!(f, "Bad __unwind_info format: {err}"), + Self::AddressOutsideRange(addr) => write!(f, "Address 0x{addr:x} outside of the range covered by __unwind_info"), + Self::CallerCannotBeFrameless => write!(f, "Encountered a non-leaf function which was marked as frameless."), + Self::FunctionHasNoInfo => write!(f, "No unwind info (null opcode) for this function in __unwind_info"), + Self::BpOffsetDoesNotFit => write!(f, "rbp offset from the stack pointer divided by 8 does not fit into i16"), + Self::BadOpcodeKind(kind) => write!(f, "Unrecognized __unwind_info opcode kind {kind}"), + Self::BadDwarfUnwinding(err) => write!(f, "DWARF unwinding failed: {err}"), + 
Self::NoTextBytesToLookUpIndirectStackOffset => write!(f, "Don't have the function bytes to look up the offset for frameless function with indirect stack offset"), + Self::IndirectStackOffsetOutOfBounds => write!(f, "Stack offset not found inside the bounds of the text bytes"), + Self::StackAdjustOverflow => write!(f, "Stack adjust addition overflowed"), + Self::StackSizeDoesNotFit => write!(f, "Stack size does not fit into the rule representation"), + Self::StubFunctionCannotBeCaller => write!(f, "A caller had its address in the __stubs section"), + Self::InvalidFrameless => write!(f, "Encountered invalid unwind entry"), + } + } +} + +impl From for CompactUnwindInfoUnwinderError { + fn from(e: macho_unwind_info::Error) -> Self { + Self::BadFormat(e) + } +} + +impl From for CompactUnwindInfoUnwinderError { + fn from(e: DwarfUnwinderError) -> Self { + Self::BadDwarfUnwinding(e) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CompactUnwindInfoUnwinderError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::BadFormat(e) => Some(e), + Self::BadDwarfUnwinding(e) => Some(e), + _ => None, + } + } +} + +#[derive(Clone, Debug)] +pub enum CuiUnwindResult { + ExecRule(R), + NeedDwarf(u32), +} + +pub trait CompactUnwindInfoUnwinding: Arch { + fn unwind_frame( + function: macho_unwind_info::Function, + is_first_frame: bool, + address_offset_within_function: usize, + function_bytes: Option<&[u8]>, + ) -> Result, CompactUnwindInfoUnwinderError>; + + fn rule_for_stub_helper( + offset: u32, + ) -> Result, CompactUnwindInfoUnwinderError>; +} + +#[derive(Clone, Copy)] +pub struct TextBytes<'a> { + offset_from_base_address: u32, + bytes: &'a [u8], +} + +impl<'a> TextBytes<'a> { + pub fn new(offset_from_base_address: u32, bytes: &'a [u8]) -> Self { + Self { + offset_from_base_address, + bytes, + } + } +} + +pub struct CompactUnwindInfoUnwinder<'a, A: CompactUnwindInfoUnwinding> { + unwind_info_data: &'a [u8], + text_bytes: Option>, 
+ stubs_range: (u32, u32), + stub_helper_range: (u32, u32), + _arch: PhantomData, +} + +impl<'a, A: CompactUnwindInfoUnwinding> CompactUnwindInfoUnwinder<'a, A> { + pub fn new( + unwind_info_data: &'a [u8], + text_bytes: Option>, + stubs_range: (u32, u32), + stub_helper_range: (u32, u32), + ) -> Self { + Self { + unwind_info_data, + text_bytes, + stubs_range, + stub_helper_range, + _arch: PhantomData, + } + } + + pub fn function_for_address( + &self, + address: u32, + ) -> Result { + let unwind_info = UnwindInfo::parse(self.unwind_info_data) + .map_err(CompactUnwindInfoUnwinderError::BadFormat)?; + let function = unwind_info + .lookup(address) + .map_err(CompactUnwindInfoUnwinderError::BadFormat)?; + function.ok_or(CompactUnwindInfoUnwinderError::AddressOutsideRange(address)) + } + + pub fn unwind_frame( + &mut self, + rel_lookup_address: u32, + is_first_frame: bool, + ) -> Result, CompactUnwindInfoUnwinderError> { + // Exclude __stubs and __stub_helper sections. The __unwind_info does not describe those + // sections. These sections need to be manually excluded because the addresses in + // __unwind_info can be both before and after the stubs/stub_helper sections, if there is + // both a __text and a text_env section. + if self.stubs_range.0 <= rel_lookup_address && rel_lookup_address < self.stubs_range.1 { + if !is_first_frame { + return Err(CompactUnwindInfoUnwinderError::StubFunctionCannotBeCaller); + } + // All stub functions are frameless. 
+ return Ok(CuiUnwindResult::ExecRule( + A::UnwindRule::rule_for_stub_functions(), + )); + } + if self.stub_helper_range.0 <= rel_lookup_address + && rel_lookup_address < self.stub_helper_range.1 + { + if !is_first_frame { + return Err(CompactUnwindInfoUnwinderError::StubFunctionCannotBeCaller); + } + let lookup_address_relative_to_section = rel_lookup_address - self.stub_helper_range.0; + return ::rule_for_stub_helper( + lookup_address_relative_to_section, + ); + } + let function = match self.function_for_address(rel_lookup_address) { + Ok(f) => f, + Err(CompactUnwindInfoUnwinderError::AddressOutsideRange(_)) if is_first_frame => { + // pc is falling into this module's address range, but it's not covered by __unwind_info. + // This could mean that we're inside a stub function, in the __stubs section. + // All stub functions are frameless. + // TODO: Obtain the actual __stubs address range and do better checking here. + return Ok(CuiUnwindResult::ExecRule( + A::UnwindRule::rule_for_stub_functions(), + )); + } + Err(err) => return Err(err), + }; + if is_first_frame && rel_lookup_address == function.start_address { + return Ok(CuiUnwindResult::ExecRule( + A::UnwindRule::rule_for_function_start(), + )); + } + let address_offset_within_function = + usize::try_from(rel_lookup_address - function.start_address).unwrap(); + let function_bytes = self.text_bytes.and_then(|text_bytes| { + let TextBytes { + offset_from_base_address, + bytes, + } = text_bytes; + let function_start_relative_to_text = function + .start_address + .checked_sub(offset_from_base_address)? + as usize; + let function_end_relative_to_text = + function.end_address.checked_sub(offset_from_base_address)? 
as usize; + bytes.get(function_start_relative_to_text..function_end_relative_to_text) + }); + ::unwind_frame( + function, + is_first_frame, + address_offset_within_function, + function_bytes, + ) + } +} diff --git a/third_party/rust/framehop/src/pe.rs b/third_party/rust/framehop/src/pe.rs new file mode 100644 index 000000000000..bba6cf19c582 --- /dev/null +++ b/third_party/rust/framehop/src/pe.rs @@ -0,0 +1,100 @@ +use crate::{arch::Arch, unwind_result::UnwindResult}; +use core::ops::Range; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum PeUnwinderError { + MissingUnwindInfoData(u32), + MissingInstructionData(u32), + MissingStackData(Option), + UnwindInfoParseError, + Aarch64Unsupported, +} + +impl core::fmt::Display for PeUnwinderError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::MissingUnwindInfoData(rva) => { + write!(f, "failed to read unwind info memory at RVA {rva:x}") + } + Self::MissingInstructionData(rva) => { + write!(f, "failed to read instruction memory at RVA {rva:x}") + } + Self::MissingStackData(addr) => { + write!(f, "failed to read stack")?; + if let Some(addr) = addr { + write!(f, " at address {addr:x}")?; + } + Ok(()) + } + Self::UnwindInfoParseError => write!(f, "failed to parse UnwindInfo"), + Self::Aarch64Unsupported => write!(f, "AArch64 is not yet supported"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for PeUnwinderError {} + +/// Data and the related RVA range within the binary. +/// +/// This is only used by PE unwinding. +/// +/// Type arguments: +/// - `D`: The type for unwind section data. This allows carrying owned data on the +/// module, e.g. `Vec`. But it could also be a wrapper around mapped memory from +/// a file or a different process, for example. It just needs to provide a slice of +/// bytes via its `Deref` implementation. 
+pub struct DataAtRvaRange { + pub data: D, + pub rva_range: Range, +} + +pub struct PeSections<'a, D> { + pub pdata: &'a D, + pub rdata: Option<&'a DataAtRvaRange>, + pub xdata: Option<&'a DataAtRvaRange>, + pub text: Option<&'a DataAtRvaRange>, +} + +impl<'a, D> PeSections<'a, D> +where + D: core::ops::Deref, +{ + pub fn unwind_info_memory_at_rva(&self, rva: u32) -> Result<&'a [u8], PeUnwinderError> { + [&self.rdata, &self.xdata] + .into_iter() + .find_map(|o| o.and_then(|m| memory_at_rva(m, rva))) + .ok_or(PeUnwinderError::MissingUnwindInfoData(rva)) + } + + pub fn text_memory_at_rva(&self, rva: u32) -> Result<&'a [u8], PeUnwinderError> { + self.text + .and_then(|m| memory_at_rva(m, rva)) + .ok_or(PeUnwinderError::MissingInstructionData(rva)) + } +} + +fn memory_at_rva>( + DataAtRvaRange { data, rva_range }: &DataAtRvaRange, + address: u32, +) -> Option<&[u8]> { + if rva_range.contains(&address) { + let offset = address - rva_range.start; + Some(&data[(offset as usize)..]) + } else { + None + } +} + +pub trait PeUnwinding: Arch { + fn unwind_frame( + sections: PeSections, + address: u32, + regs: &mut Self::UnwindRegs, + is_first_frame: bool, + read_stack: &mut F, + ) -> Result, PeUnwinderError> + where + F: FnMut(u64) -> Result, + D: core::ops::Deref; +} diff --git a/third_party/rust/framehop/src/rule_cache.rs b/third_party/rust/framehop/src/rule_cache.rs new file mode 100644 index 000000000000..4aeb507c0cca --- /dev/null +++ b/third_party/rust/framehop/src/rule_cache.rs @@ -0,0 +1,146 @@ +use alloc::boxed::Box; + +use crate::unwind_rule::UnwindRule; + +const CACHE_ENTRY_COUNT: usize = 509; + +pub struct RuleCache { + entries: Box<[Option>; CACHE_ENTRY_COUNT]>, + stats: CacheStats, +} + +impl RuleCache { + pub fn new() -> Self { + Self { + entries: Box::new([None; CACHE_ENTRY_COUNT]), + stats: CacheStats::new(), + } + } + + pub fn lookup(&mut self, address: u64, modules_generation: u16) -> CacheResult { + let slot = (address % (CACHE_ENTRY_COUNT as u64)) as u16; 
+ match &self.entries[slot as usize] { + None => { + self.stats.miss_empty_slot_count += 1; + } + Some(entry) => { + if entry.modules_generation == modules_generation { + if entry.address == address { + self.stats.hit_count += 1; + return CacheResult::Hit(entry.unwind_rule); + } else { + self.stats.miss_wrong_address_count += 1; + } + } else { + self.stats.miss_wrong_modules_count += 1; + } + } + } + CacheResult::Miss(CacheHandle { + slot, + address, + modules_generation, + }) + } + + pub fn insert(&mut self, handle: CacheHandle, unwind_rule: R) { + let CacheHandle { + slot, + address, + modules_generation, + } = handle; + self.entries[slot as usize] = Some(CacheEntry { + address, + modules_generation, + unwind_rule, + }); + } + + /// Returns a snapshot of the cache usage statistics. + pub fn stats(&self) -> CacheStats { + self.stats + } +} + +pub enum CacheResult { + Miss(CacheHandle), + Hit(R), +} + +pub struct CacheHandle { + slot: u16, + address: u64, + modules_generation: u16, +} + +const _: () = assert!( + CACHE_ENTRY_COUNT as u64 <= u16::MAX as u64, + "u16 should be sufficient to store the cache slot index" +); + +#[derive(Clone, Copy, Debug)] +struct CacheEntry { + address: u64, + modules_generation: u16, + unwind_rule: R, +} + +/// Statistics about the effectiveness of the rule cache. +#[derive(Default, Debug, Clone, Copy)] +pub struct CacheStats { + /// The number of successful cache hits. + pub hit_count: u64, + /// The number of cache misses that were due to an empty slot. + pub miss_empty_slot_count: u64, + /// The number of cache misses that were due to a filled slot whose module + /// generation didn't match the unwinder's current module generation. + /// (This means that either the unwinder's modules have changed since the + /// rule in this slot was stored, or the same cache is used with multiple + /// unwinders and the unwinders are stomping on each other's cache slots.) 
+ pub miss_wrong_modules_count: u64, + /// The number of cache misses that were due to cache slot collisions of + /// different addresses. + pub miss_wrong_address_count: u64, +} + +impl CacheStats { + /// Create a new instance. + pub fn new() -> Self { + Default::default() + } + + /// The number of total lookups. + pub fn total(&self) -> u64 { + self.hits() + self.misses() + } + + /// The number of total hits. + pub fn hits(&self) -> u64 { + self.hit_count + } + + /// The number of total misses. + pub fn misses(&self) -> u64 { + self.miss_empty_slot_count + self.miss_wrong_modules_count + self.miss_wrong_address_count + } +} + +#[cfg(test)] +mod tests { + use crate::{aarch64::UnwindRuleAarch64, x86_64::UnwindRuleX86_64}; + + use super::*; + + // Ensure that the size of Option> doesn't change by accident. + #[test] + fn test_cache_entry_size() { + assert_eq!( + core::mem::size_of::>>(), + 16 + ); + assert_eq!( + core::mem::size_of::>>(), + 24 // <-- larger than we'd like + ); + } +} diff --git a/third_party/rust/framehop/src/unwind_result.rs b/third_party/rust/framehop/src/unwind_result.rs new file mode 100644 index 000000000000..57f6aae70933 --- /dev/null +++ b/third_party/rust/framehop/src/unwind_result.rs @@ -0,0 +1,5 @@ +#[derive(Debug, Clone)] +pub enum UnwindResult { + ExecRule(R), + Uncacheable(u64), +} diff --git a/third_party/rust/framehop/src/unwind_rule.rs b/third_party/rust/framehop/src/unwind_rule.rs new file mode 100644 index 000000000000..f2c2e159f8a3 --- /dev/null +++ b/third_party/rust/framehop/src/unwind_rule.rs @@ -0,0 +1,18 @@ +use crate::error::Error; + +pub trait UnwindRule: Copy + core::fmt::Debug { + type UnwindRegs; + + fn exec( + self, + is_first_frame: bool, + regs: &mut Self::UnwindRegs, + read_stack: &mut F, + ) -> Result, Error> + where + F: FnMut(u64) -> Result; + + fn rule_for_stub_functions() -> Self; + fn rule_for_function_start() -> Self; + fn fallback_rule() -> Self; +} diff --git a/third_party/rust/framehop/src/unwinder.rs 
b/third_party/rust/framehop/src/unwinder.rs new file mode 100644 index 000000000000..49c7cda1cb12 --- /dev/null +++ b/third_party/rust/framehop/src/unwinder.rs @@ -0,0 +1,1013 @@ +use alloc::string::String; +use alloc::sync::Arc; +use alloc::vec::Vec; +use fallible_iterator::FallibleIterator; +use gimli::{EndianSlice, LittleEndian}; + +use crate::arch::Arch; +use crate::cache::{AllocationPolicy, Cache}; +use crate::dwarf::{DwarfCfiIndex, DwarfUnwinder, DwarfUnwinding, UnwindSectionType}; +use crate::error::{Error, UnwinderError}; +use crate::instruction_analysis::InstructionAnalysis; + +#[cfg(feature = "macho")] +use crate::macho::{ + CompactUnwindInfoUnwinder, CompactUnwindInfoUnwinding, CuiUnwindResult, TextBytes, +}; +#[cfg(feature = "pe")] +use crate::pe::{DataAtRvaRange, PeUnwinding}; +use crate::rule_cache::CacheResult; +use crate::unwind_result::UnwindResult; +use crate::unwind_rule::UnwindRule; +use crate::FrameAddress; + +use core::marker::PhantomData; +use core::ops::{Deref, Range}; +use core::sync::atomic::{AtomicU16, Ordering}; + +/// Unwinder is the trait that each CPU architecture's concrete unwinder type implements. +/// This trait's methods are what let you do the actual unwinding. +pub trait Unwinder: Clone { + /// The unwind registers type for the targeted CPU architecture. + type UnwindRegs; + + /// The unwind cache for the targeted CPU architecture. + /// This is an associated type because the cache stores unwind rules, whose concrete + /// type depends on the CPU arch, and because the cache can support different allocation + /// policies. + type Cache; + + /// The module type. This is an associated type because the concrete type varies + /// depending on the type you use to give the module access to the unwind section data. + type Module; + + /// Add a module that's loaded in the profiled process. This is how you provide unwind + /// information and address ranges. 
+ /// + /// This should be called whenever a new module is loaded into the process. + fn add_module(&mut self, module: Self::Module); + + /// Remove a module that was added before using `add_module`, keyed by the start + /// address of that module's address range. If no match is found, the call is ignored. + /// This should be called whenever a module is unloaded from the process. + fn remove_module(&mut self, module_avma_range_start: u64); + + /// Returns the highest code address that is known in this process based on the module + /// address ranges. Returns 0 if no modules have been added. + /// + /// This method can be used together with + /// [`PtrAuthMask::from_max_known_address`](crate::aarch64::PtrAuthMask::from_max_known_address) + /// to make an educated guess at a pointer authentication mask for Aarch64 return addresses. + fn max_known_code_address(&self) -> u64; + + /// Unwind a single frame, to recover return address and caller register values. + /// This is the main entry point for unwinding. + fn unwind_frame( + &self, + address: FrameAddress, + regs: &mut Self::UnwindRegs, + cache: &mut Self::Cache, + read_stack: &mut F, + ) -> Result, Error> + where + F: FnMut(u64) -> Result; + + /// Return an iterator that unwinds frame by frame until the end of the stack is found. + fn iter_frames<'u, 'c, 'r, F>( + &'u self, + pc: u64, + regs: Self::UnwindRegs, + cache: &'c mut Self::Cache, + read_stack: &'r mut F, + ) -> UnwindIterator<'u, 'c, 'r, Self, F> + where + F: FnMut(u64) -> Result, + { + UnwindIterator::new(self, pc, regs, cache, read_stack) + } +} + +/// An iterator for unwinding the entire stack, starting from the initial register values. +/// +/// The first yielded frame is the instruction pointer. Subsequent addresses are return +/// addresses. +/// +/// This iterator attempts to detect if stack unwinding completed successfully, or if the +/// stack was truncated prematurely. 
If it thinks that it successfully found the root +/// function, it will complete with `Ok(None)`, otherwise it will complete with `Err(...)`. +/// However, the detection does not work in all cases, so you should expect `Err(...)` to +/// be returned even during normal operation. As a result, it is not recommended to use +/// this iterator as a `FallibleIterator`, because you might lose the entire stack if the +/// last iteration returns `Err(...)`. +/// +/// Lifetimes: +/// +/// - `'u`: The lifetime of the [`Unwinder`]. +/// - `'c`: The lifetime of the unwinder cache. +/// - `'r`: The lifetime of the exclusive access to the `read_stack` callback. +pub struct UnwindIterator<'u, 'c, 'r, U: Unwinder + ?Sized, F: FnMut(u64) -> Result> { + unwinder: &'u U, + state: UnwindIteratorState, + regs: U::UnwindRegs, + cache: &'c mut U::Cache, + read_stack: &'r mut F, +} + +enum UnwindIteratorState { + Initial(u64), + Unwinding(FrameAddress), + Done, +} + +impl<'u, 'c, 'r, U: Unwinder + ?Sized, F: FnMut(u64) -> Result> + UnwindIterator<'u, 'c, 'r, U, F> +{ + /// Create a new iterator. You'd usually use [`Unwinder::iter_frames`] instead. + pub fn new( + unwinder: &'u U, + pc: u64, + regs: U::UnwindRegs, + cache: &'c mut U::Cache, + read_stack: &'r mut F, + ) -> Self { + Self { + unwinder, + state: UnwindIteratorState::Initial(pc), + regs, + cache, + read_stack, + } + } +} + +impl<'u, 'c, 'r, U: Unwinder + ?Sized, F: FnMut(u64) -> Result> + UnwindIterator<'u, 'c, 'r, U, F> +{ + /// Yield the next frame in the stack. + /// + /// The first frame is `Ok(Some(FrameAddress::InstructionPointer(...)))`. + /// Subsequent frames are `Ok(Some(FrameAddress::ReturnAddress(...)))`. + /// + /// If a root function has been reached, this iterator completes with `Ok(None)`. + /// Otherwise it completes with `Err(...)`, usually indicating that a certain stack + /// address could not be read. 
+ #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Result, Error> { + let next = match self.state { + UnwindIteratorState::Initial(pc) => { + self.state = UnwindIteratorState::Unwinding(FrameAddress::InstructionPointer(pc)); + return Ok(Some(FrameAddress::InstructionPointer(pc))); + } + UnwindIteratorState::Unwinding(address) => { + self.unwinder + .unwind_frame(address, &mut self.regs, self.cache, self.read_stack)? + } + UnwindIteratorState::Done => return Ok(None), + }; + match next { + Some(return_address) => { + let return_address = FrameAddress::from_return_address(return_address) + .ok_or(Error::ReturnAddressIsNull)?; + self.state = UnwindIteratorState::Unwinding(return_address); + Ok(Some(return_address)) + } + None => { + self.state = UnwindIteratorState::Done; + Ok(None) + } + } + } +} + +impl<'u, 'c, 'r, U: Unwinder + ?Sized, F: FnMut(u64) -> Result> FallibleIterator + for UnwindIterator<'u, 'c, 'r, U, F> +{ + type Item = FrameAddress; + type Error = Error; + + fn next(&mut self) -> Result, Error> { + self.next() + } +} + +/// This global generation counter makes it so that the cache can be shared +/// between multiple unwinders. +/// This is a u16, so if you make it wrap around by adding / removing modules +/// more than 65535 times, then you risk collisions in the cache; meaning: +/// unwinding might not work properly if an old unwind rule was found in the +/// cache for the same address and the same (pre-wraparound) modules_generation. +static GLOBAL_MODULES_GENERATION: AtomicU16 = AtomicU16::new(0); + +fn next_global_modules_generation() -> u16 { + GLOBAL_MODULES_GENERATION.fetch_add(1, Ordering::Relaxed) +} + +cfg_if::cfg_if! 
{ + if #[cfg(all(feature = "macho", feature = "pe"))] { + pub trait Unwinding: + Arch + DwarfUnwinding + InstructionAnalysis + CompactUnwindInfoUnwinding + PeUnwinding {} + impl + Unwinding for T {} + } else if #[cfg(feature = "macho")] { + pub trait Unwinding: + Arch + DwarfUnwinding + InstructionAnalysis + CompactUnwindInfoUnwinding {} + impl Unwinding for T {} + } else if #[cfg(feature = "pe")] { + pub trait Unwinding: + Arch + DwarfUnwinding + InstructionAnalysis + PeUnwinding {} + impl Unwinding for T {} + } else { + pub trait Unwinding: Arch + DwarfUnwinding + InstructionAnalysis {} + impl Unwinding for T {} + } +} + +pub struct UnwinderInternal { + /// sorted by avma_range.start + modules: Vec>, + /// Incremented every time modules is changed. + modules_generation: u16, + _arch: PhantomData, + _allocation_policy: PhantomData

, +} + +impl Default for UnwinderInternal { + fn default() -> Self { + Self::new() + } +} + +impl Clone for UnwinderInternal { + fn clone(&self) -> Self { + Self { + modules: self.modules.clone(), + modules_generation: self.modules_generation, + _arch: PhantomData, + _allocation_policy: PhantomData, + } + } +} + +impl UnwinderInternal { + pub fn new() -> Self { + Self { + modules: Vec::new(), + modules_generation: next_global_modules_generation(), + _arch: PhantomData, + _allocation_policy: PhantomData, + } + } +} + +impl, A: Unwinding, P: AllocationPolicy> UnwinderInternal { + pub fn add_module(&mut self, module: Module) { + let insertion_index = match self + .modules + .binary_search_by_key(&module.avma_range.start, |module| module.avma_range.start) + { + Ok(i) => { + #[cfg(feature = "std")] + eprintln!( + "Now we have two modules at the same start address 0x{:x}. This can't be good.", + module.avma_range.start + ); + i + } + Err(i) => i, + }; + self.modules.insert(insertion_index, module); + self.modules_generation = next_global_modules_generation(); + } + + pub fn remove_module(&mut self, module_address_range_start: u64) { + if let Ok(index) = self + .modules + .binary_search_by_key(&module_address_range_start, |module| { + module.avma_range.start + }) + { + self.modules.remove(index); + self.modules_generation = next_global_modules_generation(); + }; + } + + pub fn max_known_code_address(&self) -> u64 { + self.modules.last().map_or(0, |m| m.avma_range.end) + } + + fn find_module_for_address(&self, address: u64) -> Option<(usize, u32)> { + let (module_index, module) = match self + .modules + .binary_search_by_key(&address, |m| m.avma_range.start) + { + Ok(i) => (i, &self.modules[i]), + Err(insertion_index) => { + if insertion_index == 0 { + // address is before first known module + return None; + } + let i = insertion_index - 1; + let module = &self.modules[i]; + if module.avma_range.end <= address { + // address is after this module + return None; + } + (i, 
module) + } + }; + if address < module.base_avma { + // Invalid base address + return None; + } + let relative_address = u32::try_from(address - module.base_avma).ok()?; + Some((module_index, relative_address)) + } + + fn with_cache( + &self, + address: FrameAddress, + regs: &mut A::UnwindRegs, + cache: &mut Cache, + read_stack: &mut F, + callback: G, + ) -> Result, Error> + where + F: FnMut(u64) -> Result, + G: FnOnce( + &Module, + FrameAddress, + u32, + &mut A::UnwindRegs, + &mut Cache, + &mut F, + ) -> Result, UnwinderError>, + { + let lookup_address = address.address_for_lookup(); + let is_first_frame = !address.is_return_address(); + let cache_handle = match cache + .rule_cache + .lookup(lookup_address, self.modules_generation) + { + CacheResult::Hit(unwind_rule) => { + return unwind_rule.exec(is_first_frame, regs, read_stack); + } + CacheResult::Miss(handle) => handle, + }; + + let unwind_rule = match self.find_module_for_address(lookup_address) { + None => A::UnwindRule::fallback_rule(), + Some((module_index, relative_lookup_address)) => { + let module = &self.modules[module_index]; + match callback( + module, + address, + relative_lookup_address, + regs, + cache, + read_stack, + ) { + Ok(UnwindResult::ExecRule(rule)) => rule, + Ok(UnwindResult::Uncacheable(return_address)) => { + return Ok(Some(return_address)) + } + Err(_err) => { + // eprintln!("Unwinder error: {}", err); + A::UnwindRule::fallback_rule() + } + } + } + }; + cache.rule_cache.insert(cache_handle, unwind_rule); + unwind_rule.exec(is_first_frame, regs, read_stack) + } + + pub fn unwind_frame( + &self, + address: FrameAddress, + regs: &mut A::UnwindRegs, + cache: &mut Cache, + read_stack: &mut F, + ) -> Result, Error> + where + F: FnMut(u64) -> Result, + { + self.with_cache(address, regs, cache, read_stack, Self::unwind_frame_impl) + } + + fn unwind_frame_impl( + module: &Module, + address: FrameAddress, + rel_lookup_address: u32, + regs: &mut A::UnwindRegs, + cache: &mut Cache, + read_stack: 
&mut F, + ) -> Result, UnwinderError> + where + F: FnMut(u64) -> Result, + { + let is_first_frame = !address.is_return_address(); + let unwind_result = match &*module.unwind_data { + #[cfg(feature = "macho")] + ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { + unwind_info, + eh_frame, + stubs_svma: stubs, + stub_helper_svma: stub_helper, + base_addresses, + text_data, + } => { + // eprintln!("unwinding with cui and eh_frame in module {}", module.name); + let text_bytes = text_data.as_ref().and_then(|data| { + let offset_from_base = + u32::try_from(data.svma_range.start.checked_sub(module.base_svma)?).ok()?; + Some(TextBytes::new(offset_from_base, &data.bytes[..])) + }); + let stubs_range = if let Some(stubs_range) = stubs { + ( + (stubs_range.start - module.base_svma) as u32, + (stubs_range.end - module.base_svma) as u32, + ) + } else { + (0, 0) + }; + let stub_helper_range = if let Some(stub_helper_range) = stub_helper { + ( + (stub_helper_range.start - module.base_svma) as u32, + (stub_helper_range.end - module.base_svma) as u32, + ) + } else { + (0, 0) + }; + let mut unwinder = CompactUnwindInfoUnwinder::::new( + &unwind_info[..], + text_bytes, + stubs_range, + stub_helper_range, + ); + + let unwind_result = unwinder.unwind_frame(rel_lookup_address, is_first_frame)?; + match unwind_result { + CuiUnwindResult::ExecRule(rule) => UnwindResult::ExecRule(rule), + CuiUnwindResult::NeedDwarf(fde_offset) => { + let eh_frame_data = + eh_frame.as_deref().ok_or(UnwinderError::NoDwarfData)?; + let mut dwarf_unwinder = DwarfUnwinder::<_, A, _>::new( + EndianSlice::new(eh_frame_data, LittleEndian), + UnwindSectionType::EhFrame, + None, + &mut cache.gimli_unwind_context, + base_addresses.clone(), + module.base_svma, + ); + dwarf_unwinder.unwind_frame_with_fde::<_, P::GimliEvaluationStorage<_>>( + regs, + is_first_frame, + rel_lookup_address, + fde_offset, + read_stack, + )? 
+ } + } + } + ModuleUnwindDataInternal::EhFrameHdrAndEhFrame { + eh_frame_hdr, + eh_frame, + base_addresses, + } => { + let eh_frame_hdr_data = &eh_frame_hdr[..]; + let mut dwarf_unwinder = DwarfUnwinder::<_, A, _>::new( + EndianSlice::new(eh_frame, LittleEndian), + UnwindSectionType::EhFrame, + Some(eh_frame_hdr_data), + &mut cache.gimli_unwind_context, + base_addresses.clone(), + module.base_svma, + ); + let fde_offset = dwarf_unwinder + .get_fde_offset_for_relative_address(rel_lookup_address) + .ok_or(UnwinderError::EhFrameHdrCouldNotFindAddress)?; + dwarf_unwinder.unwind_frame_with_fde::<_, P::GimliEvaluationStorage<_>>( + regs, + is_first_frame, + rel_lookup_address, + fde_offset, + read_stack, + )? + } + ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame { + index, + eh_frame, + base_addresses, + } => { + let mut dwarf_unwinder = DwarfUnwinder::<_, A, _>::new( + EndianSlice::new(eh_frame, LittleEndian), + UnwindSectionType::EhFrame, + None, + &mut cache.gimli_unwind_context, + base_addresses.clone(), + module.base_svma, + ); + let fde_offset = index + .fde_offset_for_relative_address(rel_lookup_address) + .ok_or(UnwinderError::DwarfCfiIndexCouldNotFindAddress)?; + dwarf_unwinder.unwind_frame_with_fde::<_, P::GimliEvaluationStorage<_>>( + regs, + is_first_frame, + rel_lookup_address, + fde_offset, + read_stack, + )? + } + ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame { + index, + debug_frame, + base_addresses, + } => { + let mut dwarf_unwinder = DwarfUnwinder::<_, A, _>::new( + EndianSlice::new(debug_frame, LittleEndian), + UnwindSectionType::DebugFrame, + None, + &mut cache.gimli_unwind_context, + base_addresses.clone(), + module.base_svma, + ); + let fde_offset = index + .fde_offset_for_relative_address(rel_lookup_address) + .ok_or(UnwinderError::DwarfCfiIndexCouldNotFindAddress)?; + dwarf_unwinder.unwind_frame_with_fde::<_, P::GimliEvaluationStorage<_>>( + regs, + is_first_frame, + rel_lookup_address, + fde_offset, + read_stack, + )? 
+ } + #[cfg(feature = "pe")] + ModuleUnwindDataInternal::PeUnwindInfo { + pdata, + rdata, + xdata, + text, + } => ::unwind_frame( + crate::pe::PeSections { + pdata, + rdata: rdata.as_ref(), + xdata: xdata.as_ref(), + text: text.as_ref(), + }, + rel_lookup_address, + regs, + is_first_frame, + read_stack, + )?, + ModuleUnwindDataInternal::None => return Err(UnwinderError::NoModuleUnwindData), + }; + Ok(unwind_result) + } +} + +/// The unwind data that should be used when unwinding addresses inside this module. +/// Unwind data describes how to recover register values of the caller frame. +/// +/// The type of unwind information you use depends on the platform and what's available +/// in the binary. +/// +/// Type arguments: +/// +/// - `D`: The type for unwind section data. This allows carrying owned data on the +/// module, e.g. `Vec`. But it could also be a wrapper around mapped memory from +/// a file or a different process, for example. It just needs to provide a slice of +/// bytes via its `Deref` implementation. +enum ModuleUnwindDataInternal { + /// Used on macOS, with mach-O binaries. Compact unwind info is in the `__unwind_info` + /// section and is sometimes supplemented with DWARF CFI information in the `__eh_frame` + /// section. `__stubs` and `__stub_helper` ranges are used by the unwinder. + #[cfg(feature = "macho")] + CompactUnwindInfoAndEhFrame { + unwind_info: D, + eh_frame: Option, + stubs_svma: Option>, + stub_helper_svma: Option>, + base_addresses: crate::dwarf::BaseAddresses, + text_data: Option>, + }, + /// Used with ELF binaries (Linux and friends), in the `.eh_frame_hdr` and `.eh_frame` + /// sections. Contains an index and DWARF CFI. + EhFrameHdrAndEhFrame { + eh_frame_hdr: D, + eh_frame: D, + base_addresses: crate::dwarf::BaseAddresses, + }, + /// Used with ELF binaries (Linux and friends), in the `.eh_frame` section. Contains + /// DWARF CFI. We create a binary index for the FDEs when a module with this unwind + /// data type is added. 
+ DwarfCfiIndexAndEhFrame { + index: DwarfCfiIndex, + eh_frame: D, + base_addresses: crate::dwarf::BaseAddresses, + }, + /// Used with ELF binaries (Linux and friends), in the `.debug_frame` section. Contains + /// DWARF CFI. We create a binary index for the FDEs when a module with this unwind + /// data type is added. + DwarfCfiIndexAndDebugFrame { + index: DwarfCfiIndex, + debug_frame: D, + base_addresses: crate::dwarf::BaseAddresses, + }, + /// Used with PE binaries (Windows). + #[cfg(feature = "pe")] + PeUnwindInfo { + pdata: D, + rdata: Option>, + xdata: Option>, + text: Option>, + }, + /// No unwind information is used. Unwinding in this module will use a fallback rule + /// (usually frame pointer unwinding). + None, +} + +impl> ModuleUnwindDataInternal { + fn new(section_info: &mut impl ModuleSectionInfo) -> Self { + use crate::dwarf::base_addresses_for_sections; + + #[cfg(feature = "macho")] + if let Some(unwind_info) = section_info.section_data(b"__unwind_info") { + let eh_frame = section_info.section_data(b"__eh_frame"); + let stubs = section_info.section_svma_range(b"__stubs"); + let stub_helper = section_info.section_svma_range(b"__stub_helper"); + // Get the bytes of the executable code (instructions). + // + // In mach-O objects, executable code is stored in the `__TEXT` segment, which contains + // multiple executable sections such as `__text`, `__stubs`, and `__stub_helper`. If we + // don't have the full `__TEXT` segment contents, we can fall back to the contents of + // just the `__text` section. 
+ let text_data = if let (Some(bytes), Some(svma_range)) = ( + section_info.segment_data(b"__TEXT"), + section_info.segment_svma_range(b"__TEXT"), + ) { + Some(TextByteData { bytes, svma_range }) + } else if let (Some(bytes), Some(svma_range)) = ( + section_info.section_data(b"__text"), + section_info.section_svma_range(b"__text"), + ) { + Some(TextByteData { bytes, svma_range }) + } else { + None + }; + return ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { + unwind_info, + eh_frame, + stubs_svma: stubs, + stub_helper_svma: stub_helper, + base_addresses: base_addresses_for_sections(section_info), + text_data, + }; + } + + #[cfg(feature = "pe")] + if let Some(pdata) = section_info.section_data(b".pdata") { + let mut range_and_data = |name| { + let rva_range = section_info.section_svma_range(name).and_then(|range| { + Some(Range { + start: (range.start - section_info.base_svma()).try_into().ok()?, + end: (range.end - section_info.base_svma()).try_into().ok()?, + }) + })?; + let data = section_info.section_data(name)?; + Some(DataAtRvaRange { data, rva_range }) + }; + return ModuleUnwindDataInternal::PeUnwindInfo { + pdata, + rdata: range_and_data(b".rdata"), + xdata: range_and_data(b".xdata"), + text: range_and_data(b".text"), + }; + } + + if let Some(eh_frame) = section_info + .section_data(b".eh_frame") + .or_else(|| section_info.section_data(b"__eh_frame")) + { + if let Some(eh_frame_hdr) = section_info + .section_data(b".eh_frame_hdr") + .or_else(|| section_info.section_data(b"__eh_frame_hdr")) + { + ModuleUnwindDataInternal::EhFrameHdrAndEhFrame { + eh_frame_hdr, + eh_frame, + base_addresses: base_addresses_for_sections(section_info), + } + } else { + match DwarfCfiIndex::try_new_eh_frame(&eh_frame, section_info) { + Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame { + index, + eh_frame, + base_addresses: base_addresses_for_sections(section_info), + }, + Err(_) => ModuleUnwindDataInternal::None, + } + } + } else if let Some(debug_frame) = 
section_info.section_data(b".debug_frame") { + match DwarfCfiIndex::try_new_debug_frame(&debug_frame, section_info) { + Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame { + index, + debug_frame, + base_addresses: base_addresses_for_sections(section_info), + }, + Err(_) => ModuleUnwindDataInternal::None, + } + } else { + ModuleUnwindDataInternal::None + } + } +} + +/// Used to supply raw instruction bytes to the unwinder, which uses it to analyze +/// instructions in order to provide high quality unwinding inside function prologues and +/// epilogues. +/// +/// This is only needed on macOS, because mach-O `__unwind_info` and `__eh_frame` only +/// cares about accuracy in function bodies, not in function prologues and epilogues. +/// +/// On Linux, compilers produce `.eh_frame` and `.debug_frame` which provides correct +/// unwind information for all instructions including those in function prologues and +/// epilogues, so instruction analysis is not needed. +/// +/// Type arguments: +/// +/// - `D`: The type for unwind section data. This allows carrying owned data on the +/// module, e.g. `Vec`. But it could also be a wrapper around mapped memory from +/// a file or a different process, for example. It just needs to provide a slice of +/// bytes via its `Deref` implementation. +#[cfg(feature = "macho")] +struct TextByteData { + pub bytes: D, + pub svma_range: Range, +} + +/// Information about a module that is loaded in a process. You might know this under a +/// different name, for example: (Shared) library, binary image, DSO ("Dynamic shared object") +/// +/// The unwinder needs to have an up-to-date list of modules so that it can match an +/// absolute address to the right module, and so that it can find that module's unwind +/// information. +/// +/// Type arguments: +/// +/// - `D`: The type for unwind section data. This allows carrying owned data on the +/// module, e.g. `Vec`. 
But it could also be a wrapper around mapped memory from +/// a file or a different process, for example. It just needs to provide a slice of +/// bytes via its `Deref` implementation. +pub struct Module { + /// The name or file path of the module. Unused, it's just there for easier debugging. + #[allow(unused)] + name: String, + /// The address range where this module is mapped into the process. + avma_range: Range, + /// The base address of this module, in the process's address space. On Linux, the base + /// address can sometimes be different from the start address of the mapped range. + base_avma: u64, + /// The base address of this module, according to the module. + base_svma: u64, + /// The unwind data that should be used for unwinding addresses from this module. + unwind_data: Arc>, +} + +impl Clone for Module { + fn clone(&self) -> Self { + Self { + name: self.name.clone(), + avma_range: self.avma_range.clone(), + base_avma: self.base_avma, + base_svma: self.base_svma, + unwind_data: self.unwind_data.clone(), + } + } +} + +/// Information about a module's sections (and segments). +/// +/// This trait is used as an interface to module information, and each function with `&mut self` is +/// called at most once with a particular argument (e.g., `section_data(b".text")` will be called +/// at most once, so it can move data out of the underlying type if desired). +/// +/// Type arguments: +/// +/// - `D`: The type for section data. This allows carrying owned data on the module, e.g. +/// `Vec`. But it could also be a wrapper around mapped memory from a file or a different +/// process, for example. +pub trait ModuleSectionInfo { + /// Return the base address stated in the module. + /// + /// For mach-O objects, this is the vmaddr of the __TEXT segment. For ELF objects, this is + /// zero. For PE objects, this is the image base address. + /// + /// This is used to convert between SVMAs and relative addresses. 
+ fn base_svma(&self) -> u64; + + /// Get the given section's memory range, as stated in the module. + fn section_svma_range(&mut self, name: &[u8]) -> Option>; + + /// Get the given section's data. This will only be called once per section. + fn section_data(&mut self, name: &[u8]) -> Option; + + /// Get the given segment's memory range, as stated in the module. + fn segment_svma_range(&mut self, _name: &[u8]) -> Option> { + None + } + + /// Get the given segment's data. This will only be called once per segment. + fn segment_data(&mut self, _name: &[u8]) -> Option { + None + } +} + +/// Explicit addresses and data of various sections in the module. This implements +/// the `ModuleSectionInfo` trait. +/// +/// Unless otherwise stated, these are SVMAs, "stated virtual memory addresses", i.e. addresses as +/// stated in the object, as opposed to AVMAs, "actual virtual memory addresses", i.e. addresses in +/// the virtual memory of the profiled process. +/// +/// Code addresses inside a module's unwind information are usually written down as SVMAs, +/// or as relative addresses. For example, DWARF CFI can have code addresses expressed as +/// relative-to-.text addresses or as absolute SVMAs. And mach-O compact unwind info +/// contains addresses relative to the image base address. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct ExplicitModuleSectionInfo { + /// The image base address, as stated in the object. For mach-O objects, this is the + /// vmaddr of the `__TEXT` segment. For ELF objects, this is zero. + /// + /// This is used to convert between SVMAs and relative addresses. + pub base_svma: u64, + /// The address range of the `__text` or `.text` section. This is where most of the compiled + /// code is stored. + /// + /// This is used to detect whether we need to do instruction analysis for an address. + pub text_svma: Option>, + /// The data of the `__text` or `.text` section. This is where most of the compiled code is + /// stored. 
For mach-O binaries, this does not need to be supplied if `text_segment` is supplied. + /// + /// This is used to handle function prologues and epilogues in some cases. + pub text: Option, + /// The address range of the mach-O `__stubs` section. Contains small pieces of + /// executable code for calling imported functions. Code inside this section is not + /// covered by the unwind information in `__unwind_info`. + /// + /// This is used to exclude addresses in this section from incorrectly applying + /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known + /// structure of stub functions. + pub stubs_svma: Option>, + /// The address range of the mach-O `__stub_helper` section. Contains small pieces of + /// executable code for calling imported functions. Code inside this section is not + /// covered by the unwind information in `__unwind_info`. + /// + /// This is used to exclude addresses in this section from incorrectly applying + /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known + /// structure of stub helper + /// functions. + pub stub_helper_svma: Option>, + /// The address range of the `.got` section (Global Offset Table). This is used + /// during DWARF CFI processing, to resolve got-relative addresses. + pub got_svma: Option>, + /// The data of the `__unwind_info` section of mach-O binaries. + pub unwind_info: Option, + /// The address range of the `__eh_frame` or `.eh_frame` section. This is used during DWARF CFI + /// processing, to resolve eh_frame-relative addresses. + pub eh_frame_svma: Option>, + /// The data of the `__eh_frame` or `.eh_frame` section. This is used during DWARF CFI + /// processing, to resolve eh_frame-relative addresses. + pub eh_frame: Option, + /// The address range of the `.eh_frame_hdr` section. This is used during DWARF CFI processing, + /// to resolve eh_frame_hdr-relative addresses. + pub eh_frame_hdr_svma: Option>, + /// The data of the `.eh_frame_hdr` section. 
This is used during DWARF CFI processing, to + /// resolve eh_frame_hdr-relative addresses. + pub eh_frame_hdr: Option, + /// The data of the `.debug_frame` section. The related address range is not needed. + pub debug_frame: Option, + /// The address range of the `__TEXT` segment of mach-O binaries, if available. + pub text_segment_svma: Option>, + /// The data of the `__TEXT` segment of mach-O binaries, if available. + pub text_segment: Option, +} + +impl ModuleSectionInfo for ExplicitModuleSectionInfo +where + D: Deref, +{ + fn base_svma(&self) -> u64 { + self.base_svma + } + + fn section_svma_range(&mut self, name: &[u8]) -> Option> { + match name { + b"__text" | b".text" => self.text_svma.clone(), + b"__stubs" => self.stubs_svma.clone(), + b"__stub_helper" => self.stub_helper_svma.clone(), + b"__eh_frame" | b".eh_frame" => self.eh_frame_svma.clone(), + b"__eh_frame_hdr" | b".eh_frame_hdr" => self.eh_frame_hdr_svma.clone(), + b"__got" | b".got" => self.got_svma.clone(), + _ => None, + } + } + fn section_data(&mut self, name: &[u8]) -> Option { + match name { + b"__text" | b".text" => self.text.take(), + b"__unwind_info" => self.unwind_info.take(), + b"__eh_frame" | b".eh_frame" => self.eh_frame.take(), + b"__eh_frame_hdr" | b".eh_frame_hdr" => self.eh_frame_hdr.take(), + b"__debug_frame" | b".debug_frame" => self.debug_frame.take(), + _ => None, + } + } + fn segment_svma_range(&mut self, name: &[u8]) -> Option> { + match name { + b"__TEXT" => self.text_segment_svma.clone(), + _ => None, + } + } + fn segment_data(&mut self, name: &[u8]) -> Option { + match name { + b"__TEXT" => self.text_segment.take(), + _ => None, + } + } +} + +#[cfg(feature = "object")] +mod object { + use super::{ModuleSectionInfo, Range}; + use object::read::{Object, ObjectSection, ObjectSegment}; + + impl<'data: 'file, 'file, O, D> ModuleSectionInfo for &'file O + where + O: Object<'data>, + D: From<&'data [u8]>, + { + fn base_svma(&self) -> u64 { + if let Some(text_segment) = 
self.segments().find(|s| s.name() == Ok(Some("__TEXT"))) { + // This is a mach-O image. "Relative addresses" are relative to the + // vmaddr of the __TEXT segment. + return text_segment.address(); + } + + // For PE binaries, relative_address_base() returns the image base address. + // Otherwise it returns zero. This gives regular ELF images a base address of zero, + // which is what we want. + self.relative_address_base() + } + + fn section_svma_range(&mut self, name: &[u8]) -> Option> { + let section = self.section_by_name_bytes(name)?; + Some(section.address()..section.address() + section.size()) + } + + fn section_data(&mut self, name: &[u8]) -> Option { + let section = self.section_by_name_bytes(name)?; + section.data().ok().map(|data| data.into()) + } + + fn segment_svma_range(&mut self, name: &[u8]) -> Option> { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + Some(segment.address()..segment.address() + segment.size()) + } + + fn segment_data(&mut self, name: &[u8]) -> Option { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + segment.data().ok().map(|data| data.into()) + } + } +} + +impl> Module { + pub fn new( + name: String, + avma_range: core::ops::Range, + base_avma: u64, + mut section_info: impl ModuleSectionInfo, + ) -> Self { + let unwind_data = ModuleUnwindDataInternal::new(&mut section_info); + + Self { + name, + avma_range, + base_avma, + base_svma: section_info.base_svma(), + unwind_data: Arc::new(unwind_data), + } + } + + pub fn avma_range(&self) -> core::ops::Range { + self.avma_range.clone() + } + + pub fn base_avma(&self) -> u64 { + self.base_avma + } + + pub fn name(&self) -> &str { + &self.name + } +} diff --git a/third_party/rust/framehop/src/x86_64/arch.rs b/third_party/rust/framehop/src/x86_64/arch.rs new file mode 100644 index 000000000000..3a1e7cc62446 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/arch.rs @@ -0,0 +1,10 @@ +use super::unwind_rule::UnwindRuleX86_64; 
+use super::unwindregs::UnwindRegsX86_64; +use crate::arch::Arch; + +/// The x86_64 CPU architecture. +pub struct ArchX86_64; +impl Arch for ArchX86_64 { + type UnwindRule = UnwindRuleX86_64; + type UnwindRegs = UnwindRegsX86_64; +} diff --git a/third_party/rust/framehop/src/x86_64/cache.rs b/third_party/rust/framehop/src/x86_64/cache.rs new file mode 100644 index 000000000000..fa3abdfbe5fb --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/cache.rs @@ -0,0 +1,32 @@ +use super::unwind_rule::*; +use crate::cache::*; + +/// The unwinder cache type for [`UnwinderX86_64`](super::UnwinderX86_64). +pub struct CacheX86_64( + pub Cache, +); + +impl CacheX86_64 { + /// Create a new cache. + pub fn new() -> Self { + Self(Cache::new()) + } +} + +impl CacheX86_64

{ + /// Create a new cache. + pub fn new_in() -> Self { + Self(Cache::new()) + } + + /// Returns a snapshot of the cache usage statistics. + pub fn stats(&self) -> CacheStats { + self.0.rule_cache.stats() + } +} + +impl Default for CacheX86_64

{ + fn default() -> Self { + Self::new_in() + } +} diff --git a/third_party/rust/framehop/src/x86_64/dwarf.rs b/third_party/rust/framehop/src/x86_64/dwarf.rs new file mode 100644 index 000000000000..12fd80ad5495 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/dwarf.rs @@ -0,0 +1,164 @@ +use gimli::{ + CfaRule, Encoding, EvaluationStorage, Reader, ReaderOffset, Register, RegisterRule, + UnwindContextStorage, UnwindSection, UnwindTableRow, X86_64, +}; + +use super::{arch::ArchX86_64, unwind_rule::UnwindRuleX86_64, unwindregs::UnwindRegsX86_64}; +use crate::dwarf::{ + eval_cfa_rule, eval_register_rule, ConversionError, DwarfUnwindRegs, DwarfUnwinderError, + DwarfUnwinding, +}; +use crate::unwind_result::UnwindResult; + +impl DwarfUnwindRegs for UnwindRegsX86_64 { + fn get(&self, register: Register) -> Option { + match register { + X86_64::RA => Some(self.ip()), + X86_64::RSP => Some(self.sp()), + X86_64::RBP => Some(self.bp()), + _ => None, + } + } +} + +impl DwarfUnwinding for ArchX86_64 { + fn unwind_frame( + section: &impl UnwindSection, + unwind_info: &UnwindTableRow, + encoding: Encoding, + regs: &mut Self::UnwindRegs, + is_first_frame: bool, + read_stack: &mut F, + ) -> Result, DwarfUnwinderError> + where + F: FnMut(u64) -> Result, + R: Reader, + UCS: UnwindContextStorage, + ES: EvaluationStorage, + { + let cfa_rule = unwind_info.cfa(); + let bp_rule = unwind_info.register(X86_64::RBP); + let ra_rule = unwind_info.register(X86_64::RA); + + match translate_into_unwind_rule(cfa_rule, &bp_rule, &ra_rule) { + Ok(unwind_rule) => return Ok(UnwindResult::ExecRule(unwind_rule)), + Err(_err) => { + // Could not translate into a cacheable unwind rule. Fall back to the generic path. 
+ // eprintln!("Unwind rule translation failed: {:?}", err); + } + } + + let cfa = eval_cfa_rule::(section, cfa_rule, encoding, regs) + .ok_or(DwarfUnwinderError::CouldNotRecoverCfa)?; + + let ip = regs.ip(); + let bp = regs.bp(); + let sp = regs.sp(); + + let new_bp = eval_register_rule::( + section, bp_rule, cfa, encoding, bp, regs, read_stack, + ) + .unwrap_or(bp); + + let return_address = match eval_register_rule::( + section, ra_rule, cfa, encoding, ip, regs, read_stack, + ) { + Some(ra) => ra, + None => { + read_stack(cfa - 8).map_err(|_| DwarfUnwinderError::CouldNotRecoverReturnAddress)? + } + }; + + if cfa == sp && return_address == ip { + return Err(DwarfUnwinderError::DidNotAdvance); + } + if !is_first_frame && cfa < regs.sp() { + return Err(DwarfUnwinderError::StackPointerMovedBackwards); + } + + regs.set_ip(return_address); + regs.set_bp(new_bp); + regs.set_sp(cfa); + + Ok(UnwindResult::Uncacheable(return_address)) + } + + fn rule_if_uncovered_by_fde() -> Self::UnwindRule { + UnwindRuleX86_64::JustReturnIfFirstFrameOtherwiseFp + } +} + +fn register_rule_to_cfa_offset( + rule: &RegisterRule, +) -> Result, ConversionError> { + match *rule { + RegisterRule::Undefined | RegisterRule::SameValue => Ok(None), + RegisterRule::Offset(offset) => Ok(Some(offset)), + _ => Err(ConversionError::RegisterNotStoredRelativeToCfa), + } +} + +fn translate_into_unwind_rule( + cfa_rule: &CfaRule, + bp_rule: &RegisterRule, + ra_rule: &RegisterRule, +) -> Result { + match ra_rule { + RegisterRule::Undefined => { + // No return address. This means that we've reached the end of the stack. + return Ok(UnwindRuleX86_64::EndOfStack); + } + RegisterRule::Offset(offset) if *offset == -8 => { + // This is normal case. Return address is [CFA-8]. + } + RegisterRule::Offset(_) => { + // Unsupported, will have to use the slow path. + return Err(ConversionError::ReturnAddressRuleWithUnexpectedOffset); + } + _ => { + // Unsupported, will have to use the slow path. 
+ return Err(ConversionError::ReturnAddressRuleWasWeird); + } + } + + match cfa_rule { + CfaRule::RegisterAndOffset { register, offset } => match *register { + X86_64::RSP => { + let sp_offset_by_8 = + u16::try_from(offset / 8).map_err(|_| ConversionError::SpOffsetDoesNotFit)?; + let fp_cfa_offset = register_rule_to_cfa_offset(bp_rule)?; + match fp_cfa_offset { + None => Ok(UnwindRuleX86_64::OffsetSp { sp_offset_by_8 }), + Some(bp_cfa_offset) => { + let bp_storage_offset_from_sp_by_8 = + i16::try_from((offset + bp_cfa_offset) / 8) + .map_err(|_| ConversionError::FpStorageOffsetDoesNotFit)?; + Ok(UnwindRuleX86_64::OffsetSpAndRestoreBp { + sp_offset_by_8, + bp_storage_offset_from_sp_by_8, + }) + } + } + } + X86_64::RBP => { + let bp_cfa_offset = register_rule_to_cfa_offset(bp_rule)? + .ok_or(ConversionError::FramePointerRuleDoesNotRestoreBp)?; + if *offset == 16 && bp_cfa_offset == -16 { + Ok(UnwindRuleX86_64::UseFramePointer) + } else { + // TODO: Maybe handle this case. This case has been observed in _ffi_call_unix64, + // which has the following unwind table: + // + // 00000060 00000024 0000001c FDE cie=00000048 pc=000de548...000de6a6 + // 0xde548: CFA=reg7+8: reg16=[CFA-8] + // 0xde562: CFA=reg6+32: reg6=[CFA-16], reg16=[CFA-8] + // 0xde5ad: CFA=reg7+8: reg16=[CFA-8] + // 0xde668: CFA=reg7+8: reg6=[CFA-16], reg16=[CFA-8] + Err(ConversionError::FramePointerRuleHasStrangeBpOffset) + } + } + _ => Err(ConversionError::CfaIsOffsetFromUnknownRegister), + }, + CfaRule::Expression(_) => Err(ConversionError::CfaIsExpression), + } +} diff --git a/third_party/rust/framehop/src/x86_64/instruction_analysis/epilogue.rs b/third_party/rust/framehop/src/x86_64/instruction_analysis/epilogue.rs new file mode 100644 index 000000000000..13c8640ce0a4 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/instruction_analysis/epilogue.rs @@ -0,0 +1,85 @@ +use super::super::unwind_rule::UnwindRuleX86_64; + +pub fn unwind_rule_from_detected_epilogue( + text_bytes: &[u8], + pc_offset: 
usize, +) -> Option { + let (slice_from_start, slice_to_end) = text_bytes.split_at(pc_offset); + + let mut sp_offset_by_8 = 0; + let mut bp_offset_by_8 = None; + let mut bytes = slice_to_end; + loop { + if bytes.is_empty() { + return None; + } + + // Detect ret + if bytes[0] == 0xc3 { + break; + } + // Detect jmp + if bytes[0] == 0xeb || bytes[0] == 0xe9 || bytes[0] == 0xff { + // This could be a tail call, or just a regular jump inside the current function. + // Ideally, we would check whether the jump target is inside this function. + // But this would require having an accurate idea of where the current function + // starts and ends. + // For now, we instead use the following heuristic: Any jmp that directly follows + // a `pop` instruction is treated as a tail call. + if sp_offset_by_8 != 0 { + // We have detected a pop in the previous loop iteration. + break; + } + // This must be the first iteration. Look backwards. + if let Some(potential_pop_byte) = slice_from_start.last() { + // Get the previous byte. We have no idea how long the previous instruction + // is, so we might be looking at a random last byte of a wider instruction. + // Let's just pray that this is not the case. + if potential_pop_byte & 0xf8 == 0x58 { + // Assuming we haven't just misinterpreted the last byte of a wider + // instruction, this is a `pop rXX`. + break; + } + } + return None; + } + // Detect pop rbp + if bytes[0] == 0x5d { + bp_offset_by_8 = Some(sp_offset_by_8 as i16); + sp_offset_by_8 += 1; + bytes = &bytes[1..]; + continue; + } + // Detect pop rXX + if (0x58..=0x5f).contains(&bytes[0]) { + sp_offset_by_8 += 1; + bytes = &bytes[1..]; + continue; + } + // Detect pop rXX with prefix + if bytes.len() >= 2 && bytes[0] & 0xfe == 0x40 && bytes[1] & 0xf8 == 0x58 { + sp_offset_by_8 += 1; + bytes = &bytes[2..]; + continue; + } + // Unexpected instruction. + // This probably means that we weren't in an epilogue after all. + return None; + } + + // We've found the return or the tail call. 
+ let rule = if sp_offset_by_8 == 0 { + UnwindRuleX86_64::JustReturn + } else { + sp_offset_by_8 += 1; // Add one for popping the return address. + if let Some(bp_storage_offset_from_sp_by_8) = bp_offset_by_8 { + UnwindRuleX86_64::OffsetSpAndRestoreBp { + sp_offset_by_8, + bp_storage_offset_from_sp_by_8, + } + } else { + UnwindRuleX86_64::OffsetSp { sp_offset_by_8 } + } + }; + Some(rule) +} diff --git a/third_party/rust/framehop/src/x86_64/instruction_analysis/mod.rs b/third_party/rust/framehop/src/x86_64/instruction_analysis/mod.rs new file mode 100644 index 000000000000..146f6603cc8d --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/instruction_analysis/mod.rs @@ -0,0 +1,24 @@ +use super::arch::ArchX86_64; +use crate::instruction_analysis::InstructionAnalysis; + +mod epilogue; +mod prologue; + +use epilogue::unwind_rule_from_detected_epilogue; +use prologue::unwind_rule_from_detected_prologue; + +impl InstructionAnalysis for ArchX86_64 { + fn rule_from_prologue_analysis( + text_bytes: &[u8], + pc_offset: usize, + ) -> Option { + unwind_rule_from_detected_prologue(text_bytes, pc_offset) + } + + fn rule_from_epilogue_analysis( + text_bytes: &[u8], + pc_offset: usize, + ) -> Option { + unwind_rule_from_detected_epilogue(text_bytes, pc_offset) + } +} diff --git a/third_party/rust/framehop/src/x86_64/instruction_analysis/prologue.rs b/third_party/rust/framehop/src/x86_64/instruction_analysis/prologue.rs new file mode 100644 index 000000000000..25abeb5c0474 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/instruction_analysis/prologue.rs @@ -0,0 +1,100 @@ +use super::super::unwind_rule::UnwindRuleX86_64; + +pub fn unwind_rule_from_detected_prologue( + text_bytes: &[u8], + pc_offset: usize, +) -> Option { + let (slice_from_start, slice_to_end) = text_bytes.split_at(pc_offset); + if !is_next_instruction_expected_in_prologue(slice_to_end) { + return None; + } + // We're in a prologue. Find the current stack depth of this frame by + // walking backwards. 
/// Returns whether `bytes` begins with an instruction that is typical for an x86_64 function
/// prologue.
///
/// The recognized instructions are:
///
/// - `push rXX`, with or without a `0x40` / `0x41` prefix byte,
/// - `sub rsp, imm8` and `sub rsp, imm32`, with or without the `0x48` prefix byte,
/// - `mov rbp, rsp`.
///
/// Only the leading opcode bytes are inspected (three at most), so a slice of any length is
/// accepted, including an empty one, for which the answer is `false`.
fn is_next_instruction_expected_in_prologue(bytes: &[u8]) -> bool {
    matches!(
        bytes,
        // push rXX
        [0x50..=0x57, ..]
            // push rXX with prefix
            | [0x40 | 0x41, 0x50..=0x57, ..]
            // sub rsp, 0xXX (8-bit immediate operand)
            | [0x83, 0xec, ..]
            // sub rsp, 0xXX with prefix (8-bit immediate operand)
            | [0x48, 0x83, 0xec, ..]
            // sub rsp, 0xXX (32-bit immediate operand)
            | [0x81, 0xec, ..]
            // sub rsp, 0xXX with prefix (32-bit immediate operand)
            | [0x48, 0x81, 0xec, ..]
            // mov rbp, rsp
            | [0x48, 0x89, 0xe5, ..]
    )
}
+// 4e88e45 48 81 EC 80 00 00 00 sub rsp, 0x80 +// 4e88e4c 48 89 F3 mov rbx, rsi +// +// +// 4423f9 55 push rbp +// 4423fa 48 89 E5 mov rbp, rsp +// 4423fd 41 57 push r15 +// 4423ff 41 56 push r14 +// 442401 41 55 push r13 +// 442403 41 54 push r12 +// 442405 53 push rbx +// 442406 48 83 EC 18 sub rsp, 0x18 +// 44240a 48 8B 07 mov rax, qword [rdi] diff --git a/third_party/rust/framehop/src/x86_64/macho.rs b/third_party/rust/framehop/src/x86_64/macho.rs new file mode 100644 index 000000000000..199aabf6b186 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/macho.rs @@ -0,0 +1,171 @@ +use super::arch::ArchX86_64; +use super::unwind_rule::UnwindRuleX86_64; +use crate::instruction_analysis::InstructionAnalysis; +use crate::macho::{CompactUnwindInfoUnwinderError, CompactUnwindInfoUnwinding, CuiUnwindResult}; +use macho_unwind_info::opcodes::{OpcodeX86_64, RegisterNameX86_64}; +use macho_unwind_info::Function; + +impl CompactUnwindInfoUnwinding for ArchX86_64 { + fn unwind_frame( + function: Function, + is_first_frame: bool, + address_offset_within_function: usize, + function_bytes: Option<&[u8]>, + ) -> Result, CompactUnwindInfoUnwinderError> { + let opcode = OpcodeX86_64::parse(function.opcode); + if is_first_frame { + // The pc might be in a prologue or an epilogue. The compact unwind info format ignores + // prologues and epilogues; the opcodes only describe the function body. So we do some + // instruction analysis to check for prologues and epilogues. + if let Some(function_bytes) = function_bytes { + if let Some(rule) = Self::rule_from_instruction_analysis( + function_bytes, + address_offset_within_function, + ) { + // We are inside a prologue / epilogue. Ignore the opcode and use the rule from + // instruction analysis. + return Ok(CuiUnwindResult::ExecRule(rule)); + } + if opcode == OpcodeX86_64::Null + && function_bytes.starts_with(&[0x55, 0x48, 0x89, 0xe5]) + { + // The function is uncovered but it has a `push rbp; mov rbp, rsp` prologue. 
+ return Ok(CuiUnwindResult::ExecRule(UnwindRuleX86_64::UseFramePointer)); + } + } + if opcode == OpcodeX86_64::Null { + return Ok(CuiUnwindResult::ExecRule(UnwindRuleX86_64::JustReturn)); + } + } + + // At this point we know with high certainty that we are in a function body. + let r = match opcode { + OpcodeX86_64::Null => { + return Err(CompactUnwindInfoUnwinderError::FunctionHasNoInfo); + } + OpcodeX86_64::FramelessImmediate { + stack_size_in_bytes, + saved_regs, + } => { + if stack_size_in_bytes == 8 { + CuiUnwindResult::ExecRule(UnwindRuleX86_64::JustReturn) + } else { + let bp_positon_from_outside = saved_regs + .iter() + .rev() + .flatten() + .position(|r| *r == RegisterNameX86_64::Rbp); + match bp_positon_from_outside { + Some(pos) => { + let bp_offset_from_sp = + stack_size_in_bytes as i32 - 2 * 8 - pos as i32 * 8; + let bp_storage_offset_from_sp_by_8 = + i16::try_from(bp_offset_from_sp / 8).map_err(|_| { + CompactUnwindInfoUnwinderError::BpOffsetDoesNotFit + })?; + CuiUnwindResult::ExecRule(UnwindRuleX86_64::OffsetSpAndRestoreBp { + sp_offset_by_8: stack_size_in_bytes / 8, + bp_storage_offset_from_sp_by_8, + }) + } + None => CuiUnwindResult::ExecRule(UnwindRuleX86_64::OffsetSp { + sp_offset_by_8: stack_size_in_bytes / 8, + }), + } + } + } + OpcodeX86_64::FramelessIndirect { + immediate_offset_from_function_start, + stack_adjust_in_bytes, + saved_regs, + } => { + let function_bytes = function_bytes.ok_or( + CompactUnwindInfoUnwinderError::NoTextBytesToLookUpIndirectStackOffset, + )?; + let sub_immediate_bytes = function_bytes + .get( + immediate_offset_from_function_start as usize + ..immediate_offset_from_function_start as usize + 4, + ) + .ok_or(CompactUnwindInfoUnwinderError::IndirectStackOffsetOutOfBounds)?; + let sub_immediate = u32::from_le_bytes([ + sub_immediate_bytes[0], + sub_immediate_bytes[1], + sub_immediate_bytes[2], + sub_immediate_bytes[3], + ]); + let stack_size_in_bytes = + sub_immediate + .checked_add(stack_adjust_in_bytes.into()) + 
.ok_or(CompactUnwindInfoUnwinderError::StackAdjustOverflow)?; + let sp_offset_by_8 = u16::try_from(stack_size_in_bytes / 8) + .map_err(|_| CompactUnwindInfoUnwinderError::StackSizeDoesNotFit)?; + let bp_positon_from_outside = saved_regs + .iter() + .rev() + .flatten() + .position(|r| *r == RegisterNameX86_64::Rbp); + match bp_positon_from_outside { + Some(pos) => { + let bp_offset_from_sp = stack_size_in_bytes as i32 - 2 * 8 - pos as i32 * 8; + let bp_storage_offset_from_sp_by_8 = + i16::try_from(bp_offset_from_sp / 8) + .map_err(|_| CompactUnwindInfoUnwinderError::BpOffsetDoesNotFit)?; + CuiUnwindResult::ExecRule(UnwindRuleX86_64::OffsetSpAndRestoreBp { + sp_offset_by_8, + bp_storage_offset_from_sp_by_8, + }) + } + None => { + CuiUnwindResult::ExecRule(UnwindRuleX86_64::OffsetSp { sp_offset_by_8 }) + } + } + } + OpcodeX86_64::Dwarf { eh_frame_fde } => CuiUnwindResult::NeedDwarf(eh_frame_fde), + OpcodeX86_64::FrameBased { .. } => { + CuiUnwindResult::ExecRule(UnwindRuleX86_64::UseFramePointer) + } + OpcodeX86_64::UnrecognizedKind(kind) => { + return Err(CompactUnwindInfoUnwinderError::BadOpcodeKind(kind)) + } + OpcodeX86_64::InvalidFrameless => { + return Err(CompactUnwindInfoUnwinderError::InvalidFrameless) + } + }; + Ok(r) + } + + fn rule_for_stub_helper( + offset: u32, + ) -> Result, CompactUnwindInfoUnwinderError> { + // shared: + // +0x0 235cc4 4C 8D 1D 3D 03 04 00 lea r11, qword [dyld_stub_binder_276000+8] + // +0x7 235ccb 41 53 push r11 + // +0x9 235ccd FF 25 2D 03 04 00 jmp qword [dyld_stub_binder_276000] ; tail call + // +0xf 235cd3 90 nop + // first stub: + // +0x10 235cd4 68 F1 61 00 00 push 0x61f1 + // +0x15 235cd9 E9 E6 FF FF FF jmp 0x235cc4 ; jump to shared + // second stub: + // +0x1a 235cde 68 38 62 00 00 push 0x6238 + // +0x1f 235ce3 E9 DC FF FF FF jmp 0x235cc4 ; jump to shared + let rule = if offset < 0x7 { + // pop 1 and return + UnwindRuleX86_64::OffsetSp { sp_offset_by_8: 2 } + } else if offset < 0x10 { + // pop 2 and return + 
UnwindRuleX86_64::OffsetSp { sp_offset_by_8: 3 } + } else { + let offset_after_shared = offset - 0x10; + let offset_within_stub = offset_after_shared % 10; + if offset_within_stub < 5 { + UnwindRuleX86_64::JustReturn + // just return + } else { + // pop 1 and return + UnwindRuleX86_64::OffsetSp { sp_offset_by_8: 2 } + } + }; + Ok(CuiUnwindResult::ExecRule(rule)) + } +} diff --git a/third_party/rust/framehop/src/x86_64/mod.rs b/third_party/rust/framehop/src/x86_64/mod.rs new file mode 100644 index 000000000000..aa0ad6e7da0e --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/mod.rs @@ -0,0 +1,18 @@ +mod arch; +mod cache; +mod dwarf; +mod instruction_analysis; +#[cfg(feature = "macho")] +mod macho; +#[cfg(feature = "pe")] +mod pe; +mod register_ordering; +mod unwind_rule; +mod unwinder; +mod unwindregs; + +pub use arch::*; +pub use cache::*; +pub use unwind_rule::*; +pub use unwinder::*; +pub use unwindregs::*; diff --git a/third_party/rust/framehop/src/x86_64/pe.rs b/third_party/rust/framehop/src/x86_64/pe.rs new file mode 100644 index 000000000000..2178b4598ef2 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/pe.rs @@ -0,0 +1,224 @@ +use super::{ + arch::ArchX86_64, + unwind_rule::{OffsetOrPop, UnwindRuleX86_64}, + unwindregs::Reg, +}; +use crate::arch::Arch; +use crate::pe::{PeSections, PeUnwinderError, PeUnwinding}; +use crate::unwind_result::UnwindResult; +use core::ops::ControlFlow; + +use alloc::vec::Vec; +use pe_unwind_info::x86_64::{ + FunctionEpilogInstruction, FunctionTableEntries, Register, UnwindInfo, UnwindInfoTrailer, + UnwindOperation, UnwindState, +}; + +struct State<'a, F> { + regs: &'a mut ::UnwindRegs, + read_stack: &'a mut F, +} + +impl UnwindState for State<'_, F> +where + F: FnMut(u64) -> Result, +{ + fn read_register(&mut self, register: Register) -> u64 { + self.regs.get(convert_pe_register(register)) + } + + fn read_stack(&mut self, addr: u64) -> Option { + (self.read_stack)(addr).ok() + } + + fn write_register(&mut self, 
register: Register, value: u64) { + self.regs.set(convert_pe_register(register), value) + } + + fn write_xmm_register(&mut self, _register: pe_unwind_info::x86_64::XmmRegister, _value: u128) { + // Ignore + } +} + +fn convert_pe_register(r: Register) -> Reg { + match r { + Register::RAX => Reg::RAX, + Register::RCX => Reg::RCX, + Register::RDX => Reg::RDX, + Register::RBX => Reg::RBX, + Register::RSP => Reg::RSP, + Register::RBP => Reg::RBP, + Register::RSI => Reg::RSI, + Register::RDI => Reg::RDI, + Register::R8 => Reg::R8, + Register::R9 => Reg::R9, + Register::R10 => Reg::R10, + Register::R11 => Reg::R11, + Register::R12 => Reg::R12, + Register::R13 => Reg::R13, + Register::R14 => Reg::R14, + Register::R15 => Reg::R15, + } +} + +impl From<&'_ FunctionEpilogInstruction> for OffsetOrPop { + fn from(value: &'_ FunctionEpilogInstruction) -> Self { + match value { + FunctionEpilogInstruction::AddSP(offset) => { + if let Ok(v) = (offset / 8).try_into() { + OffsetOrPop::OffsetBy8(v) + } else { + OffsetOrPop::None + } + } + FunctionEpilogInstruction::Pop(reg) => OffsetOrPop::Pop(convert_pe_register(*reg)), + _ => OffsetOrPop::None, + } + } +} + +impl From<&'_ UnwindOperation> for OffsetOrPop { + fn from(value: &'_ UnwindOperation) -> Self { + match value { + UnwindOperation::UnStackAlloc(offset) => { + if let Ok(v) = (offset / 8).try_into() { + OffsetOrPop::OffsetBy8(v) + } else { + OffsetOrPop::None + } + } + UnwindOperation::PopNonVolatile(reg) => OffsetOrPop::Pop(convert_pe_register(*reg)), + _ => OffsetOrPop::None, + } + } +} + +impl PeUnwinding for ArchX86_64 { + fn unwind_frame( + sections: PeSections, + address: u32, + regs: &mut Self::UnwindRegs, + is_first_frame: bool, + read_stack: &mut F, + ) -> Result, PeUnwinderError> + where + F: FnMut(u64) -> Result, + D: core::ops::Deref, + { + let entries = FunctionTableEntries::parse(sections.pdata); + let Some(function) = entries.lookup(address) else { + return Ok(UnwindResult::ExecRule(UnwindRuleX86_64::JustReturn)); 
+ }; + + let read_stack_err = |read_stack: &mut F, addr| { + read_stack(addr).map_err(|()| PeUnwinderError::MissingStackData(Some(addr))) + }; + + let unwind_info_address = function.unwind_info_address.get(); + let unwind_info = + UnwindInfo::parse(sections.unwind_info_memory_at_rva(unwind_info_address)?) + .ok_or(PeUnwinderError::UnwindInfoParseError)?; + + if is_first_frame { + // Check whether the address is in the function epilog. If so, we need to + // simulate the remaining epilog instructions (unwind codes don't account for + // unwinding from the epilog). We only need to check this for the first unwind info (if + // there are chained infos). + let bytes = (function.end_address.get() - address) as usize; + let instruction = §ions.text_memory_at_rva(address)?[..bytes]; + if let Ok(epilog_instructions) = + FunctionEpilogInstruction::parse_sequence(instruction, unwind_info.frame_register()) + { + // If the epilog is an optional AddSP followed by Pops, we can return a cache + // rule. 
+ if let Some(rule) = + UnwindRuleX86_64::for_sequence_of_offset_or_pop(epilog_instructions.iter()) + { + return Ok(UnwindResult::ExecRule(rule)); + } + + for instruction in epilog_instructions.iter() { + match instruction { + FunctionEpilogInstruction::AddSP(offset) => { + let rsp = regs.get(Reg::RSP); + regs.set(Reg::RSP, rsp + *offset as u64); + } + FunctionEpilogInstruction::AddSPFromFP(offset) => { + let fp = unwind_info + .frame_register() + .expect("invalid fp register offset"); + let fp = convert_pe_register(fp); + let fp = regs.get(fp); + regs.set(Reg::RSP, fp + *offset as u64); + } + FunctionEpilogInstruction::Pop(reg) => { + let rsp = regs.get(Reg::RSP); + let val = read_stack_err(read_stack, rsp)?; + regs.set(convert_pe_register(*reg), val); + regs.set(Reg::RSP, rsp + 8); + } + } + } + + let rsp = regs.get(Reg::RSP); + let ra = read_stack_err(read_stack, rsp)?; + regs.set(Reg::RSP, rsp + 8); + + return Ok(UnwindResult::Uncacheable(ra)); + } + } + + // Get all chained UnwindInfo and resolve errors when collecting. + let chained_info = core::iter::successors(Some(Ok(unwind_info)), |info| { + let Ok(info) = info else { + return None; + }; + if let Some(UnwindInfoTrailer::ChainedUnwindInfo { chained }) = info.trailer() { + let unwind_info_address = chained.unwind_info_address.get(); + Some( + sections + .unwind_info_memory_at_rva(unwind_info_address) + .and_then(|data| { + UnwindInfo::parse(data).ok_or(PeUnwinderError::UnwindInfoParseError) + }), + ) + } else { + None + } + }) + .collect::, _>>()?; + + // Get all operations across chained UnwindInfo. The first should be filtered to only those + // operations which are before the offset in the function. 
+ let offset = address - function.begin_address.get(); + let operations = chained_info.into_iter().enumerate().flat_map(|(i, info)| { + info.unwind_operations() + .skip_while(move |(o, _)| i == 0 && *o as u32 > offset) + .map(|(_, op)| op) + }); + + // We need to collect operations to first check (without losing ownership) whether an + // unwind rule can be returned. + let operations = operations.collect::>(); + if let Some(rule) = UnwindRuleX86_64::for_sequence_of_offset_or_pop(operations.iter()) { + return Ok(UnwindResult::ExecRule(rule)); + } + + // Resolve operations to get the return address. + let mut state = State { regs, read_stack }; + for op in operations { + if let ControlFlow::Break(ra) = unwind_info + .resolve_operation(&mut state, &op) + .ok_or(PeUnwinderError::MissingStackData(None))? + { + return Ok(UnwindResult::Uncacheable(ra)); + } + } + + let rsp = regs.get(Reg::RSP); + let ra = read_stack_err(read_stack, rsp)?; + regs.set(Reg::RSP, rsp + 8); + + Ok(UnwindResult::Uncacheable(ra)) + } +} diff --git a/third_party/rust/framehop/src/x86_64/register_ordering.rs b/third_party/rust/framehop/src/x86_64/register_ordering.rs new file mode 100644 index 000000000000..d76733073b3a --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/register_ordering.rs @@ -0,0 +1,84 @@ +use super::unwindregs::Reg; +use arrayvec::ArrayVec; + +const ENCODE_REGISTERS: [Reg; 8] = [ + Reg::RBX, + Reg::RBP, + Reg::RDI, + Reg::RSI, + Reg::R12, + Reg::R13, + Reg::R14, + Reg::R15, +]; + +pub fn decode(count: u8, encoded_ordering: u16) -> ArrayVec { + let mut regs: ArrayVec = ENCODE_REGISTERS.into(); + let mut r = encoded_ordering; + let mut n: u16 = 8; + while r != 0 { + let index = r % n; + if index != 0 { + regs[(8 - n as usize)..].swap(index as usize, 0); + } + r /= n; + n -= 1; + } + regs.truncate(count as usize); + regs +} + +pub fn encode(registers: &[Reg]) -> Option<(u8, u16)> { + if registers.len() > ENCODE_REGISTERS.len() { + return None; + } + + let count = 
registers.len() as u8; + let mut r: u16 = 0; + let mut reg_order: ArrayVec = ENCODE_REGISTERS.into(); + + let mut scale: u16 = 1; + for (i, reg) in registers.iter().enumerate() { + let index = reg_order[i..].iter().position(|r| r == reg)?; + if index as u16 != 0 { + reg_order[i..].swap(index, 0); + } + r += index as u16 * scale; + scale *= 8 - i as u16; + } + Some((count, r)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn unhandled_orderings() { + use super::Reg::*; + + assert_eq!(encode(&[RAX]), None, "RAX is a volatile register, i.e. not a callee-save register, so it does not need to be restored during epilogs and is not covered by the encoding."); + assert_eq!(encode(&[RSI, RSI]), None, "Valid register orderings only contain each register (at most) once, so there is no encoding for a sequence with repeated registers."); + } + + #[test] + fn roundtrip_all() { + // Test all possible register orderings. + // That is, for all permutations of length 0 to 8 of the ENCODE_REGISTERS array, check that + // the register ordering rountrips successfully through encoding and decoding. 
+ use itertools::Itertools; + for permutation in (0..=8).flat_map(|k| ENCODE_REGISTERS.iter().cloned().permutations(k)) { + let permutation = permutation.as_slice(); + let encoding = encode(permutation); + if let Some((count, encoded)) = encoding { + assert_eq!( + decode(count, encoded).as_slice(), + permutation, + "Register permutation should roundtrip correctly", + ); + } else { + panic!("Register permutation failed to encode: {permutation:?}"); + } + } + } +} diff --git a/third_party/rust/framehop/src/x86_64/unwind_rule.rs b/third_party/rust/framehop/src/x86_64/unwind_rule.rs new file mode 100644 index 000000000000..1c927190ac8f --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/unwind_rule.rs @@ -0,0 +1,309 @@ +use super::register_ordering; +use super::unwindregs::{Reg, UnwindRegsX86_64}; +use crate::add_signed::checked_add_signed; +use crate::error::Error; +use crate::unwind_rule::UnwindRule; +use arrayvec::ArrayVec; + +/// For all of these: return address is *(new_sp - 8) +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UnwindRuleX86_64 { + EndOfStack, + /// (sp, bp) = (sp + 8, bp) + JustReturn, + /// (sp, bp) = if is_first_frame (sp + 8, bp) else (bp + 16, *bp) + JustReturnIfFirstFrameOtherwiseFp, + /// (sp, bp) = (sp + 8x, bp) + OffsetSp { + sp_offset_by_8: u16, + }, + /// (sp, bp) = (sp + 8x, *(sp + 8y)) + OffsetSpAndRestoreBp { + sp_offset_by_8: u16, + bp_storage_offset_from_sp_by_8: i16, + }, + /// (sp, bp) = (bp + 16, *bp) + UseFramePointer, + /// (sp, ...) = (sp + 8 * (offset + register count), ... popped according to encoded ordering) + /// This supports the common case of pushed callee-saved registers followed by a stack + /// allocation. Up to 8 registers can be stored, which covers all callee-saved registers (aside + /// from RSP which is implicit). + /// + /// The registers are stored in a separate compressed ordering to facilitate restoring register + /// values if desired. If not for this we could simply store the total offset. 
+ OffsetSpAndPopRegisters { + /// The additional stack pointer offset to undo before popping the registers, divided by 8 bytes. + sp_offset_by_8: u16, + /// The number of registers to pop from the stack. + register_count: u8, + /// An encoded ordering of the callee-save registers to pop from the stack, see register_ordering. + encoded_registers_to_pop: u16, + }, +} + +pub enum OffsetOrPop { + None, + OffsetBy8(u16), + Pop(Reg), +} + +impl UnwindRuleX86_64 { + /// Get the rule which represents the given operations, if possible. + pub fn for_sequence_of_offset_or_pop(iter: I) -> Option + where + I: Iterator, + T: Into, + { + let mut iter = iter.map(Into::into).peekable(); + let sp_offset_by_8 = if let Some(&OffsetOrPop::OffsetBy8(offset)) = iter.peek() { + iter.next(); + offset + } else { + 0 + }; + + let mut regs = ArrayVec::::new(); + for i in iter { + if let OffsetOrPop::Pop(reg) = i { + // If try_push errors we've exceeded the number of supported registers: there's no + // way to encode these operations as an unwind rule. 
+ regs.try_push(reg).ok()?; + } else { + return None; + } + } + + if regs.is_empty() && sp_offset_by_8 == 0 { + Some(Self::JustReturn) + } else { + let (register_count, encoded_registers_to_pop) = register_ordering::encode(®s)?; + Some(Self::OffsetSpAndPopRegisters { + sp_offset_by_8, + register_count, + encoded_registers_to_pop, + }) + } + } +} + +impl UnwindRule for UnwindRuleX86_64 { + type UnwindRegs = UnwindRegsX86_64; + + fn rule_for_stub_functions() -> Self { + UnwindRuleX86_64::JustReturn + } + fn rule_for_function_start() -> Self { + UnwindRuleX86_64::JustReturn + } + fn fallback_rule() -> Self { + UnwindRuleX86_64::UseFramePointer + } + + fn exec( + self, + is_first_frame: bool, + regs: &mut UnwindRegsX86_64, + read_stack: &mut F, + ) -> Result, Error> + where + F: FnMut(u64) -> Result, + { + let sp = regs.sp(); + let (new_sp, new_bp) = match self { + UnwindRuleX86_64::EndOfStack => return Ok(None), + UnwindRuleX86_64::JustReturn => { + let new_sp = sp.checked_add(8).ok_or(Error::IntegerOverflow)?; + (new_sp, regs.bp()) + } + UnwindRuleX86_64::JustReturnIfFirstFrameOtherwiseFp => { + if is_first_frame { + let new_sp = sp.checked_add(8).ok_or(Error::IntegerOverflow)?; + (new_sp, regs.bp()) + } else { + let sp = regs.sp(); + let bp = regs.bp(); + let new_sp = bp.checked_add(16).ok_or(Error::IntegerOverflow)?; + if new_sp <= sp { + return Err(Error::FramepointerUnwindingMovedBackwards); + } + let new_bp = read_stack(bp).map_err(|_| Error::CouldNotReadStack(bp))?; + (new_sp, new_bp) + } + } + UnwindRuleX86_64::OffsetSp { sp_offset_by_8 } => { + let sp_offset = u64::from(sp_offset_by_8) * 8; + let new_sp = sp.checked_add(sp_offset).ok_or(Error::IntegerOverflow)?; + (new_sp, regs.bp()) + } + UnwindRuleX86_64::OffsetSpAndRestoreBp { + sp_offset_by_8, + bp_storage_offset_from_sp_by_8, + } => { + let sp_offset = u64::from(sp_offset_by_8) * 8; + let new_sp = sp.checked_add(sp_offset).ok_or(Error::IntegerOverflow)?; + let bp_storage_offset_from_sp = 
i64::from(bp_storage_offset_from_sp_by_8) * 8; + let bp_location = checked_add_signed(sp, bp_storage_offset_from_sp) + .ok_or(Error::IntegerOverflow)?; + let new_bp = match read_stack(bp_location) { + Ok(new_bp) => new_bp, + Err(()) if is_first_frame && bp_location < sp => { + // Ignore errors when reading beyond the stack pointer in the first frame. + // These negative offsets are sometimes seen in x86_64 epilogues, where + // a bunch of registers are popped one after the other, and the compiler + // doesn't always set the already-popped register to "unchanged" (because + // doing so would take up extra space in the dwarf information). + // read_stack may legitimately refuse to read beyond the stack pointer, + // for example when the stack bytes are coming from a linux perf event + // sample record, where the ustack bytes are copied starting from sp. + regs.bp() + } + Err(()) => return Err(Error::CouldNotReadStack(bp_location)), + }; + (new_sp, new_bp) + } + UnwindRuleX86_64::UseFramePointer => { + // Do a frame pointer stack walk. Code that is compiled with frame pointers + // has the following function prologues and epilogues: + // + // Function prologue: + // pushq %rbp + // movq %rsp, %rbp + // + // Function epilogue: + // popq %rbp + // ret + // + // Functions are called with callq; callq pushes the return address onto the stack. + // When a function reaches its end, ret pops the return address from the stack and jumps to it. + // So when a function is called, we have the following stack layout: + // + // [... rest of the stack] + // ^ rsp ^ rbp + // callq some_function + // [return address] [... rest of the stack] + // ^ rsp ^ rbp + // pushq %rbp + // [caller's frame pointer] [return address] [... rest of the stack] + // ^ rsp ^ rbp + // movq %rsp, %rbp + // [caller's frame pointer] [return address] [... rest of the stack] + // ^ rsp, rbp + // + // [... more stack] [caller's frame pointer] [return address] [... 
rest of the stack] + // ^ rsp ^ rbp + // + // So: *rbp is the caller's frame pointer, and *(rbp + 8) is the return address. + // + // Or, in other words, the following linked list is built up on the stack: + // #[repr(C)] + // struct CallFrameInfo { + // previous: *const CallFrameInfo, + // return_address: *const c_void, + // } + // and rbp is a *const CallFrameInfo. + let sp = regs.sp(); + let bp = regs.bp(); + if bp == 0 { + return Ok(None); + } + let new_sp = bp.checked_add(16).ok_or(Error::IntegerOverflow)?; + if new_sp <= sp { + return Err(Error::FramepointerUnwindingMovedBackwards); + } + let new_bp = read_stack(bp).map_err(|_| Error::CouldNotReadStack(bp))?; + // new_bp is the caller's bp. If the caller uses frame pointers, then bp should be + // a valid frame pointer and we could do a coherency check on new_bp to make sure + // it's moving in the right direction. But if the caller is using bp as a general + // purpose register, then any value (including zero) would be a valid value. + // At this point we don't know how the caller uses bp, so we leave new_bp unchecked. 
+ + (new_sp, new_bp) + } + UnwindRuleX86_64::OffsetSpAndPopRegisters { + sp_offset_by_8, + register_count, + encoded_registers_to_pop, + } => { + let sp = regs.sp(); + let mut sp = sp + .checked_add(sp_offset_by_8 as u64 * 8) + .ok_or(Error::IntegerOverflow)?; + for reg in register_ordering::decode(register_count, encoded_registers_to_pop) { + let value = read_stack(sp).map_err(|_| Error::CouldNotReadStack(sp))?; + sp = sp.checked_add(8).ok_or(Error::IntegerOverflow)?; + regs.set(reg, value); + } + (sp.checked_add(8).ok_or(Error::IntegerOverflow)?, regs.bp()) + } + }; + let return_address = + read_stack(new_sp - 8).map_err(|_| Error::CouldNotReadStack(new_sp - 8))?; + if return_address == 0 { + return Ok(None); + } + if new_sp == sp && return_address == regs.ip() { + return Err(Error::DidNotAdvance); + } + regs.set_ip(return_address); + regs.set_sp(new_sp); + regs.set_bp(new_bp); + Ok(Some(return_address)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_basic() { + let stack = [ + 1, 2, 0x100300, 4, 0x40, 0x100200, 5, 6, 0x70, 0x100100, 7, 8, 9, 10, 0x0, 0x0, + ]; + let mut read_stack = |addr| Ok(stack[(addr / 8) as usize]); + let mut regs = UnwindRegsX86_64::new(0x100400, 0x10, 0x20); + let res = + UnwindRuleX86_64::OffsetSp { sp_offset_by_8: 1 }.exec(true, &mut regs, &mut read_stack); + assert_eq!(res, Ok(Some(0x100300))); + assert_eq!(regs.ip(), 0x100300); + assert_eq!(regs.sp(), 0x18); + assert_eq!(regs.bp(), 0x20); + let res = UnwindRuleX86_64::UseFramePointer.exec(true, &mut regs, &mut read_stack); + assert_eq!(res, Ok(Some(0x100200))); + assert_eq!(regs.ip(), 0x100200); + assert_eq!(regs.sp(), 0x30); + assert_eq!(regs.bp(), 0x40); + let res = UnwindRuleX86_64::UseFramePointer.exec(false, &mut regs, &mut read_stack); + assert_eq!(res, Ok(Some(0x100100))); + assert_eq!(regs.ip(), 0x100100); + assert_eq!(regs.sp(), 0x50); + assert_eq!(regs.bp(), 0x70); + let res = UnwindRuleX86_64::UseFramePointer.exec(false, &mut regs, &mut 
read_stack); + assert_eq!(res, Ok(None)); + } + + #[test] + fn test_overflow() { + // This test makes sure that debug builds don't panic when trying to use frame pointer + // unwinding on code that was using the bp register as a general-purpose register and + // storing -1 in it. -1 is u64::MAX, so an unchecked add panics in debug builds. + let stack = [ + 1, 2, 0x100300, 4, 0x40, 0x100200, 5, 6, 0x70, 0x100100, 7, 8, 9, 10, 0x0, 0x0, + ]; + let mut read_stack = |addr| Ok(stack[(addr / 8) as usize]); + let mut regs = UnwindRegsX86_64::new(0x100400, u64::MAX / 8 * 8, u64::MAX); + let res = UnwindRuleX86_64::JustReturn.exec(true, &mut regs, &mut read_stack); + assert_eq!(res, Err(Error::IntegerOverflow)); + let res = + UnwindRuleX86_64::OffsetSp { sp_offset_by_8: 1 }.exec(true, &mut regs, &mut read_stack); + assert_eq!(res, Err(Error::IntegerOverflow)); + let res = UnwindRuleX86_64::OffsetSpAndRestoreBp { + sp_offset_by_8: 1, + bp_storage_offset_from_sp_by_8: 2, + } + .exec(true, &mut regs, &mut read_stack); + assert_eq!(res, Err(Error::IntegerOverflow)); + let res = UnwindRuleX86_64::UseFramePointer.exec(true, &mut regs, &mut read_stack); + assert_eq!(res, Err(Error::IntegerOverflow)); + } +} diff --git a/third_party/rust/framehop/src/x86_64/unwinder.rs b/third_party/rust/framehop/src/x86_64/unwinder.rs new file mode 100644 index 000000000000..bb98d6037638 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/unwinder.rs @@ -0,0 +1,68 @@ +use core::ops::Deref; + +use super::arch::ArchX86_64; +use super::cache::CacheX86_64; +use super::unwindregs::UnwindRegsX86_64; +use crate::cache::{AllocationPolicy, MayAllocateDuringUnwind}; +use crate::error::Error; +use crate::unwinder::UnwinderInternal; +use crate::unwinder::{Module, Unwinder}; +use crate::FrameAddress; + +/// The unwinder for the x86_64 CPU architecture. Use the [`Unwinder`] trait for unwinding. +/// +/// Type arguments: +/// +/// - `D`: The type for unwind section data in the modules. See [`Module`]. 
+/// - `P`: The [`AllocationPolicy`]. +pub struct UnwinderX86_64(UnwinderInternal); + +impl Default for UnwinderX86_64 { + fn default() -> Self { + Self::new() + } +} + +impl Clone for UnwinderX86_64 { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl UnwinderX86_64 { + /// Create an unwinder for a process. + pub fn new() -> Self { + Self(UnwinderInternal::new()) + } +} + +impl, P: AllocationPolicy> Unwinder for UnwinderX86_64 { + type UnwindRegs = UnwindRegsX86_64; + type Cache = CacheX86_64

; + type Module = Module; + + fn add_module(&mut self, module: Module) { + self.0.add_module(module); + } + + fn remove_module(&mut self, module_address_range_start: u64) { + self.0.remove_module(module_address_range_start); + } + + fn max_known_code_address(&self) -> u64 { + self.0.max_known_code_address() + } + + fn unwind_frame( + &self, + address: FrameAddress, + regs: &mut UnwindRegsX86_64, + cache: &mut CacheX86_64

, + read_stack: &mut F, + ) -> Result, Error> + where + F: FnMut(u64) -> Result, + { + self.0.unwind_frame(address, regs, &mut cache.0, read_stack) + } +} diff --git a/third_party/rust/framehop/src/x86_64/unwindregs.rs b/third_party/rust/framehop/src/x86_64/unwindregs.rs new file mode 100644 index 000000000000..77319ff74176 --- /dev/null +++ b/third_party/rust/framehop/src/x86_64/unwindregs.rs @@ -0,0 +1,102 @@ +use core::fmt::Debug; + +use crate::display_utils::HexNum; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct UnwindRegsX86_64 { + ip: u64, + regs: [u64; 16], +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum Reg { + RAX, + RDX, + RCX, + RBX, + RSI, + RDI, + RBP, + RSP, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +} + +impl UnwindRegsX86_64 { + pub fn new(ip: u64, sp: u64, bp: u64) -> Self { + let mut r = Self { + ip, + regs: Default::default(), + }; + r.set_sp(sp); + r.set_bp(bp); + r + } + + #[inline(always)] + pub fn get(&self, reg: Reg) -> u64 { + self.regs[reg as usize] + } + #[inline(always)] + pub fn set(&mut self, reg: Reg, value: u64) { + self.regs[reg as usize] = value; + } + + #[inline(always)] + pub fn ip(&self) -> u64 { + self.ip + } + #[inline(always)] + pub fn set_ip(&mut self, ip: u64) { + self.ip = ip + } + + #[inline(always)] + pub fn sp(&self) -> u64 { + self.get(Reg::RSP) + } + #[inline(always)] + pub fn set_sp(&mut self, sp: u64) { + self.set(Reg::RSP, sp) + } + + #[inline(always)] + pub fn bp(&self) -> u64 { + self.get(Reg::RBP) + } + #[inline(always)] + pub fn set_bp(&mut self, bp: u64) { + self.set(Reg::RBP, bp) + } +} + +impl Debug for UnwindRegsX86_64 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("UnwindRegsX86_64") + .field("ip", &HexNum(self.ip())) + .field("rax", &HexNum(self.get(Reg::RAX))) + .field("rdx", &HexNum(self.get(Reg::RDX))) + .field("rcx", &HexNum(self.get(Reg::RCX))) + .field("rbx", &HexNum(self.get(Reg::RBX))) + 
.field("rsi", &HexNum(self.get(Reg::RSI))) + .field("rdi", &HexNum(self.get(Reg::RDI))) + .field("rbp", &HexNum(self.get(Reg::RBP))) + .field("rsp", &HexNum(self.get(Reg::RSP))) + .field("r8", &HexNum(self.get(Reg::R8))) + .field("r9", &HexNum(self.get(Reg::R9))) + .field("r10", &HexNum(self.get(Reg::R10))) + .field("r11", &HexNum(self.get(Reg::R11))) + .field("r12", &HexNum(self.get(Reg::R12))) + .field("r13", &HexNum(self.get(Reg::R13))) + .field("r14", &HexNum(self.get(Reg::R14))) + .field("r15", &HexNum(self.get(Reg::R15))) + .finish() + } +} diff --git a/third_party/rust/gimli/.cargo-checksum.json b/third_party/rust/gimli/.cargo-checksum.json new file mode 100644 index 000000000000..0e8c64a55b24 --- /dev/null +++ b/third_party/rust/gimli/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"39644968fcea2bf6cf14f94047cc8b5e9785797631c0cd8033e4e2cdbcf27969","Cargo.toml":"1ecca3db954f8885686c1e3ca6b7222d500bc26926a46438eabd519569109c32","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"6b55025491f62ca7dd19a7a6cdb9154b06db33c85247c88a19804ab1c1ba2b5e","src/arch.rs":"735a8e871479263ad2dd86c38cc68167da320800ca70b8b1d25a224e5f3d0bd8","src/common.rs":"92bb5bc1eebe0a1906389a75096288773bb86b8895b827851bfb082a3c4999f8","src/constants.rs":"33b74f752fc11aefa1f5ef36a08c2fac19453e6d16f8e490d15b2eabcd63a55c","src/endianity.rs":"1f7e62ae34f540c06bedf1e7948739211556eea7dd83731a5ca52c7d687ed0fc","src/leb128.rs":"996d5c79d027f97c010ca487bc4ff5f8265f4b9e63d62b4e4fa291383c259ee9","src/lib.rs":"538a8080f33a0641f831e883085425c36cbce2ae39e0cd5e0b6c7c062bca7712","src/read/abbrev.rs":"f937e45d151ac5073f2c526b792e86e5ba96d3d36cb0377682a596c272be589a","src/read/addr.rs":"a6a535b690793e4c8ec85127d558e796cb8f6272533cd0418886bbc44289039e","src/read/aranges.rs":"fd3ff965cfd23c8b425c555f8f34a190764ae993433f32c63f9452c6604806cd","src/read/cfi.rs":"93e7572e44d97d
10977833cedab78d68b6f0fec643edda4a613ad8ae845a93ce","src/read/dwarf.rs":"0f30d814dfe067aa6fbd0b80dac8e1a2532e2c5cd5e584c151a8915356b6b2d7","src/read/endian_reader.rs":"25752b609d74ad7dc85df84d044d0e931024a95af72a760cd51f834016775b3e","src/read/endian_slice.rs":"5b44661714967780b8c9f52fdaf655a53e309c38cbd3daf11bf6b1d5f6d067bb","src/read/index.rs":"2a28d032bc3bc5235545ac526b367512ac0aa7807909b6c02c8d3f84f5beff87","src/read/line.rs":"463fedce39895af793cdce413d9593cfd3470939f9f944fd7814ded5946d5b7e","src/read/lists.rs":"67ca9e1a36a91feb4996d035211de845205212bfda02163685d217818567ff93","src/read/loclists.rs":"a05933e752d44c1d26e83c321dbc1b8a3616b1d76ad15f488858f7f74fd3aece","src/read/lookup.rs":"0cf89ba12b9d48b1fe035dd3a497730323acb9427a9457abbc2f7c58c4c71165","src/read/mod.rs":"1154168832c544acd31f467668fb86536232138c84e5918ba3b1cc66d1554d05","src/read/op.rs":"8782f09332eea1a218aa524a67c9c1cc2e73a8210b30402519dbe8fcf21dcf6e","src/read/pubnames.rs":"ed752ee1a7017e6d3be42d81e4ddaaac960ef08081463a19106c9f041526d4a3","src/read/pubtypes.rs":"5e75b32c0923e827aff0bb2db456797a0e8d38ba46be992558a7990b3196bcf5","src/read/reader.rs":"afc9c2cfbfe0fce5b1825d029f8e841100f48b04b86181950a213fbb82e6ad63","src/read/relocate.rs":"6844b113eb8218152e29912accc54b26bc2498e97bfe4af824472ddb69b8601c","src/read/rnglists.rs":"d1afeb1779d145493a1fc665fa32820c63c539e40b10ecd5b5f343836da188e6","src/read/str.rs":"4dd98cc8d93ce6f06c194eae034bfe0a3d45a9f06fbeaca38d8f29a9c7cf15a5","src/read/unit.rs":"bcff85e55148bf141984a4cb20eb5983cfd85de6e8a4535cef2ab19e8e0f5103","src/read/util.rs":"61e41212f1c8336988c9a7a1523c2913af8c8a66d2dd59d3631ba179e801e3bd","src/read/value.rs":"1c0db3759c65ffda3520fcecd36118367dfb46845035d5d97fcba2f0ea780380","src/test_util.rs":"291eefa6b51c6d934ba2f4a4c9bc7c403046fc1cccf4d43487820f0154bb89e2","src/write/abbrev.rs":"fa02163389e92e804d139cf84f833ab6af932083f0eb2d74464b4a70bd3237ff","src/write/cfi.rs":"323ab703251a41fe83172d749c8afec7d869c5d52e8edd85d7b87450102e6e3a","src/write/d
warf.rs":"8a1a0893e31134ad68993994594f3024ad0c8af7c1188b29e0ffc26b42edef21","src/write/endian_vec.rs":"1d5811986648816a677580b22630f5059757a381487d73e9adbb3008c9ae0c58","src/write/line.rs":"80f7626f15467d69fb73a9d9fda7863fe343f236d5fcdbc353bdf2a2a4b1bb42","src/write/loc.rs":"2a58b0f57ab344f23de81e459f6fefa153e29e0384af31bbcbc80095af0fa703","src/write/mod.rs":"6e43a028baf73bf50ee276a3f08f31adc69cacdde25d56b55f14c0d48ca6f3aa","src/write/op.rs":"e599fa116366f273ca33da3428132f2b9da21c0cc50a0c0ccfd0f524ccb4e82e","src/write/range.rs":"28033849e7912f60d137c2f2e0065c5169a7f16896b179178c8e3674d7c2785e","src/write/relocate.rs":"117b97eae3ca2aad9d5b242652ebbdb333440e877be37873a7ef5ba1a39ced43","src/write/section.rs":"126a0202d606ea94d5b7ee4853afefb05f2546710210954fd0cc18af8674a511","src/write/str.rs":"4850cc2fee55980f9cbb6b4169f9861ab9d05c2b28a85c2b790480b83a66f514","src/write/unit.rs":"35419f917bd759ab026c9701ac0aef9a945ffb95a10f1c9c72608020206edf44","src/write/writer.rs":"7d5dd07b82ec3becebb060c106d4ea697cbd8b9b64a5de78403511a5244e08b1"},"package":"e2e1d97fbe9722ba9bbd0c97051c2956e726562b61f86a25a4360398a40edfc9"} \ No newline at end of file diff --git a/third_party/rust/gimli/CHANGELOG.md b/third_party/rust/gimli/CHANGELOG.md new file mode 100644 index 000000000000..18a269194854 --- /dev/null +++ b/third_party/rust/gimli/CHANGELOG.md @@ -0,0 +1,1102 @@ +# `gimli` Change Log + +-------------------------------------------------------------------------------- + +## 0.30.0 + +Released 2024/05/26. + +### Breaking changes + +* Added context to some `read::Error` variants. + [#703](https://github.com/gimli-rs/gimli/pull/703) + +* Changed type of `read::UnitIndexSection::section` to `IndexSectionId`. + [#716](https://github.com/gimli-rs/gimli/pull/716) + +### Changed + +* Fixed `write::Operation::ImplicitPointer::size`. 
+ [#712](https://github.com/gimli-rs/gimli/pull/712) + +* Changed `read::RngListIter` and `read::LocListIter` to skip ranges where + the end is before the beginning, instead of returning an error. + [#715](https://github.com/gimli-rs/gimli/pull/715) + +* Fixed clippy warnings. + [#713](https://github.com/gimli-rs/gimli/pull/713) + +### Added + +* Added `read::UnitRef`. + [#711](https://github.com/gimli-rs/gimli/pull/711) + +-------------------------------------------------------------------------------- + +## 0.29.0 + +Released 2024/04/11. + +### Breaking changes + +* Changed `Reader` type parameter to `ReaderOffset` for `read::UnwindContext` and related types. + Replaced `Expression` with `UnwindExpression` in unwind information types. + [#703](https://github.com/gimli-rs/gimli/pull/703) + +### Changed + +* Changed `write::Sections::for_each` and `for_each_mut` to specify section lifetime. + [#699](https://github.com/gimli-rs/gimli/pull/699) + +* Fixed writing unwind information with an LSDA encoding that is not `DW_EH_PE_absptr`. + [#704](https://github.com/gimli-rs/gimli/pull/704) + +* Fixed parsing for an empty DWP index. + [#706](https://github.com/gimli-rs/gimli/pull/706) + +* Improved error handling in `read::Unit::dwo_name`. + [#693](https://github.com/gimli-rs/gimli/pull/693) + +* Fixed warnings. + [#692](https://github.com/gimli-rs/gimli/pull/692) + [#694](https://github.com/gimli-rs/gimli/pull/694) + [#695](https://github.com/gimli-rs/gimli/pull/695) + [#696](https://github.com/gimli-rs/gimli/pull/696) + +### Added + +* Added MIPS register definitions. + [#690](https://github.com/gimli-rs/gimli/pull/690) + +* Added PowerPC register definitions. + [#691](https://github.com/gimli-rs/gimli/pull/691) + +* Added `read::DwarfSections` and `read::DwarfPackageSections`. + [#698](https://github.com/gimli-rs/gimli/pull/698) + +* Implemented `BitOr` for `DwEhPe`. 
+ [#709](https://github.com/gimli-rs/gimli/pull/709) + +* Added `read::Relocate`, `read::RelocateReader`, and `write::RelocateWriter`. + [#709](https://github.com/gimli-rs/gimli/pull/709) + +-------------------------------------------------------------------------------- + +## 0.28.1 + +Released 2023/11/24. + +### Changed + +* Changed `read::AbbreviationsCache` to require manual population using + `Dwarf::populate_abbreviations_cache`. + [#679](https://github.com/gimli-rs/gimli/pull/679) + +* Changed the default `read::UnwindContextStorage` to use `Box` instead of `Vec` + so that its memory usage is limited. + [#687](https://github.com/gimli-rs/gimli/pull/687) + +* Changed `read::UnwindTable::new` to always reset the context, because + previous errors may have left the context in an invalid state. + [#684](https://github.com/gimli-rs/gimli/pull/684) + +* Changed the `Debug` implementation for `read::EndianSlice` to limit the number + of bytes it displays. + [#686](https://github.com/gimli-rs/gimli/pull/686) + +### Added + +* Added more AArch64 register definitions. + [#680](https://github.com/gimli-rs/gimli/pull/680) + +* Added `read::Unit::new_with_abbreviations`. + [#677](https://github.com/gimli-rs/gimli/pull/677) + +* Added `read::Evaluation::value_result`. + [#676](https://github.com/gimli-rs/gimli/pull/676) + +-------------------------------------------------------------------------------- + +## 0.28.0 + +Released 2023/08/12. + +### Breaking changes + +* Deleted `impl From<EndianSlice> for &[u8]`. Use `EndianSlice::slice` instead. + [#669](https://github.com/gimli-rs/gimli/pull/669) + +* Deleted `impl Index<usize> for EndianSlice` and + `impl Index<RangeFrom<usize>> for EndianSlice`. + [#669](https://github.com/gimli-rs/gimli/pull/669) + +* Replaced `impl From<Pointer> for u64` with `Pointer::pointer`. + [#670](https://github.com/gimli-rs/gimli/pull/670) + +* Updated `fallible-iterator` to 0.3.0.
+ [#672](https://github.com/gimli-rs/gimli/pull/672) + +* Changed some optional dependencies to use the `dep:` feature syntax. + [#672](https://github.com/gimli-rs/gimli/pull/672) + +* Added `non_exhaustive` attribute to `read::RegisterRule`, + `read::CallFrameInstruction`, and `write::CallFrameInstruction`. + [#673](https://github.com/gimli-rs/gimli/pull/673) + +### Changed + +* The minimum supported rust version for the `read` feature and its dependencies + increased to 1.60.0. + +* The minimum supported rust version for other features increased to 1.65.0. + +### Added + +* Added `Vendor`, `read::DebugFrame::set_vendor`, and `read::EhFrame::set_vendor`. + [#673](https://github.com/gimli-rs/gimli/pull/673) + +* Added more ARM and AArch64 register definitions, and + `DW_CFA_AARCH64_negate_ra_state` support. + [#673](https://github.com/gimli-rs/gimli/pull/673) + +-------------------------------------------------------------------------------- + +## 0.27.3 + +Released 2023/06/14. + +### Changed + +* Excluded test fixtures from published package. + [#661](https://github.com/gimli-rs/gimli/pull/661) + +### Added + +* Added `FallibleIterator` implementation for `read::OperationIter`. + [#649](https://github.com/gimli-rs/gimli/pull/649) + +* Added `DW_AT_GNU_deleted` constant. + [#658](https://github.com/gimli-rs/gimli/pull/658) + +-------------------------------------------------------------------------------- + +## 0.27.2 + +Released 2023/02/15. + +### Added + +* Added support for tombstones in `read::LineRows`. + [#642](https://github.com/gimli-rs/gimli/pull/642) + +-------------------------------------------------------------------------------- + +## 0.27.1 + +Released 2023/01/23. + +### Added + +* Added `SectionId::xcoff_name` and `read::Section::xcoff_section_name`. + [#635](https://github.com/gimli-rs/gimli/pull/635) + +* Added `read::Dwarf::make_dwo` and `read::Unit::dwo_name`. 
+ [#637](https://github.com/gimli-rs/gimli/pull/637) + +### Changed + +* Changed `read::DwarfPackage::sections` to handle supplementary files. + [#638](https://github.com/gimli-rs/gimli/pull/638) + +-------------------------------------------------------------------------------- + +## 0.27.0 + +Released 2022/11/23. + +### Breaking changes + +* Added `read::Dwarf::abbreviations_cache` to cache abbreviations at offset 0. + Changed `read::Dwarf::abbreviations` to return `Result<Arc<Abbreviations>>`, + and changed `read::Unit::abbreviations` to `Arc<Abbreviations>`. + [#628](https://github.com/gimli-rs/gimli/pull/628) + +### Added + +* Added LoongArch register definitions. + [#624](https://github.com/gimli-rs/gimli/pull/624) + +* Added support for tombstones in `read::LocListIter` and `read::RngListIter`. + [#631](https://github.com/gimli-rs/gimli/pull/631) + +-------------------------------------------------------------------------------- + +## 0.26.2 + +Released 2022/07/16. + +### Changed + +* Fixed CFI personality encoding when writing. + [#609](https://github.com/gimli-rs/gimli/pull/609) + +* Fixed use of raw pointer for mutation, detected by Miri. + [#614](https://github.com/gimli-rs/gimli/pull/614) + +* Fixed `DW_OP_GNU_implicit_pointer` handling for DWARF version 2. + [#618](https://github.com/gimli-rs/gimli/pull/618) + +### Added + +* Added `read::EhHdrTable::iter`. + [#619](https://github.com/gimli-rs/gimli/pull/619) + +-------------------------------------------------------------------------------- + +## 0.26.1 + +Released 2021/11/02. + +### Changed + +* Fixed segmentation fault in `ArrayVec<Vec<T>>::into_vec`, which may be used by + `read::Evaluation::result`. This regression was introduced in 0.26.0. + [#601](https://github.com/gimli-rs/gimli/pull/601) + +-------------------------------------------------------------------------------- + +## 0.26.0 + +Released 2021/10/24. + +### Breaking changes + +* Removed `read::UninitializedUnwindContext`. Use `Box<UnwindContext>` instead.
+ [#593](https://github.com/gimli-rs/gimli/pull/593) + +* Renamed `read::Error::CfiStackFull` to `StackFull`. + [#595](https://github.com/gimli-rs/gimli/pull/595) + +* Added `UnwindContextStorage` type parameter to `read::UnwindContext`, `read::UnwindTable`, + `read::UnwindTableRow`, and `read::RegisterRuleMap`. + [#595](https://github.com/gimli-rs/gimli/pull/595) + +* Added `EvaluationStorage` type parameter to `read::Evaluation`. + [#595](https://github.com/gimli-rs/gimli/pull/595) + +* Added `read::SectionId::DebugCuIndex` and `read::SectionId::DebugTuIndex`. + [#588](https://github.com/gimli-rs/gimli/pull/588) + +### Changed + +* Fixed `DW_EH_PE_pcrel` handling in default `write::Writer::write_eh_pointer` implementation. + [#576](https://github.com/gimli-rs/gimli/pull/576) + +* Fixed `read::AttributeSpecification::size` for some forms. + [#597](https://github.com/gimli-rs/gimli/pull/597) + +* Display more unit details in dwarfdump. + [#584](https://github.com/gimli-rs/gimli/pull/584) + +### Added + +* Added `write::DebuggingInformationEntry::delete_child`. + [#570](https://github.com/gimli-rs/gimli/pull/570) + +* Added ARM and AArch64 register definitions. + [#574](https://github.com/gimli-rs/gimli/pull/574) + [#577](https://github.com/gimli-rs/gimli/pull/577) + +* Added RISC-V register definitions. + [#579](https://github.com/gimli-rs/gimli/pull/579) + +* Added `read::DwarfPackage`, `read::DebugCuIndex`, and `read::DebugTuIndex`. + [#588](https://github.com/gimli-rs/gimli/pull/588) + +* Added `read-core` feature to allow building without `liballoc`. + [#596](https://github.com/gimli-rs/gimli/pull/596) + +* Added `read::EntriesRaw::skip_attributes`. + [#597](https://github.com/gimli-rs/gimli/pull/597) + +-------------------------------------------------------------------------------- + +## 0.25.0 + +Released 2021/07/26. + +### Breaking changes + +* `read::FrameDescriptionEntry::unwind_info_for_address` now returns a reference + instead of cloning. 
+ [#557](https://github.com/gimli-rs/gimli/pull/557) + +* `read::AttributeValue::RangeListsRef` now contains a `RawRangeListsOffset` + to allow handling of GNU split DWARF extensions. + Use `read::Dwarf::ranges_offset_from_raw` to handle it. + [#568](https://github.com/gimli-rs/gimli/pull/568) + [#569](https://github.com/gimli-rs/gimli/pull/569) + +* Added `read::Unit::dwo_id`. + [#569](https://github.com/gimli-rs/gimli/pull/569) + +### Changed + +* `.debug_aranges` parsing now accepts version 3. + [#560](https://github.com/gimli-rs/gimli/pull/560) + +* `read::Dwarf::attr_ranges_offset` and its callers now handle GNU split DWARF extensions. + [#568](https://github.com/gimli-rs/gimli/pull/568) + [#569](https://github.com/gimli-rs/gimli/pull/569) + +### Added + +* Added `read::DebugLineStr::new`. + [#556](https://github.com/gimli-rs/gimli/pull/556) + +* Added `read::UnwindTable::into_current_row`. + [#557](https://github.com/gimli-rs/gimli/pull/557) + +* Added more `DW_LANG` constants. + [#565](https://github.com/gimli-rs/gimli/pull/565) + +* dwarfdump: added DWO parent support. + [#568](https://github.com/gimli-rs/gimli/pull/568) + +* Added `read::Dwarf` methods: `ranges_offset_from_raw`, `raw_ranges`, and `raw_locations`. + [#568](https://github.com/gimli-rs/gimli/pull/568) + [#569](https://github.com/gimli-rs/gimli/pull/569) + +-------------------------------------------------------------------------------- + +## 0.24.0 + +Released 2021/05/01. + +### Breaking changes + +* Minimum Rust version increased to 1.42.0. + +* Added `read::Dwarf::debug_aranges`. + [#539](https://github.com/gimli-rs/gimli/pull/539) + +* Replaced `read::DebugAranges::items` with `read::DebugAranges::headers`. + [#539](https://github.com/gimli-rs/gimli/pull/539) + +* Added `read::Operation::Wasm*`. + [#546](https://github.com/gimli-rs/gimli/pull/546) + +* `read::LineRow::line` now returns `Option<NonZeroU64>`. + The `read::ColumnType::Column` variant now contains a `NonZeroU64`.
+ [#551](https://github.com/gimli-rs/gimli/pull/551) + +* Replaced `read::Dwarf::debug_str_sup` with `read::Dwarf::sup`. + Deleted `sup` parameter of `read::Dwarf::load`. + Added `read::Dwarf::load_sup`. + [#554](https://github.com/gimli-rs/gimli/pull/554) + +### Added + +* dwarfdump: Supplementary object file support. + [#552](https://github.com/gimli-rs/gimli/pull/552) + +### Changed + +* Support `DW_FORM_addrx*` for `DW_AT_low_pc`/`DW_AT_high_pc` in `read::Dwarf`. + [#541](https://github.com/gimli-rs/gimli/pull/541) + +* Performance improvement in `EndianReader`. + [#549](https://github.com/gimli-rs/gimli/pull/549) + +-------------------------------------------------------------------------------- + +## 0.23.0 + +Released 2020/10/27. + +### Breaking changes + +* Added more variants to `read::UnitType`. + Added `read::AttributeValue::DwoId` + [#521](https://github.com/gimli-rs/gimli/pull/521) + +* Replaced `CompilationUnitHeader` and `TypeUnitHeader` with `UnitHeader`. + Replaced `CompilationUnitHeadersIter` with `DebugInfoUnitHeadersIter`. + Replaced `TypeUnitHeadersIter` with `DebugTypesUnitHeadersIter`. + [#523](https://github.com/gimli-rs/gimli/pull/523) + + +### Added + +* Added read support for split DWARF. + [#527](https://github.com/gimli-rs/gimli/pull/527) + [#529](https://github.com/gimli-rs/gimli/pull/529) + +* Added `read::Dwarf::attr_address`. + [#524](https://github.com/gimli-rs/gimli/pull/524) + +* Added read support for `DW_AT_GNU_addr_base` and `DW_AT_GNU_ranges_base`. + [#525](https://github.com/gimli-rs/gimli/pull/525) + +* dwarfdump: Display index values for attributes. + [#526](https://github.com/gimli-rs/gimli/pull/526) + +* Added `name_to_register`. + [#532](https://github.com/gimli-rs/gimli/pull/532) + +-------------------------------------------------------------------------------- + +## 0.22.0 + +Released 2020/07/03. + +### Breaking changes + +* Fixed `UnitHeader::size_of_header` for DWARF 5 units. 
+ [#518](https://github.com/gimli-rs/gimli/pull/518) + +### Added + +* Added fuzz targets in CI. + [#512](https://github.com/gimli-rs/gimli/pull/512) + +* Added read support for `DW_OP_GNU_addr_index` and `DW_OP_GNU_const_index`. + [#516](https://github.com/gimli-rs/gimli/pull/516) + +* Added `.dwo` support to dwarfdump. + [#516](https://github.com/gimli-rs/gimli/pull/516) + +* Added `SectionId::dwo_name` and `Section::dwo_section_name`. + [#517](https://github.com/gimli-rs/gimli/pull/517) + +### Fixed + +* Fixed panic when reading `DW_FORM_indirect` combined with `DW_FORM_implicit_const`. + [#502](https://github.com/gimli-rs/gimli/pull/502) + +* Fixed panic for `read::Abbreviations::get(0)`. + [#505](https://github.com/gimli-rs/gimli/pull/505) + +* Fixed arithmetic overflow when reading `.debug_line`. + [#508](https://github.com/gimli-rs/gimli/pull/508) + +* Fixed arithmetic overflow when reading CFI. + [#509](https://github.com/gimli-rs/gimli/pull/509) + +* Fixed arithmetic overflow and division by zero when reading `.debug_aranges`. + [#510](https://github.com/gimli-rs/gimli/pull/510) + +* Don't return error from `read::Unit::new` when `DW_AT_name` or `DW_AT_comp_dir` is missing. + [#515](https://github.com/gimli-rs/gimli/pull/515) + +-------------------------------------------------------------------------------- + +## 0.21.0 + +Released 2020/05/12. + +### Breaking changes + +* Minimum Rust version increased to 1.38.0. + +* Replaced `read::Operation::Literal` with `Operation::UnsignedConstant` and `Operation::SignedConstant`. + Changed `read::Operation::Bra` and `read::Operation::Skip` to contain the target offset instead of the bytecode. + [#479](https://github.com/gimli-rs/gimli/pull/479) + +* Changed `write::Expression` to support references. Existing users can convert to use `Expression::raw`. + [#479](https://github.com/gimli-rs/gimli/pull/479) + +* Replaced `write::AttributeValue::AnyUnitEntryRef` with `DebugInfoRef`. 
+ Renamed `write::AttributeValue::ThisUnitEntryRef` to `UnitRef`. + [#479](https://github.com/gimli-rs/gimli/pull/479) + +* Added more optional features: `endian-reader` and `fallible-iterator`. + [#495](https://github.com/gimli-rs/gimli/pull/495) + [#498](https://github.com/gimli-rs/gimli/pull/498) + +### Added + +* Added `read::Expression::operations` + [#479](https://github.com/gimli-rs/gimli/pull/479) + +### Fixed + +* Fixed newlines in `dwarfdump` example. + [#470](https://github.com/gimli-rs/gimli/pull/470) + +* Ignore zero terminators when reading `.debug_frame` sections. + [#486](https://github.com/gimli-rs/gimli/pull/486) + +* Increase the number of CFI register rules supported by `read::UnwindContext`. + [#487](https://github.com/gimli-rs/gimli/pull/487) + +* Fixed version handling and return register encoding when reading `.eh_frame` sections. + [#493](https://github.com/gimli-rs/gimli/pull/493) + +### Changed + +* Added `EhFrame` and `DebugFrame` to `write::Sections`. + [#492](https://github.com/gimli-rs/gimli/pull/492) + +* Improved performance of `write::LineProgram::generate_row`. + [#476](https://github.com/gimli-rs/gimli/pull/476) + +* Removed use of the `byteorder`, `arrayvec` and `smallvec` crates. + [#494](https://github.com/gimli-rs/gimli/pull/494) + [#496](https://github.com/gimli-rs/gimli/pull/496) + [#497](https://github.com/gimli-rs/gimli/pull/497) + +-------------------------------------------------------------------------------- + +## 0.20.0 + +Released 2020/01/11. + +### Breaking changes + +* Changed type of `DwTag`, `DwAt`, and `DwForm` constants. + [#451](https://github.com/gimli-rs/gimli/pull/451) + +* Added `read/write::AttributeValue::DebugMacroRef`, and returned where + required in `read::Attribute::value`. Added `SectionId::DebugMacro`. + [#454](https://github.com/gimli-rs/gimli/pull/454) + +* Deleted `alloc` feature, and fixed `no-std` builds with stable rust. 
+ [#459](https://github.com/gimli-rs/gimli/pull/459) + +* Deleted `read::Error::description`, and changed `<read::Error as Display>` + to display what was previously the description. + [#462](https://github.com/gimli-rs/gimli/pull/462) + +### Added + +* Added GNU view constants. + [#434](https://github.com/gimli-rs/gimli/pull/434) + +* Added `read::EntriesRaw` for low level DIE parsing. + [#455](https://github.com/gimli-rs/gimli/pull/455) + +* Added `examples/simple-line.rs`. + [#460](https://github.com/gimli-rs/gimli/pull/460) + +### Fixed + +* Fixed handling of CFI augmentations without data. + [#438](https://github.com/gimli-rs/gimli/pull/438) + +* dwarfdump: fix panic for malformed expressions. + [#447](https://github.com/gimli-rs/gimli/pull/447) + +* dwarfdump: fix handling of Mach-O relocations. + [#449](https://github.com/gimli-rs/gimli/pull/449) + +### Changed + +* Improved abbreviation parsing performance. + [#451](https://github.com/gimli-rs/gimli/pull/451) + +-------------------------------------------------------------------------------- + +## 0.19.0 + +Released 2019/07/08. + +### Breaking changes + +* Small API changes related to `.debug_loc` and `.debug_loclists`: + added `read::RawLocListEntry::AddressOrOffsetPair` enum variant, + added `write::Sections::debug_loc/debug_loclists` public members, + and replaced `write::AttributeValue::LocationListsRef` with `LocationListRef`. + [#425](https://github.com/gimli-rs/gimli/pull/425) + +### Added + +* Added `read::Attribute::exprloc_value` and `read::AttributeValue::exprloc_value`. + [#422](https://github.com/gimli-rs/gimli/pull/422) + +* Added support for writing `.debug_loc` and `.debug_loclists` sections. + [#425](https://github.com/gimli-rs/gimli/pull/425) + +* Added `-G` flag to `dwarfdump` example to display global offsets. + [#427](https://github.com/gimli-rs/gimli/pull/427) + +* Added `examples/simple.rs`.
+ [#429](https://github.com/gimli-rs/gimli/pull/429) + +### Fixed + +* `write::LineProgram::from` no longer requires `DW_AT_name` or `DW_AT_comp_dir` + attributes to be present in the unit DIE. + [#430](https://github.com/gimli-rs/gimli/pull/430) + +-------------------------------------------------------------------------------- + +## 0.18.0 + +Released 2019/04/25. + +The focus of this release has been on improving support for reading CFI, +and adding support for writing CFI. + +### Breaking changes + +* For types which have an `Offset` type parameter, the default `Offset` + has changed from `usize` to `R::Offset`. + [#392](https://github.com/gimli-rs/gimli/pull/392) + +* Added an `Offset` type parameter to the `read::Unit` type to allow variance. + [#393](https://github.com/gimli-rs/gimli/pull/393) + +* Changed the `UninitializedUnwindContext::initialize` method to borrow `self`, + and return `&mut UnwindContext`. Deleted the `InitializedUnwindContext` type. + [#395](https://github.com/gimli-rs/gimli/pull/395) + +* Deleted the `UnwindSection` type parameters from the `CommonInformationEntry`, + `FrameDescriptionEntry`, `UninitializedUnwindContext`, + `UnwindContext`, and `UnwindTable` types. + [#399](https://github.com/gimli-rs/gimli/pull/399) + +* Changed the signature of the `get_cie` callback parameter for various functions. + The signature now matches the `UnwindSection::cie_from_offset` method, so + that method can be used as the parameter. + [#400](https://github.com/gimli-rs/gimli/pull/400) + +* Reduced the number of lifetime parameters for the `UnwindTable` type. + [#400](https://github.com/gimli-rs/gimli/pull/400) + +* Updated `fallible-iterator` to version 0.2.0. + [#407](https://github.com/gimli-rs/gimli/pull/407) + +* Added a parameter to the `Error::UnexpectedEof` enum variant. + [#408](https://github.com/gimli-rs/gimli/pull/408) + +### Added + +* Update to 2018 edition. 
+ [#391](https://github.com/gimli-rs/gimli/pull/391) + +* Added the `FrameDescriptionEntry::unwind_info_for_address` method. + [#396](https://github.com/gimli-rs/gimli/pull/396) + +* Added the `FrameDescriptionEntry::rows` method. + [#396](https://github.com/gimli-rs/gimli/pull/396) + +* Added the `EhHdrTable::unwind_info_for_address` method. + [#400](https://github.com/gimli-rs/gimli/pull/400) + +* Added the `EhHdrTable::fde_for_address` method and deprecated the + `EhHdrTable::lookup_and_parse` method. + [#400](https://github.com/gimli-rs/gimli/pull/400) + +* Added the `EhHdrTable::pointer_to_offset` method. + [#400](https://github.com/gimli-rs/gimli/pull/400) + +* Added the `UnwindSection::fde_for_address` method. + [#396](https://github.com/gimli-rs/gimli/pull/396) + +* Added the `UnwindSection::fde_from_offset` method. + [#400](https://github.com/gimli-rs/gimli/pull/400) + +* Added the `UnwindSection::partial_fde_from_offset` method. + [#400](https://github.com/gimli-rs/gimli/pull/400) + +* Added the `Section::id` method. + [#406](https://github.com/gimli-rs/gimli/pull/406) + +* Added the `Dwarf::load` method, and corresponding methods for individual sections. + [#406](https://github.com/gimli-rs/gimli/pull/406) + +* Added the `Dwarf::borrow` method, and corresponding methods for individual sections. + [#406](https://github.com/gimli-rs/gimli/pull/406) + +* Added the `Dwarf::format_error` method. + [#408](https://github.com/gimli-rs/gimli/pull/408) + +* Added the `Dwarf::die_ranges` method. + [#417](https://github.com/gimli-rs/gimli/pull/417) + +* Added the `Dwarf::unit_ranges` method. + [#417](https://github.com/gimli-rs/gimli/pull/417) + +* Added support for writing `.debug_frame` and `.eh_frame` sections. + [#412](https://github.com/gimli-rs/gimli/pull/412) + [#419](https://github.com/gimli-rs/gimli/pull/419) + +### Fixed + +* The `code_alignment_factor` is now used when evaluating CFI instructions + that advance the location. 
+ [#401](https://github.com/gimli-rs/gimli/pull/401) + +* Fixed parsing of pointers encoded with `DW_EH_PE_funcrel`. + [#402](https://github.com/gimli-rs/gimli/pull/402) + +* Use the FDE address encoding from the augmentation when parsing `DW_CFA_set_loc`. + [#403](https://github.com/gimli-rs/gimli/pull/403) + +* Fixed setting of `.eh_frame` base addresses in dwarfdump. + [#410](https://github.com/gimli-rs/gimli/pull/410) + +## 0.17.0 + +Released 2019/02/21. + +The focus of this release has been on improving DWARF 5 support, and +adding support for writing DWARF. + +### Breaking changes + +* Changed register values to a `Register` type instead of `u8`/`u64`. + [#328](https://github.com/gimli-rs/gimli/pull/328) + +* Replaced `BaseAddresses::set_cfi` with `set_eh_frame_hdr` and `set_eh_frame`. + Replaced `BaseAddresses::set_data` with `set_got`. + You should now use the same `BaseAddresses` value for parsing both + `.eh_frame` and `.eh_frame_hdr`. + [#351](https://github.com/gimli-rs/gimli/pull/351) + +* Renamed many types and functions related to `.debug_line`. + Renamed `LineNumberProgram` to `LineProgram`. + Renamed `IncompleteLineNumberProgram` to `IncompleteLineProgram`. + Renamed `CompleteLineNumberProgram` to `CompleteLineProgram`. + Renamed `LineNumberProgramHeader` to `LineProgramHeader`. + Renamed `LineNumberRow` to `LineRow`. + Renamed `StateMachine` to `LineRows`. + Renamed `Opcode` to `LineInstruction`. + Renamed `OpcodesIter` to `LineInstructions`. + Renamed `LineNumberSequence` to `LineSequence`. + [#359](https://github.com/gimli-rs/gimli/pull/359) + +* Added `Offset` type parameter to `AttributeValue`, `LineProgram`, + `IncompleteLineProgram`, `CompleteLineProgram`, `LineRows`, `LineInstruction`, + and `FileEntry`. + [#324](https://github.com/gimli-rs/gimli/pull/324) + +* Changed `FileEntry::path_name`, `FileEntry::directory`, and + `LineProgramHeader::directory` to return an `AttributeValue` instead + of a `Reader`. 
+ [#366](https://github.com/gimli-rs/gimli/pull/366) + +* Renamed `FileEntry::last_modification` to `FileEntry::timestamp` + and renamed `FileEntry::length` to `FileEntry::size`. + [#366](https://github.com/gimli-rs/gimli/pull/366) + +* Added an `Encoding` type. Changed many functions that previously accepted + `Format`, version or address size parameters to accept an `Encoding` + parameter instead. + Notable changes are `LocationLists::locations`, `RangeLists::ranges`, + and `Expression::evaluation`. + [#364](https://github.com/gimli-rs/gimli/pull/364) + +* Changed return type of `LocationLists::new` and `RangeLists::new`. + [#370](https://github.com/gimli-rs/gimli/pull/370) + +* Added parameters to `LocationLists::locations` and `RangeLists::ranges` + to support `.debug_addr`. + [#358](https://github.com/gimli-rs/gimli/pull/358) + +* Added more `AttributeValue` variants: `DebugAddrBase`, `DebugAddrIndex`, + `DebugLocListsBase`, `DebugLocListsIndex`, `DebugRngListsBase`, `DebugRngListsIndex`, + `DebugStrOffsetsBase`, `DebugStrOffsetsIndex`, `DebugLineStrRef`. + [#358](https://github.com/gimli-rs/gimli/pull/358) + +* Changed `AttributeValue::Data*` attributes to native endian integers instead + of byte arrays. + [#365](https://github.com/gimli-rs/gimli/pull/365) + +* Replaced `EvaluationResult::TextBase` with + `EvaluationResult::RequiresRelocatedAddress`. The handling of `TextBase` + was incorrect. + [#335](https://github.com/gimli-rs/gimli/pull/335) + +* Added `EvaluationResult::IndexedAddress` for operations that require an + address from `.debug_addr`. + [#358](https://github.com/gimli-rs/gimli/pull/358) + +* Added `Reader::read_slice`. Added a default implementation of + `Reader::read_u8_array` which uses this. + [#358](https://github.com/gimli-rs/gimli/pull/358) + +### Added + +* Added initial support for writing DWARF. This is targeted at supporting + line number information only.
+ [#340](https://github.com/gimli-rs/gimli/pull/340) + [#344](https://github.com/gimli-rs/gimli/pull/344) + [#346](https://github.com/gimli-rs/gimli/pull/346) + [#361](https://github.com/gimli-rs/gimli/pull/361) + [#362](https://github.com/gimli-rs/gimli/pull/362) + [#365](https://github.com/gimli-rs/gimli/pull/365) + [#368](https://github.com/gimli-rs/gimli/pull/368) + [#382](https://github.com/gimli-rs/gimli/pull/382) + +* Added `read` and `write` Cargo features. Both are enabled by default. + [#343](https://github.com/gimli-rs/gimli/pull/343) + +* Added support for reading DWARF 5 `.debug_line` and `.debug_line_str` sections. + [#366](https://github.com/gimli-rs/gimli/pull/366) + +* Added support for reading DWARF 5 `.debug_str_offsets` sections, including + parsing `DW_FORM_strx*` attributes. + [#358](https://github.com/gimli-rs/gimli/pull/358) + +* Added support for reading DWARF 5 `.debug_addr` sections, including parsing + `DW_FORM_addrx*` attributes and evaluating `DW_OP_addrx` and `DW_OP_constx` + operations. + [#358](https://github.com/gimli-rs/gimli/pull/358) + +* Added support for reading DWARF 5 indexed addresses and offsets in + `.debug_loclists` and `.debug_rnglists`, including parsing `DW_FORM_rnglistx` + and `DW_FORM_loclistx` attributes. + [#358](https://github.com/gimli-rs/gimli/pull/358) + +* Added high level `Dwarf` and `Unit` types. Existing code does not need to + switch to using these types, but doing so will make DWARF 5 support simpler. + [#352](https://github.com/gimli-rs/gimli/pull/352) + [#380](https://github.com/gimli-rs/gimli/pull/380) + [#381](https://github.com/gimli-rs/gimli/pull/381) + +* Added `EhFrame::set_address_size` and `DebugFrame::set_address_size` methods + to allow parsing non-native CFI sections. The default address size is still + the native size. + [#325](https://github.com/gimli-rs/gimli/pull/325) + +* Added architecture specific definitions for `Register` values and names. + Changed dwarfdump to print them. 
+ [#328](https://github.com/gimli-rs/gimli/pull/328) + +* Added support for reading relocatable DWARF sections. + [#337](https://github.com/gimli-rs/gimli/pull/337) + +* Added parsing of `DW_FORM_data16`. + [#366](https://github.com/gimli-rs/gimli/pull/366) + +### Fixed + +* Fixed parsing DWARF 5 ranges with `start == end == 0`. + [#323](https://github.com/gimli-rs/gimli/pull/323) + +* Changed `LineRows` to be covariant in its `Reader` type parameter. + [#324](https://github.com/gimli-rs/gimli/pull/324) + +* Fixed handling of empty units in dwarfdump. + [#330](https://github.com/gimli-rs/gimli/pull/330) + +* Fixed `UnitHeader::length_including_self` for `Dwarf64`. + [#342](https://github.com/gimli-rs/gimli/pull/342) + +* Fixed parsing of `DW_CFA_set_loc`. + [#355](https://github.com/gimli-rs/gimli/pull/355) + +* Fixed handling of multiple headers in `.debug_loclists` and `.debug_rnglists`. + [#370](https://github.com/gimli-rs/gimli/pull/370) + +-------------------------------------------------------------------------------- + +## 0.16.1 + +Released 2018/08/28. + +### Added + +* Added `EhFrameHdr::lookup_and_parse`. [#316][] +* Added support for `DW_CFA_GNU_args_size`. [#319][] + +### Fixed + +* Implement `Send`/`Sync` for `SubRange`. [#305][] +* Fixed `alloc` support on nightly. [#306][] [#310][] + +[#305]: https://github.com/gimli-rs/gimli/pull/305 +[#306]: https://github.com/gimli-rs/gimli/pull/306 +[#310]: https://github.com/gimli-rs/gimli/pull/310 +[#316]: https://github.com/gimli-rs/gimli/pull/316 +[#319]: https://github.com/gimli-rs/gimli/pull/319 + +-------------------------------------------------------------------------------- + +## 0.16.0 + +Released 2018/06/01. + +### Added + +* Added support for building in `#![no_std]` environments, when the `alloc` + crate is available. Disable the "std" feature and enable the "alloc" + feature. [#138][] [#271][] + +* Added support for DWARF 5 `.debug_rnglists` and `.debug_loclists` + sections. 
[#272][] + +* Added support for DWARF 5 `DW_FORM_ref_sup` and `DW_FORM_strp_sup` attribute + forms. [#288][] + +* Added support for DWARF 5 operations on typed values. [#293][] + +* A `dwarf-validate` example program that checks the integrity of the given + DWARF and its references between sections. [#290][] + +* Added the `EndianReader` type, an easy way to define a custom `Reader` + implementation with a reference to a generic buffer of bytes and an associated + endianity. [#298][] [#302][] + +### Changed + +* Various speed improvements for evaluating `.debug_line` line number + programs. [#276][] + +* The example `dwarfdump` clone is a [whole lot faster + now][dwarfdump-faster]. [#282][] [#284][] [#285][] + +### Deprecated + +* `EndianBuf` has been renamed to `EndianSlice`, use that name instead. [#295][] + +### Fixed + +* Evaluating the `DW_CFA_restore_state` opcode properly maintains the current + location. Previously it would incorrectly restore the old location when + popping from evaluation stack. 
[#274][] + +[#271]: https://github.com/gimli-rs/gimli/issues/271 +[#138]: https://github.com/gimli-rs/gimli/issues/138 +[#274]: https://github.com/gimli-rs/gimli/issues/274 +[#272]: https://github.com/gimli-rs/gimli/issues/272 +[#276]: https://github.com/gimli-rs/gimli/issues/276 +[#282]: https://github.com/gimli-rs/gimli/issues/282 +[#285]: https://github.com/gimli-rs/gimli/issues/285 +[#284]: https://github.com/gimli-rs/gimli/issues/284 +[#288]: https://github.com/gimli-rs/gimli/issues/288 +[#290]: https://github.com/gimli-rs/gimli/issues/290 +[#293]: https://github.com/gimli-rs/gimli/issues/293 +[#295]: https://github.com/gimli-rs/gimli/issues/295 +[#298]: https://github.com/gimli-rs/gimli/issues/298 +[#302]: https://github.com/gimli-rs/gimli/issues/302 +[dwarfdump-faster]: https://robert.ocallahan.org/2018/03/speeding-up-dwarfdump-with-rust.html + +-------------------------------------------------------------------------------- + +## 0.15.0 + +Released 2017/12/01. + +### Added + +* Added the `EndianBuf::to_string()` method. [#233][] + +* Added more robust error handling in our example `dwarfdump` clone. [#234][] + +* Added `FrameDescriptionEntry::initial_address` method. [#237][] + +* Added `FrameDescriptionEntry::len` method. [#237][] + +* Added the `FrameDescriptionEntry::entry_len` method. [#241][] + +* Added the `CommonInformationEntry::offset` method. [#241][] + +* Added the `CommonInformationEntry::entry_len` method. [#241][] + +* Added the `CommonInformationEntry::version` method. [#241][] + +* Added the `CommonInformationEntry::augmentation` method. [#241][] + +* Added the `CommonInformationEntry::code_alignment_factor` method. [#241][] + +* Added the `CommonInformationEntry::data_alignment_factor` method. [#241][] + +* Added the `CommonInformationEntry::return_address_register` method. [#241][] + +* Added support for printing `.eh_frame` sections to our example `dwarfdump` + clone. [#241][] + +* Added support for parsing the `.eh_frame_hdr` section. 
On Linux, the + `.eh_frame_hdr` section provides a pointer to the already-mapped-in-memory + `.eh_frame` data, so that it doesn't need to be duplicated, and a binary + search table of its entries for faster unwinding information lookups. [#250][] + +* Added support for parsing DWARF 5 compilation unit headers. [#257][] + +* Added support for DWARF 5's `DW_FORM_implicit_const`. [#257][] + +### Changed + +* Unwinding methods now give ownership of the unwinding context back to the + caller if errors are encountered, not just on the success path. This allows + recovering from errors in signal-safe code, where constructing a new unwinding + context is not an option because it requires allocation. This is a **breaking + change** affecting `UnwindSection::unwind_info_for_address` and + `UninitializedUnwindContext::initialize`. [#241][] + +* `CfaRule` and `RegisterRule` now expose their `DW_OP` expressions as + `Expression`. This is a minor **breaking change**. [#241][] + +* The `Error::UnknownVersion` variant now contains the unknown version + number. This is a minor **breaking change**. [#245][] + +* `EvaluationResult::RequiresEntryValue` requires an `Expression` instead of a + `Reader` now. This is a minor **breaking change**. [#256][] + + +[#233]: https://github.com/gimli-rs/gimli/pull/233 +[#234]: https://github.com/gimli-rs/gimli/pull/234 +[#237]: https://github.com/gimli-rs/gimli/pull/237 +[#241]: https://github.com/gimli-rs/gimli/pull/241 +[#245]: https://github.com/gimli-rs/gimli/pull/245 +[#250]: https://github.com/gimli-rs/gimli/pull/250 +[#256]: https://github.com/gimli-rs/gimli/pull/256 +[#257]: https://github.com/gimli-rs/gimli/pull/257 + +-------------------------------------------------------------------------------- + +## 0.14.0 + +Released 2017/08/08. + +### Added + +* All `pub` types now `derive(Hash)`. [#192][] + +* All the constants from DWARF 5 are now defined. 
[#193][] + +* Added support for the `DW_OP_GNU_parameter_ref` GNU extension to parsing and + evaluating DWARF opcodes. [#208][] + +* Improved LEB128 parsing performance. [#216][] + +* Improved `.debug_{aranges,pubnames,pubtypes}` parsing performance. [#218][] + +* Added the ability to choose endianity dynamically at run time, rather than + only statically at compile time. [#219][] + +### Changed + +* The biggest change of this release is that `gimli` no longer requires the + object file's section be fully loaded into memory. This enables using `gimli` + on 32 bit platforms where there often isn't enough contiguous virtual memory + address space to load debugging information into. The default behavior is + still geared for 64 bit platforms, where address space overfloweth, and you + can still load the whole sections of the object file (or the entire object + file) into memory. This is abstracted over with the `gimli::Reader` + trait. This manifests as small (but many) breaking changes to much of the + public API. [#182][] + +### Fixed + +* The `DW_END_*` constants for defining endianity of a compilation unit were + previously incorrect. [#193][] + +* The `DW_OP_addr` opcode is relative to the base address of the `.text` section + of the binary, but we were incorrectly treating it as an absolute value.
[#210][] + +[GitHub]: https://github.com/gimli-rs/gimli +[crates.io]: https://crates.io/crates/gimli +[contributing]: https://github.com/gimli-rs/gimli/blob/master/CONTRIBUTING.md +[easy]: https://github.com/gimli-rs/gimli/issues?q=is%3Aopen+is%3Aissue+label%3Aeasy +[#192]: https://github.com/gimli-rs/gimli/pull/192 +[#193]: https://github.com/gimli-rs/gimli/pull/193 +[#182]: https://github.com/gimli-rs/gimli/issues/182 +[#208]: https://github.com/gimli-rs/gimli/pull/208 +[#210]: https://github.com/gimli-rs/gimli/pull/210 +[#216]: https://github.com/gimli-rs/gimli/pull/216 +[#218]: https://github.com/gimli-rs/gimli/pull/218 +[#219]: https://github.com/gimli-rs/gimli/pull/219 diff --git a/third_party/rust/gimli/Cargo.toml b/third_party/rust/gimli/Cargo.toml new file mode 100644 index 000000000000..4a0be3a4b8c0 --- /dev/null +++ b/third_party/rust/gimli/Cargo.toml @@ -0,0 +1,109 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +rust-version = "1.60" +name = "gimli" +version = "0.30.0" +include = [ + "/CHANGELOG.md", + "/Cargo.toml", + "/LICENSE-APACHE", + "/LICENSE-MIT", + "/README.md", + "/src", +] +description = "A library for reading and writing the DWARF debugging format." 
+documentation = "https://docs.rs/gimli" +readme = "./README.md" +keywords = [ + "DWARF", + "debug", + "ELF", + "eh_frame", +] +categories = [ + "development-tools::debugging", + "development-tools::profiling", + "parser-implementations", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/gimli-rs/gimli" +resolver = "2" + +[profile.bench] +codegen-units = 1 +debug = 2 +split-debuginfo = "packed" + +[profile.test] +split-debuginfo = "packed" + +[dependencies.alloc] +version = "1.0.0" +optional = true +package = "rustc-std-workspace-alloc" + +[dependencies.compiler_builtins] +version = "0.1.2" +optional = true + +[dependencies.core] +version = "1.0.0" +optional = true +package = "rustc-std-workspace-core" + +[dependencies.fallible-iterator] +version = "0.3.0" +optional = true +default-features = false + +[dependencies.indexmap] +version = "2.0.0" +optional = true + +[dependencies.stable_deref_trait] +version = "1.1.0" +optional = true +default-features = false + +[dev-dependencies.test-assembler] +version = "0.1.3" + +[features] +default = [ + "read-all", + "write", +] +endian-reader = [ + "read", + "dep:stable_deref_trait", +] +fallible-iterator = ["dep:fallible-iterator"] +read = ["read-core"] +read-all = [ + "read", + "std", + "fallible-iterator", + "endian-reader", +] +read-core = [] +rustc-dep-of-std = [ + "dep:core", + "dep:alloc", + "dep:compiler_builtins", +] +std = [ + "fallible-iterator?/std", + "stable_deref_trait?/std", +] +write = ["dep:indexmap"] diff --git a/third_party/rust/gimli/LICENSE-APACHE b/third_party/rust/gimli/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/third_party/rust/gimli/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/third_party/rust/gimli/LICENSE-MIT b/third_party/rust/gimli/LICENSE-MIT new file mode 100644 index 000000000000..e69282e381bc --- /dev/null +++ b/third_party/rust/gimli/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2015 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/gimli/README.md b/third_party/rust/gimli/README.md new file mode 100644 index 000000000000..991e185d0868 --- /dev/null +++ b/third_party/rust/gimli/README.md @@ -0,0 +1,81 @@ +# `gimli` + +[![](https://img.shields.io/crates/v/gimli.svg) ![](https://img.shields.io/crates/d/gimli.svg)](https://crates.io/crates/gimli) +[![](https://docs.rs/gimli/badge.svg)](https://docs.rs/gimli/) +[![Build Status](https://github.com/gimli-rs/gimli/workflows/Rust/badge.svg)](https://github.com/gimli-rs/gimli/actions) +[![Coverage Status](https://coveralls.io/repos/github/gimli-rs/gimli/badge.svg?branch=master)](https://coveralls.io/github/gimli-rs/gimli?branch=master) + +`gimli` is a library for reading and writing the +[DWARF debugging format](https://dwarfstd.org/). + +* **Zero copy:** everything is just a reference to the original input buffer. No + copies of the input data get made. + +* **Lazy:** you can iterate compilation units without parsing their + contents. Parse only as many debugging information entry (DIE) trees as you + iterate over. `gimli` also uses `DW_AT_sibling` references to avoid parsing a + DIE's children to find its next sibling, when possible. + +* **Cross-platform:** `gimli` makes no assumptions about what kind of object + file you're working with. The flipside to that is that it's up to you to + provide an ELF loader on Linux or Mach-O loader on macOS. + + * Unsure which object file parser to use? 
Try the cross-platform + [`object`](https://github.com/gimli-rs/object) crate. See the + [`gimli-examples`](./crates/examples/src/bin) crate for usage with `gimli`. + +## Install + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +gimli = "0.30.0" +``` + +The minimum supported Rust version is: + +* 1.60.0 for the `read` feature and its dependencies. +* 1.65.0 for other features. + +## Documentation + +* [Documentation on docs.rs](https://docs.rs/gimli/) + +* Example programs: + + * [A simple `.debug_info` parser](./crates/examples/src/bin/simple.rs) + + * [A simple `.debug_line` parser](./crates/examples/src/bin/simple_line.rs) + + * [A `dwarfdump` clone](./crates/examples/src/bin/dwarfdump.rs) + + * [An `addr2line` clone](https://github.com/gimli-rs/addr2line) + + * [`ddbug`](https://github.com/gimli-rs/ddbug), a utility giving insight into + code generation by making debugging information readable. + + * [`dwprod`](https://github.com/fitzgen/dwprod), a tiny utility to list the + compilers used to create each compilation unit within a shared library or + executable (via `DW_AT_producer`). + + * [`dwarf-validate`](./crates/examples/src/bin/dwarf-validate.rs), a program to validate the + integrity of some DWARF and its references between sections and compilation + units. + +## License + +Licensed under either of + + * Apache License, Version 2.0 ([`LICENSE-APACHE`](./LICENSE-APACHE) or https://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([`LICENSE-MIT`](./LICENSE-MIT) or https://opensource.org/licenses/MIT) + +at your option. + +## Contribution + +See [CONTRIBUTING.md](./CONTRIBUTING.md) for hacking. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. 
diff --git a/third_party/rust/gimli/src/arch.rs b/third_party/rust/gimli/src/arch.rs new file mode 100644 index 000000000000..9ee5159471f1 --- /dev/null +++ b/third_party/rust/gimli/src/arch.rs @@ -0,0 +1,1088 @@ +use crate::common::Register; + +macro_rules! registers { + ($struct_name:ident, { $($name:ident = ($val:expr, $disp:expr)),+ $(,)? } + $(, aliases { $($alias_name:ident = ($alias_val:expr, $alias_disp:expr)),+ $(,)? })?) => { + #[allow(missing_docs)] + impl $struct_name { + $( + pub const $name: Register = Register($val); + )+ + $( + $(pub const $alias_name: Register = Register($alias_val);)+ + )* + } + + impl $struct_name { + /// The name of a register, or `None` if the register number is unknown. + /// + /// Only returns the primary name for registers that alias with others. + pub fn register_name(register: Register) -> Option<&'static str> { + match register { + $( + Self::$name => Some($disp), + )+ + _ => return None, + } + } + + /// Converts a register name into a register number. + pub fn name_to_register(value: &str) -> Option { + match value { + $( + $disp => Some(Self::$name), + )+ + $( + $($alias_disp => Some(Self::$alias_name),)+ + )* + _ => return None, + } + } + } + }; +} + +/// ARM architecture specific definitions. +/// +/// See [DWARF for the ARM Architecture]( +/// https://github.com/ARM-software/abi-aa/blob/main/aadwarf32/aadwarf32.rst). 
+#[derive(Debug, Clone, Copy)] +pub struct Arm; + +registers!(Arm, { + R0 = (0, "R0"), + R1 = (1, "R1"), + R2 = (2, "R2"), + R3 = (3, "R3"), + R4 = (4, "R4"), + R5 = (5, "R5"), + R6 = (6, "R6"), + R7 = (7, "R7"), + R8 = (8, "R8"), + R9 = (9, "R9"), + R10 = (10, "R10"), + R11 = (11, "R11"), + R12 = (12, "R12"), + R13 = (13, "R13"), + R14 = (14, "R14"), + R15 = (15, "R15"), + + WCGR0 = (104, "wCGR0"), + WCGR1 = (105, "wCGR1"), + WCGR2 = (106, "wCGR2"), + WCGR3 = (107, "wCGR3"), + WCGR4 = (108, "wCGR4"), + WCGR5 = (109, "wCGR5"), + WCGR6 = (110, "wCGR6"), + WCGR7 = (111, "wCGR7"), + + WR0 = (112, "wR0"), + WR1 = (113, "wR1"), + WR2 = (114, "wR2"), + WR3 = (115, "wR3"), + WR4 = (116, "wR4"), + WR5 = (117, "wR5"), + WR6 = (118, "wR6"), + WR7 = (119, "wR7"), + WR8 = (120, "wR8"), + WR9 = (121, "wR9"), + WR10 = (122, "wR10"), + WR11 = (123, "wR11"), + WR12 = (124, "wR12"), + WR13 = (125, "wR13"), + WR14 = (126, "wR14"), + WR15 = (127, "wR15"), + + SPSR = (128, "SPSR"), + SPSR_FIQ = (129, "SPSR_FIQ"), + SPSR_IRQ = (130, "SPSR_IRQ"), + SPSR_ABT = (131, "SPSR_ABT"), + SPSR_UND = (132, "SPSR_UND"), + SPSR_SVC = (133, "SPSR_SVC"), + + RA_AUTH_CODE = (143, "RA_AUTH_CODE"), + + R8_USR = (144, "R8_USR"), + R9_USR = (145, "R9_USR"), + R10_USR = (146, "R10_USR"), + R11_USR = (147, "R11_USR"), + R12_USR = (148, "R12_USR"), + R13_USR = (149, "R13_USR"), + R14_USR = (150, "R14_USR"), + + R8_FIQ = (151, "R8_FIQ"), + R9_FIQ = (152, "R9_FIQ"), + R10_FIQ = (153, "R10_FIQ"), + R11_FIQ = (154, "R11_FIQ"), + R12_FIQ = (155, "R12_FIQ"), + R13_FIQ = (156, "R13_FIQ"), + R14_FIQ = (157, "R14_FIQ"), + + R13_IRQ = (158, "R13_IRQ"), + R14_IRQ = (159, "R14_IRQ"), + + R13_ABT = (160, "R13_ABT"), + R14_ABT = (161, "R14_ABT"), + + R13_UND = (162, "R13_UND"), + R14_UND = (163, "R14_UND"), + + R13_SVC = (164, "R13_SVC"), + R14_SVC = (165, "R14_SVC"), + + WC0 = (192, "wC0"), + WC1 = (193, "wC1"), + WC2 = (194, "wC2"), + WC3 = (195, "wC3"), + WC4 = (196, "wC4"), + WC5 = (197, "wC5"), + WC6 = (198, "wC6"), 
+ WC7 = (199, "wC7"), + + D0 = (256, "D0"), + D1 = (257, "D1"), + D2 = (258, "D2"), + D3 = (259, "D3"), + D4 = (260, "D4"), + D5 = (261, "D5"), + D6 = (262, "D6"), + D7 = (263, "D7"), + D8 = (264, "D8"), + D9 = (265, "D9"), + D10 = (266, "D10"), + D11 = (267, "D11"), + D12 = (268, "D12"), + D13 = (269, "D13"), + D14 = (270, "D14"), + D15 = (271, "D15"), + D16 = (272, "D16"), + D17 = (273, "D17"), + D18 = (274, "D18"), + D19 = (275, "D19"), + D20 = (276, "D20"), + D21 = (277, "D21"), + D22 = (278, "D22"), + D23 = (279, "D23"), + D24 = (280, "D24"), + D25 = (281, "D25"), + D26 = (282, "D26"), + D27 = (283, "D27"), + D28 = (284, "D28"), + D29 = (285, "D29"), + D30 = (286, "D30"), + D31 = (287, "D31"), + + TPIDRURO = (320, "TPIDRURO"), + TPIDRURW = (321, "TPIDRURW"), + TPIDPR = (322, "TPIDPR"), + HTPIDPR = (323, "HTPIDPR"), +}, +aliases { + SP = (13, "SP"), + LR = (14, "LR"), + PC = (15, "PC"), + + ACC0 = (104, "ACC0"), + ACC1 = (105, "ACC1"), + ACC2 = (106, "ACC2"), + ACC3 = (107, "ACC3"), + ACC4 = (108, "ACC4"), + ACC5 = (109, "ACC5"), + ACC6 = (110, "ACC6"), + ACC7 = (111, "ACC7"), + + S0 = (256, "S0"), + S1 = (256, "S1"), + S2 = (257, "S2"), + S3 = (257, "S3"), + S4 = (258, "S4"), + S5 = (258, "S5"), + S6 = (259, "S6"), + S7 = (259, "S7"), + S8 = (260, "S8"), + S9 = (260, "S9"), + S10 = (261, "S10"), + S11 = (261, "S11"), + S12 = (262, "S12"), + S13 = (262, "S13"), + S14 = (263, "S14"), + S15 = (263, "S15"), + S16 = (264, "S16"), + S17 = (264, "S17"), + S18 = (265, "S18"), + S19 = (265, "S19"), + S20 = (266, "S20"), + S21 = (266, "S21"), + S22 = (267, "S22"), + S23 = (267, "S23"), + S24 = (268, "S24"), + S25 = (268, "S25"), + S26 = (269, "S26"), + S27 = (269, "S27"), + S28 = (270, "S28"), + S29 = (270, "S29"), + S30 = (271, "S30"), + S31 = (271, "S31"), +}); + +/// ARM 64-bit (AArch64) architecture specific definitions. +/// +/// See [DWARF for the ARM 64-bit Architecture]( +/// https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst). 
+#[derive(Debug, Clone, Copy)] +pub struct AArch64; + +registers!(AArch64, { + X0 = (0, "X0"), + X1 = (1, "X1"), + X2 = (2, "X2"), + X3 = (3, "X3"), + X4 = (4, "X4"), + X5 = (5, "X5"), + X6 = (6, "X6"), + X7 = (7, "X7"), + X8 = (8, "X8"), + X9 = (9, "X9"), + X10 = (10, "X10"), + X11 = (11, "X11"), + X12 = (12, "X12"), + X13 = (13, "X13"), + X14 = (14, "X14"), + X15 = (15, "X15"), + X16 = (16, "X16"), + X17 = (17, "X17"), + X18 = (18, "X18"), + X19 = (19, "X19"), + X20 = (20, "X20"), + X21 = (21, "X21"), + X22 = (22, "X22"), + X23 = (23, "X23"), + X24 = (24, "X24"), + X25 = (25, "X25"), + X26 = (26, "X26"), + X27 = (27, "X27"), + X28 = (28, "X28"), + X29 = (29, "X29"), + X30 = (30, "X30"), + SP = (31, "SP"), + PC = (32, "PC"), + ELR_MODE = (33, "ELR_mode"), + RA_SIGN_STATE = (34, "RA_SIGN_STATE"), + TPIDRRO_EL0 = (35, "TPIDRRO_EL0"), + TPIDR_EL0 = (36, "TPIDR_EL0"), + TPIDR_EL1 = (37, "TPIDR_EL1"), + TPIDR_EL2 = (38, "TPIDR_EL2"), + TPIDR_EL3 = (39, "TPIDR_EL3"), + + VG = (46, "VG"), + FFR = (47, "FFR"), + + P0 = (48, "P0"), + P1 = (49, "P1"), + P2 = (50, "P2"), + P3 = (51, "P3"), + P4 = (52, "P4"), + P5 = (53, "P5"), + P6 = (54, "P6"), + P7 = (55, "P7"), + P8 = (56, "P8"), + P9 = (57, "P9"), + P10 = (58, "P10"), + P11 = (59, "P11"), + P12 = (60, "P12"), + P13 = (61, "P13"), + P14 = (62, "P14"), + P15 = (63, "P15"), + + V0 = (64, "V0"), + V1 = (65, "V1"), + V2 = (66, "V2"), + V3 = (67, "V3"), + V4 = (68, "V4"), + V5 = (69, "V5"), + V6 = (70, "V6"), + V7 = (71, "V7"), + V8 = (72, "V8"), + V9 = (73, "V9"), + V10 = (74, "V10"), + V11 = (75, "V11"), + V12 = (76, "V12"), + V13 = (77, "V13"), + V14 = (78, "V14"), + V15 = (79, "V15"), + V16 = (80, "V16"), + V17 = (81, "V17"), + V18 = (82, "V18"), + V19 = (83, "V19"), + V20 = (84, "V20"), + V21 = (85, "V21"), + V22 = (86, "V22"), + V23 = (87, "V23"), + V24 = (88, "V24"), + V25 = (89, "V25"), + V26 = (90, "V26"), + V27 = (91, "V27"), + V28 = (92, "V28"), + V29 = (93, "V29"), + V30 = (94, "V30"), + V31 = (95, "V31"), + + Z0 = 
(96, "Z0"), + Z1 = (97, "Z1"), + Z2 = (98, "Z2"), + Z3 = (99, "Z3"), + Z4 = (100, "Z4"), + Z5 = (101, "Z5"), + Z6 = (102, "Z6"), + Z7 = (103, "Z7"), + Z8 = (104, "Z8"), + Z9 = (105, "Z9"), + Z10 = (106, "Z10"), + Z11 = (107, "Z11"), + Z12 = (108, "Z12"), + Z13 = (109, "Z13"), + Z14 = (110, "Z14"), + Z15 = (111, "Z15"), + Z16 = (112, "Z16"), + Z17 = (113, "Z17"), + Z18 = (114, "Z18"), + Z19 = (115, "Z19"), + Z20 = (116, "Z20"), + Z21 = (117, "Z21"), + Z22 = (118, "Z22"), + Z23 = (119, "Z23"), + Z24 = (120, "Z24"), + Z25 = (121, "Z25"), + Z26 = (122, "Z26"), + Z27 = (123, "Z27"), + Z28 = (124, "Z28"), + Z29 = (125, "Z29"), + Z30 = (126, "Z30"), + Z31 = (127, "Z31"), +}); + +/// LoongArch architecture specific definitions. +/// +/// See [LoongArch ELF psABI specification](https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html). +#[derive(Debug, Clone, Copy)] +pub struct LoongArch; + +registers!(LoongArch, { + R0 = (0, "$r0"), + R1 = (1, "$r1"), + R2 = (2, "$r2"), + R3 = (3, "$r3"), + R4 = (4, "$r4"), + R5 = (5, "$r5"), + R6 = (6, "$r6"), + R7 = (7, "$r7"), + R8 = (8, "$r8"), + R9 = (9, "$r9"), + R10 = (10, "$r10"), + R11 = (11, "$r11"), + R12 = (12, "$r12"), + R13 = (13, "$r13"), + R14 = (14, "$r14"), + R15 = (15, "$r15"), + R16 = (16, "$r16"), + R17 = (17, "$r17"), + R18 = (18, "$r18"), + R19 = (19, "$r19"), + R20 = (20, "$r20"), + R21 = (21, "$r21"), + R22 = (22, "$r22"), + R23 = (23, "$r23"), + R24 = (24, "$r24"), + R25 = (25, "$r25"), + R26 = (26, "$r26"), + R27 = (27, "$r27"), + R28 = (28, "$r28"), + R29 = (29, "$r29"), + R30 = (30, "$r30"), + R31 = (31, "$r31"), + + F0 = (32, "$f0"), + F1 = (33, "$f1"), + F2 = (34, "$f2"), + F3 = (35, "$f3"), + F4 = (36, "$f4"), + F5 = (37, "$f5"), + F6 = (38, "$f6"), + F7 = (39, "$f7"), + F8 = (40, "$f8"), + F9 = (41, "$f9"), + F10 = (42, "$f10"), + F11 = (43, "$f11"), + F12 = (44, "$f12"), + F13 = (45, "$f13"), + F14 = (46, "$f14"), + F15 = (47, "$f15"), + F16 = (48, "$f16"), + F17 = (49, "$f17"), + F18 = 
(50, "$f18"), + F19 = (51, "$f19"), + F20 = (52, "$f20"), + F21 = (53, "$f21"), + F22 = (54, "$f22"), + F23 = (55, "$f23"), + F24 = (56, "$f24"), + F25 = (57, "$f25"), + F26 = (58, "$f26"), + F27 = (59, "$f27"), + F28 = (60, "$f28"), + F29 = (61, "$f29"), + F30 = (62, "$f30"), + F31 = (63, "$f31"), + FCC0 = (64, "$fcc0"), + FCC1 = (65, "$fcc1"), + FCC2 = (66, "$fcc2"), + FCC3 = (67, "$fcc3"), + FCC4 = (68, "$fcc4"), + FCC5 = (69, "$fcc5"), + FCC6 = (70, "$fcc6"), + FCC7 = (71, "$fcc7"), +}, +aliases { + ZERO = (0, "$zero"), + RA = (1, "$ra"), + TP = (2, "$tp"), + SP = (3, "$sp"), + A0 = (4, "$a0"), + A1 = (5, "$a1"), + A2 = (6, "$a2"), + A3 = (7, "$a3"), + A4 = (8, "$a4"), + A5 = (9, "$a5"), + A6 = (10, "$a6"), + A7 = (11, "$a7"), + T0 = (12, "$t0"), + T1 = (13, "$t1"), + T2 = (14, "$t2"), + T3 = (15, "$t3"), + T4 = (16, "$t4"), + T5 = (17, "$t5"), + T6 = (18, "$t6"), + T7 = (19, "$t7"), + T8 = (20, "$t8"), + FP = (22, "$fp"), + S0 = (23, "$s0"), + S1 = (24, "$s1"), + S2 = (25, "$s2"), + S3 = (26, "$s3"), + S4 = (27, "$s4"), + S5 = (28, "$s5"), + S6 = (29, "$s6"), + S7 = (30, "$s7"), + S8 = (31, "$s8"), + + FA0 = (32, "$fa0"), + FA1 = (33, "$fa1"), + FA2 = (34, "$fa2"), + FA3 = (35, "$fa3"), + FA4 = (36, "$fa4"), + FA5 = (37, "$fa5"), + FA6 = (38, "$fa6"), + FA7 = (39, "$fa7"), + FT0 = (40, "$ft0"), + FT1 = (41, "$ft1"), + FT2 = (42, "$ft2"), + FT3 = (43, "$ft3"), + FT4 = (44, "$ft4"), + FT5 = (45, "$ft5"), + FT6 = (46, "$ft6"), + FT7 = (47, "$ft7"), + FT8 = (48, "$ft8"), + FT9 = (49, "$ft9"), + FT10 = (50, "$ft10"), + FT11 = (51, "$ft11"), + FT12 = (52, "$ft12"), + FT13 = (53, "$ft13"), + FT14 = (54, "$ft14"), + FT15 = (55, "$ft15"), + FS0 = (56, "$fs0"), + FS1 = (57, "$fs1"), + FS2 = (58, "$fs2"), + FS3 = (59, "$fs3"), + FS4 = (60, "$fs4"), + FS5 = (61, "$fs5"), + FS6 = (62, "$fs6"), + FS7 = (63, "$fs7"), +}); + +/// MIPS architecture specific definitions. +/// +/// See [MIPS Details](https://en.wikibooks.org/wiki/MIPS_Assembly/MIPS_Details). 
+#[derive(Debug, Clone, Copy)] +pub struct MIPS; + +registers!(MIPS, { + R0 = (0, "$0"), + R1 = (1, "$1"), + R2 = (2, "$2"), + R3 = (3, "$3"), + R4 = (4, "$4"), + R5 = (5, "$5"), + R6 = (6, "$6"), + R7 = (7, "$7"), + R8 = (8, "$8"), + R9 = (9, "$9"), + R10 = (10, "$10"), + R11 = (11, "$11"), + R12 = (12, "$12"), + R13 = (13, "$13"), + R14 = (14, "$14"), + R15 = (15, "$15"), + R16 = (16, "$16"), + R17 = (17, "$17"), + R18 = (18, "$18"), + R19 = (19, "$19"), + R20 = (20, "$20"), + R21 = (21, "$21"), + R22 = (22, "$22"), + R23 = (23, "$23"), + R24 = (24, "$24"), + R25 = (25, "$25"), + R26 = (26, "$26"), + R27 = (27, "$27"), + R28 = (28, "$28"), + R29 = (29, "$29"), + R30 = (30, "$30"), + R31 = (31, "$31"), + + F0 = (32, "$f0"), + F1 = (33, "$f1"), + F2 = (34, "$f2"), + F3 = (35, "$f3"), + F4 = (36, "$f4"), + F5 = (37, "$f5"), + F6 = (38, "$f6"), + F7 = (39, "$f7"), + F8 = (40, "$f8"), + F9 = (41, "$f9"), + F10 = (42, "$f10"), + F11 = (43, "$f11"), + F12 = (44, "$f12"), + F13 = (45, "$f13"), + F14 = (46, "$f14"), + F15 = (47, "$f15"), + F16 = (48, "$f16"), + F17 = (49, "$f17"), + F18 = (50, "$f18"), + F19 = (51, "$f19"), + F20 = (52, "$f20"), + F21 = (53, "$f21"), + F22 = (54, "$f22"), + F23 = (55, "$f23"), + F24 = (56, "$f24"), + F25 = (57, "$f25"), + F26 = (58, "$f26"), + F27 = (59, "$f27"), + F28 = (60, "$f28"), + F29 = (61, "$f29"), + F30 = (62, "$f30"), + F31 = (63, "$f31"), +}, +aliases { + ZERO = (0, "$zero"), + AT = (1, "$at"), + V0 = (2, "$v0"), + V1 = (3, "$v1"), + A0 = (4, "$a0"), + A1 = (5, "$a1"), + A2 = (6, "$a2"), + A3 = (7, "$a3"), + T0 = (8, "$t0"), + T1 = (9, "$t1"), + T2 = (10, "$t2"), + T3 = (11, "$t3"), + T4 = (12, "$t4"), + T5 = (13, "$t5"), + T6 = (14, "$t6"), + T7 = (15, "$t7"), + S0 = (16, "$s0"), + S1 = (17, "$s1"), + S2 = (18, "$s2"), + S3 = (19, "$s3"), + S4 = (20, "$s4"), + S5 = (21, "$s5"), + S6 = (22, "$s6"), + S7 = (23, "$s7"), + T8 = (24, "$t8"), + T9 = (25, "$t9"), + K0 = (26, "$k0"), + K1 = (27, "$k1"), + GP = (28, "$gp"), + SP = (29, 
"$sp"), + FP = (30, "$fp"), + RA = (31, "$ra"), + + S8 = (30, "$s8") +}); + +/// RISC-V architecture specific definitions. +/// +/// See [RISC-V ELF psABI specification](https://github.com/riscv/riscv-elf-psabi-doc). +#[derive(Debug, Clone, Copy)] +pub struct RiscV; + +registers!(RiscV, { + X0 = (0, "x0"), + X1 = (1, "x1"), + X2 = (2, "x2"), + X3 = (3, "x3"), + X4 = (4, "x4"), + X5 = (5, "x5"), + X6 = (6, "x6"), + X7 = (7, "x7"), + X8 = (8, "x8"), + X9 = (9, "x9"), + X10 = (10, "x10"), + X11 = (11, "x11"), + X12 = (12, "x12"), + X13 = (13, "x13"), + X14 = (14, "x14"), + X15 = (15, "x15"), + X16 = (16, "x16"), + X17 = (17, "x17"), + X18 = (18, "x18"), + X19 = (19, "x19"), + X20 = (20, "x20"), + X21 = (21, "x21"), + X22 = (22, "x22"), + X23 = (23, "x23"), + X24 = (24, "x24"), + X25 = (25, "x25"), + X26 = (26, "x26"), + X27 = (27, "x27"), + X28 = (28, "x28"), + X29 = (29, "x29"), + X30 = (30, "x30"), + X31 = (31, "x31"), + + F0 = (32, "f0"), + F1 = (33, "f1"), + F2 = (34, "f2"), + F3 = (35, "f3"), + F4 = (36, "f4"), + F5 = (37, "f5"), + F6 = (38, "f6"), + F7 = (39, "f7"), + F8 = (40, "f8"), + F9 = (41, "f9"), + F10 = (42, "f10"), + F11 = (43, "f11"), + F12 = (44, "f12"), + F13 = (45, "f13"), + F14 = (46, "f14"), + F15 = (47, "f15"), + F16 = (48, "f16"), + F17 = (49, "f17"), + F18 = (50, "f18"), + F19 = (51, "f19"), + F20 = (52, "f20"), + F21 = (53, "f21"), + F22 = (54, "f22"), + F23 = (55, "f23"), + F24 = (56, "f24"), + F25 = (57, "f25"), + F26 = (58, "f26"), + F27 = (59, "f27"), + F28 = (60, "f28"), + F29 = (61, "f29"), + F30 = (62, "f30"), + F31 = (63, "f31"), +}, +aliases { + ZERO = (0, "zero"), + RA = (1, "ra"), + SP = (2, "sp"), + GP = (3, "gp"), + TP = (4, "tp"), + T0 = (5, "t0"), + T1 = (6, "t1"), + T2 = (7, "t2"), + S0 = (8, "s0"), + S1 = (9, "s1"), + A0 = (10, "a0"), + A1 = (11, "a1"), + A2 = (12, "a2"), + A3 = (13, "a3"), + A4 = (14, "a4"), + A5 = (15, "a5"), + A6 = (16, "a6"), + A7 = (17, "a7"), + S2 = (18, "s2"), + S3 = (19, "s3"), + S4 = (20, "s4"), + S5 
= (21, "s5"), + S6 = (22, "s6"), + S7 = (23, "s7"), + S8 = (24, "s8"), + S9 = (25, "s9"), + S10 = (26, "s10"), + S11 = (27, "s11"), + T3 = (28, "t3"), + T4 = (29, "t4"), + T5 = (30, "t5"), + T6 = (31, "t6"), + + FT0 = (32, "ft0"), + FT1 = (33, "ft1"), + FT2 = (34, "ft2"), + FT3 = (35, "ft3"), + FT4 = (36, "ft4"), + FT5 = (37, "ft5"), + FT6 = (38, "ft6"), + FT7 = (39, "ft7"), + FS0 = (40, "fs0"), + FS1 = (41, "fs1"), + FA0 = (42, "fa0"), + FA1 = (43, "fa1"), + FA2 = (44, "fa2"), + FA3 = (45, "fa3"), + FA4 = (46, "fa4"), + FA5 = (47, "fa5"), + FA6 = (48, "fa6"), + FA7 = (49, "fa7"), + FS2 = (50, "fs2"), + FS3 = (51, "fs3"), + FS4 = (52, "fs4"), + FS5 = (53, "fs5"), + FS6 = (54, "fs6"), + FS7 = (55, "fs7"), + FS8 = (56, "fs8"), + FS9 = (57, "fs9"), + FS10 = (58, "fs10"), + FS11 = (59, "fs11"), + FT8 = (60, "ft8"), + FT9 = (61, "ft9"), + FT10 = (62, "ft10"), + FT11 = (63, "ft11"), +}); + +/// Intel i386 architecture specific definitions. +/// +/// See Intel386 psABi version 1.1 at the [X86 psABI wiki](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI). +#[derive(Debug, Clone, Copy)] +pub struct X86; + +registers!(X86, { + EAX = (0, "eax"), + ECX = (1, "ecx"), + EDX = (2, "edx"), + EBX = (3, "ebx"), + ESP = (4, "esp"), + EBP = (5, "ebp"), + ESI = (6, "esi"), + EDI = (7, "edi"), + + // Return Address register. This is stored in `0(%esp, "")` and is not a physical register. 
+ RA = (8, "RA"), + + ST0 = (11, "st0"), + ST1 = (12, "st1"), + ST2 = (13, "st2"), + ST3 = (14, "st3"), + ST4 = (15, "st4"), + ST5 = (16, "st5"), + ST6 = (17, "st6"), + ST7 = (18, "st7"), + + XMM0 = (21, "xmm0"), + XMM1 = (22, "xmm1"), + XMM2 = (23, "xmm2"), + XMM3 = (24, "xmm3"), + XMM4 = (25, "xmm4"), + XMM5 = (26, "xmm5"), + XMM6 = (27, "xmm6"), + XMM7 = (28, "xmm7"), + + MM0 = (29, "mm0"), + MM1 = (30, "mm1"), + MM2 = (31, "mm2"), + MM3 = (32, "mm3"), + MM4 = (33, "mm4"), + MM5 = (34, "mm5"), + MM6 = (35, "mm6"), + MM7 = (36, "mm7"), + + MXCSR = (39, "mxcsr"), + + ES = (40, "es"), + CS = (41, "cs"), + SS = (42, "ss"), + DS = (43, "ds"), + FS = (44, "fs"), + GS = (45, "gs"), + + TR = (48, "tr"), + LDTR = (49, "ldtr"), + + FS_BASE = (93, "fs.base"), + GS_BASE = (94, "gs.base"), +}); + +/// AMD64 architecture specific definitions. +/// +/// See x86-64 psABI version 1.0 at the [X86 psABI wiki](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI). +#[derive(Debug, Clone, Copy)] +pub struct X86_64; + +registers!(X86_64, { + RAX = (0, "rax"), + RDX = (1, "rdx"), + RCX = (2, "rcx"), + RBX = (3, "rbx"), + RSI = (4, "rsi"), + RDI = (5, "rdi"), + RBP = (6, "rbp"), + RSP = (7, "rsp"), + + R8 = (8, "r8"), + R9 = (9, "r9"), + R10 = (10, "r10"), + R11 = (11, "r11"), + R12 = (12, "r12"), + R13 = (13, "r13"), + R14 = (14, "r14"), + R15 = (15, "r15"), + + // Return Address register. This is stored in `0(%rsp, "")` and is not a physical register. 
+ RA = (16, "RA"), + + XMM0 = (17, "xmm0"), + XMM1 = (18, "xmm1"), + XMM2 = (19, "xmm2"), + XMM3 = (20, "xmm3"), + XMM4 = (21, "xmm4"), + XMM5 = (22, "xmm5"), + XMM6 = (23, "xmm6"), + XMM7 = (24, "xmm7"), + + XMM8 = (25, "xmm8"), + XMM9 = (26, "xmm9"), + XMM10 = (27, "xmm10"), + XMM11 = (28, "xmm11"), + XMM12 = (29, "xmm12"), + XMM13 = (30, "xmm13"), + XMM14 = (31, "xmm14"), + XMM15 = (32, "xmm15"), + + ST0 = (33, "st0"), + ST1 = (34, "st1"), + ST2 = (35, "st2"), + ST3 = (36, "st3"), + ST4 = (37, "st4"), + ST5 = (38, "st5"), + ST6 = (39, "st6"), + ST7 = (40, "st7"), + + MM0 = (41, "mm0"), + MM1 = (42, "mm1"), + MM2 = (43, "mm2"), + MM3 = (44, "mm3"), + MM4 = (45, "mm4"), + MM5 = (46, "mm5"), + MM6 = (47, "mm6"), + MM7 = (48, "mm7"), + + RFLAGS = (49, "rFLAGS"), + ES = (50, "es"), + CS = (51, "cs"), + SS = (52, "ss"), + DS = (53, "ds"), + FS = (54, "fs"), + GS = (55, "gs"), + + FS_BASE = (58, "fs.base"), + GS_BASE = (59, "gs.base"), + + TR = (62, "tr"), + LDTR = (63, "ldtr"), + MXCSR = (64, "mxcsr"), + FCW = (65, "fcw"), + FSW = (66, "fsw"), + + XMM16 = (67, "xmm16"), + XMM17 = (68, "xmm17"), + XMM18 = (69, "xmm18"), + XMM19 = (70, "xmm19"), + XMM20 = (71, "xmm20"), + XMM21 = (72, "xmm21"), + XMM22 = (73, "xmm22"), + XMM23 = (74, "xmm23"), + XMM24 = (75, "xmm24"), + XMM25 = (76, "xmm25"), + XMM26 = (77, "xmm26"), + XMM27 = (78, "xmm27"), + XMM28 = (79, "xmm28"), + XMM29 = (80, "xmm29"), + XMM30 = (81, "xmm30"), + XMM31 = (82, "xmm31"), + + K0 = (118, "k0"), + K1 = (119, "k1"), + K2 = (120, "k2"), + K3 = (121, "k3"), + K4 = (122, "k4"), + K5 = (123, "k5"), + K6 = (124, "k6"), + K7 = (125, "k7"), +}); + +/// PowerPC 64bit +/// +/// See [64-bit ELF ABI Specification for OpenPOWER Architecture](https://openpowerfoundation.org/specifications/64bitelfabi/). 
+#[derive(Debug, Clone, Copy)] +pub struct PowerPc64; + +registers!(PowerPc64, { + R0 = (0, "r0"), + R1 = (1, "r1"), + R2 = (2, "r2"), + R3 = (3, "r3"), + R4 = (4, "r4"), + R5 = (5, "r5"), + R6 = (6, "r6"), + R7 = (7, "r7"), + R8 = (8, "r8"), + R9 = (9, "r9"), + R10 = (10, "r10"), + R11 = (11, "r11"), + R12 = (12, "r12"), + R13 = (13, "r13"), + R14 = (14, "r14"), + R15 = (15, "r15"), + R16 = (16, "r16"), + R17 = (17, "r17"), + R18 = (18, "r18"), + R19 = (19, "r19"), + R20 = (20, "r20"), + R21 = (21, "r21"), + R22 = (22, "r22"), + R23 = (23, "r23"), + R24 = (24, "r24"), + R25 = (25, "r25"), + R26 = (26, "r26"), + R27 = (27, "r27"), + R28 = (28, "r28"), + R29 = (29, "r29"), + R30 = (30, "r30"), + R31 = (31, "r31"), + + F0 = (32, "f0"), + F1 = (33, "f1"), + F2 = (34, "f2"), + F3 = (35, "f3"), + F4 = (36, "f4"), + F5 = (37, "f5"), + F6 = (38, "f6"), + F7 = (39, "f7"), + F8 = (40, "f8"), + F9 = (41, "f9"), + F10 = (42, "f10"), + F11 = (43, "f11"), + F12 = (44, "f12"), + F13 = (45, "f13"), + F14 = (46, "f14"), + F15 = (47, "f15"), + F16 = (48, "f16"), + F17 = (49, "f17"), + F18 = (50, "f18"), + F19 = (51, "f19"), + F20 = (52, "f20"), + F21 = (53, "f21"), + F22 = (54, "f22"), + F23 = (55, "f23"), + F24 = (56, "f24"), + F25 = (57, "f25"), + F26 = (58, "f26"), + F27 = (59, "f27"), + F28 = (60, "f28"), + F29 = (61, "f29"), + F30 = (62, "f30"), + F31 = (63, "f31"), + + LR = (65, "lr"), + CTR = (66, "ctr"), + + CR0 = (68, "cr0"), + CR1 = (69, "cr1"), + CR2 = (70, "cr2"), + CR3 = (71, "cr3"), + CR4 = (72, "cr4"), + CR5 = (73, "cr5"), + CR6 = (74, "cr6"), + CR7 = (75, "cr7"), + XER = (76, "xer"), + + VR0 = (77, "vr0"), + VR1 = (78, "vr1"), + VR2 = (79, "vr2"), + VR3 = (80, "vr3"), + VR4 = (81, "vr4"), + VR5 = (82, "vr5"), + VR6 = (83, "vr6"), + VR7 = (84, "vr7"), + VR8 = (85, "vr8"), + VR9 = (86, "vr9"), + VR10 = (87, "vr10"), + VR11 = (88, "vr11"), + VR12 = (89, "vr12"), + VR13 = (90, "vr13"), + VR14 = (91, "vr14"), + VR15 = (92, "vr15"), + VR16 = (93, "vr16"), + VR17 = (94, 
"vr17"), + VR18 = (95, "vr18"), + VR19 = (96, "vr19"), + VR20 = (97, "vr20"), + VR21 = (98, "vr21"), + VR22 = (99, "vr22"), + VR23 = (100, "vr23"), + VR24 = (101, "vr24"), + VR25 = (102, "vr25"), + VR26 = (103, "vr26"), + VR27 = (104, "vr27"), + VR28 = (105, "vr28"), + VR29 = (106, "vr29"), + VR30 = (107, "vr30"), + VR31 = (108, "vr31"), + + VSCR = (110, "vscr"), + TFHAR = (114, "tfhar"), + TFIAR = (115, "tfiar"), + TEXASR = (116, "texasr"), +}); + +#[cfg(test)] +mod tests { + + #[test] + #[cfg(feature = "std")] + fn test_aarch64_registers() { + use super::*; + use std::collections::HashSet; + + let mut names = HashSet::new(); + for n in (0..=39).chain(46..=127) { + let name = AArch64::register_name(Register(n)) + .unwrap_or_else(|| panic!("Register {} should have a name.", n)); + assert!(names.insert(name)); + } + } + + #[test] + #[cfg(feature = "std")] + fn test_power64_registers() { + use super::*; + use std::collections::HashSet; + + let mut names = HashSet::new(); + for n in (0..=63).chain(68..=75).chain(77..=108) { + let name = PowerPc64::register_name(Register(n)) + .unwrap_or_else(|| panic!("Register {} should have a name.", n)); + assert!(names.insert(name)); + } + } +} diff --git a/third_party/rust/gimli/src/common.rs b/third_party/rust/gimli/src/common.rs new file mode 100644 index 000000000000..cad9568af79b --- /dev/null +++ b/third_party/rust/gimli/src/common.rs @@ -0,0 +1,392 @@ +/// Whether the format of a compilation unit is 32- or 64-bit. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Format { + /// 64-bit DWARF + Dwarf64 = 8, + /// 32-bit DWARF + Dwarf32 = 4, +} + +impl Format { + /// Return the serialized size of an initial length field for the format. 
+ #[inline] + pub fn initial_length_size(self) -> u8 { + match self { + Format::Dwarf32 => 4, + Format::Dwarf64 => 12, + } + } + + /// Return the natural word size for the format + #[inline] + pub fn word_size(self) -> u8 { + match self { + Format::Dwarf32 => 4, + Format::Dwarf64 => 8, + } + } +} + +/// Which vendor extensions to support. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum Vendor { + /// A default set of extensions, including some common GNU extensions. + Default, + /// AAarch64 extensions. + AArch64, +} + +/// Encoding parameters that are commonly used for multiple DWARF sections. +/// +/// This is intended to be small enough to pass by value. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +// `address_size` and `format` are used more often than `version`, so keep +// them first. +#[repr(C)] +pub struct Encoding { + /// The size of an address. + pub address_size: u8, + + // The size of a segment selector. + // TODO: pub segment_size: u8, + /// Whether the DWARF format is 32- or 64-bit. + pub format: Format, + + /// The DWARF version of the header. + pub version: u16, +} + +/// Encoding parameters for a line number program. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LineEncoding { + /// The size in bytes of the smallest target machine instruction. + pub minimum_instruction_length: u8, + + /// The maximum number of individual operations that may be encoded in an + /// instruction. + pub maximum_operations_per_instruction: u8, + + /// The initial value of the `is_stmt` register. + pub default_is_stmt: bool, + + /// The minimum value which a special opcode can add to the line register. + pub line_base: i8, + + /// The range of values which a special opcode can add to the line register. + pub line_range: u8, +} + +impl Default for LineEncoding { + fn default() -> Self { + // Values from LLVM. 
+ LineEncoding { + minimum_instruction_length: 1, + maximum_operations_per_instruction: 1, + default_is_stmt: true, + line_base: -5, + line_range: 14, + } + } +} + +/// A DWARF register number. +/// +/// The meaning of this value is ABI dependent. This is generally encoded as +/// a ULEB128, but supported architectures need 16 bits at most. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Register(pub u16); + +/// An offset into the `.debug_abbrev` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DebugAbbrevOffset(pub T); + +/// An offset to a set of entries in the `.debug_addr` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugAddrBase(pub T); + +/// An index into a set of addresses in the `.debug_addr` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugAddrIndex(pub T); + +/// An offset into the `.debug_aranges` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugArangesOffset(pub T); + +/// An offset into the `.debug_info` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct DebugInfoOffset(pub T); + +/// An offset into the `.debug_line` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugLineOffset(pub T); + +/// An offset into the `.debug_line_str` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugLineStrOffset(pub T); + +/// An offset into either the `.debug_loc` section or the `.debug_loclists` section, +/// depending on the version of the unit the offset was contained in. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LocationListsOffset(pub T); + +/// An offset to a set of location list offsets in the `.debug_loclists` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugLocListsBase(pub T); + +/// An index into a set of location list offsets in the `.debug_loclists` section. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugLocListsIndex(pub T); + +/// An offset into the `.debug_macinfo` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DebugMacinfoOffset(pub T); + +/// An offset into the `.debug_macro` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DebugMacroOffset(pub T); + +/// An offset into either the `.debug_ranges` section or the `.debug_rnglists` section, +/// depending on the version of the unit the offset was contained in. +/// +/// If this is from a DWARF 4 DWO file, then it must additionally be offset by the +/// value of `DW_AT_GNU_ranges_base`. You can use `Dwarf::ranges_offset_from_raw` to do this. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct RawRangeListsOffset(pub T); + +/// An offset into either the `.debug_ranges` section or the `.debug_rnglists` section, +/// depending on the version of the unit the offset was contained in. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct RangeListsOffset(pub T); + +/// An offset to a set of range list offsets in the `.debug_rnglists` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugRngListsBase(pub T); + +/// An index into a set of range list offsets in the `.debug_rnglists` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugRngListsIndex(pub T); + +/// An offset into the `.debug_str` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugStrOffset(pub T); + +/// An offset to a set of entries in the `.debug_str_offsets` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugStrOffsetsBase(pub T); + +/// An index into a set of entries in the `.debug_str_offsets` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugStrOffsetsIndex(pub T); + +/// An offset into the `.debug_types` section. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct DebugTypesOffset(pub T); + +/// A type signature as used in the `.debug_types` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DebugTypeSignature(pub u64); + +/// An offset into the `.debug_frame` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DebugFrameOffset(pub T); + +impl From for DebugFrameOffset { + #[inline] + fn from(o: T) -> Self { + DebugFrameOffset(o) + } +} + +/// An offset into the `.eh_frame` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct EhFrameOffset(pub T); + +impl From for EhFrameOffset { + #[inline] + fn from(o: T) -> Self { + EhFrameOffset(o) + } +} + +/// An offset into the `.debug_info` or `.debug_types` sections. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub enum UnitSectionOffset { + /// An offset into the `.debug_info` section. + DebugInfoOffset(DebugInfoOffset), + /// An offset into the `.debug_types` section. + DebugTypesOffset(DebugTypesOffset), +} + +impl From> for UnitSectionOffset { + fn from(offset: DebugInfoOffset) -> Self { + UnitSectionOffset::DebugInfoOffset(offset) + } +} + +impl From> for UnitSectionOffset { + fn from(offset: DebugTypesOffset) -> Self { + UnitSectionOffset::DebugTypesOffset(offset) + } +} + +impl UnitSectionOffset +where + T: Clone, +{ + /// Returns the `DebugInfoOffset` inside, or `None` otherwise. + pub fn as_debug_info_offset(&self) -> Option> { + match self { + UnitSectionOffset::DebugInfoOffset(offset) => Some(offset.clone()), + UnitSectionOffset::DebugTypesOffset(_) => None, + } + } + /// Returns the `DebugTypesOffset` inside, or `None` otherwise. + pub fn as_debug_types_offset(&self) -> Option> { + match self { + UnitSectionOffset::DebugInfoOffset(_) => None, + UnitSectionOffset::DebugTypesOffset(offset) => Some(offset.clone()), + } + } +} + +/// An identifier for a DWARF section. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub enum SectionId { + /// The `.debug_abbrev` section. + DebugAbbrev, + /// The `.debug_addr` section. + DebugAddr, + /// The `.debug_aranges` section. + DebugAranges, + /// The `.debug_cu_index` section. + DebugCuIndex, + /// The `.debug_frame` section. + DebugFrame, + /// The `.eh_frame` section. + EhFrame, + /// The `.eh_frame_hdr` section. + EhFrameHdr, + /// The `.debug_info` section. + DebugInfo, + /// The `.debug_line` section. + DebugLine, + /// The `.debug_line_str` section. + DebugLineStr, + /// The `.debug_loc` section. + DebugLoc, + /// The `.debug_loclists` section. + DebugLocLists, + /// The `.debug_macinfo` section. + DebugMacinfo, + /// The `.debug_macro` section. + DebugMacro, + /// The `.debug_pubnames` section. + DebugPubNames, + /// The `.debug_pubtypes` section. + DebugPubTypes, + /// The `.debug_ranges` section. + DebugRanges, + /// The `.debug_rnglists` section. + DebugRngLists, + /// The `.debug_str` section. + DebugStr, + /// The `.debug_str_offsets` section. + DebugStrOffsets, + /// The `.debug_tu_index` section. + DebugTuIndex, + /// The `.debug_types` section. + DebugTypes, +} + +impl SectionId { + /// Returns the ELF section name for this kind. 
+ pub fn name(self) -> &'static str { + match self { + SectionId::DebugAbbrev => ".debug_abbrev", + SectionId::DebugAddr => ".debug_addr", + SectionId::DebugAranges => ".debug_aranges", + SectionId::DebugCuIndex => ".debug_cu_index", + SectionId::DebugFrame => ".debug_frame", + SectionId::EhFrame => ".eh_frame", + SectionId::EhFrameHdr => ".eh_frame_hdr", + SectionId::DebugInfo => ".debug_info", + SectionId::DebugLine => ".debug_line", + SectionId::DebugLineStr => ".debug_line_str", + SectionId::DebugLoc => ".debug_loc", + SectionId::DebugLocLists => ".debug_loclists", + SectionId::DebugMacinfo => ".debug_macinfo", + SectionId::DebugMacro => ".debug_macro", + SectionId::DebugPubNames => ".debug_pubnames", + SectionId::DebugPubTypes => ".debug_pubtypes", + SectionId::DebugRanges => ".debug_ranges", + SectionId::DebugRngLists => ".debug_rnglists", + SectionId::DebugStr => ".debug_str", + SectionId::DebugStrOffsets => ".debug_str_offsets", + SectionId::DebugTuIndex => ".debug_tu_index", + SectionId::DebugTypes => ".debug_types", + } + } + + /// Returns the ELF section name for this kind, when found in a .dwo or .dwp file. + pub fn dwo_name(self) -> Option<&'static str> { + Some(match self { + SectionId::DebugAbbrev => ".debug_abbrev.dwo", + SectionId::DebugCuIndex => ".debug_cu_index", + SectionId::DebugInfo => ".debug_info.dwo", + SectionId::DebugLine => ".debug_line.dwo", + // The debug_loc section can be present in the dwo when using the + // GNU split-dwarf extension to DWARF4. 
+ SectionId::DebugLoc => ".debug_loc.dwo", + SectionId::DebugLocLists => ".debug_loclists.dwo", + SectionId::DebugMacinfo => ".debug_macinfo.dwo", + SectionId::DebugMacro => ".debug_macro.dwo", + SectionId::DebugRngLists => ".debug_rnglists.dwo", + SectionId::DebugStr => ".debug_str.dwo", + SectionId::DebugStrOffsets => ".debug_str_offsets.dwo", + SectionId::DebugTuIndex => ".debug_tu_index", + SectionId::DebugTypes => ".debug_types.dwo", + _ => return None, + }) + } + + /// Returns the XCOFF section name for this kind. + pub fn xcoff_name(self) -> Option<&'static str> { + Some(match self { + SectionId::DebugAbbrev => ".dwabrev", + SectionId::DebugAranges => ".dwarnge", + SectionId::DebugFrame => ".dwframe", + SectionId::DebugInfo => ".dwinfo", + SectionId::DebugLine => ".dwline", + SectionId::DebugLoc => ".dwloc", + SectionId::DebugMacinfo => ".dwmac", + SectionId::DebugPubNames => ".dwpbnms", + SectionId::DebugPubTypes => ".dwpbtyp", + SectionId::DebugRanges => ".dwrnges", + SectionId::DebugStr => ".dwstr", + _ => return None, + }) + } +} + +/// An optionally-provided implementation-defined compilation unit ID to enable +/// split DWARF and linking a split compilation unit back together. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DwoId(pub u64); + +/// The "type" of file with DWARF debugging information. This determines, among other things, +/// which files DWARF sections should be loaded from. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DwarfFileType { + /// A normal executable or object file. + Main, + /// A .dwo split DWARF file. + Dwo, + // TODO: Supplementary files, .dwps? 
+} + +impl Default for DwarfFileType { + fn default() -> Self { + DwarfFileType::Main + } +} diff --git a/third_party/rust/gimli/src/constants.rs b/third_party/rust/gimli/src/constants.rs new file mode 100644 index 000000000000..4bb43ec951b0 --- /dev/null +++ b/third_party/rust/gimli/src/constants.rs @@ -0,0 +1,1443 @@ +// This file originally from https://github.com/philipc/rust-dwarf/ and +// distributed under either MIT or Apache 2.0 licenses. +// +// Copyright 2016 The rust-dwarf Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Constant definitions. +//! +//! The DWARF spec's `DW_AT_*` type is represented as `struct DwAt(u16)`, +//! `DW_FORM_*` as `DwForm(u16)`, etc. +//! +//! There are also exported const definitions for each constant. + +#![allow(non_upper_case_globals)] +#![allow(missing_docs)] + +use core::{fmt, ops}; + +// The `dw!` macro turns this: +// +// dw!(DwFoo(u32) { +// DW_FOO_bar = 0, +// DW_FOO_baz = 1, +// DW_FOO_bang = 2, +// }); +// +// into this: +// +// #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +// pub struct DwFoo(pub u32); +// +// pub const DW_FOO_bar: DwFoo = DwFoo(0); +// pub const DW_FOO_baz: DwFoo = DwFoo(1); +// pub const DW_FOO_bang: DwFoo = DwFoo(2); +// +// impl DwFoo { +// pub fn static_string(&self) -> Option<&'static str> { +// ... +// } +// } +// +// impl fmt::Display for DwFoo { +// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { +// ... 
+// } +// } +// +// An optional trailing `aliases { ... }` list defines further `pub const`s that +// are left out of `static_string`'s match, so they never show up in its output. +macro_rules! dw { + ($(#[$meta:meta])* $struct_name:ident($struct_type:ty) + { $($name:ident = $val:expr),+ $(,)? } + $(, aliases { $($alias_name:ident = $alias_val:expr),+ $(,)? })? + ) => { + $(#[$meta])* + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] + pub struct $struct_name(pub $struct_type); + + $( + pub const $name: $struct_name = $struct_name($val); + )+ + $($( + pub const $alias_name: $struct_name = $struct_name($alias_val); + )+)* + + impl $struct_name { + pub fn static_string(&self) -> Option<&'static str> { + Some(match *self { + $( + $name => stringify!($name), + )+ + _ => return None, + }) + } + } + + impl fmt::Display for $struct_name { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + if let Some(s) = self.static_string() { + f.pad(s) + } else { + // With "read", build the message as a `String` so `f.pad` can apply + // the formatter's width/fill flags to it. + #[cfg(feature = "read")] + { + f.pad(&format!("Unknown {}: {}", stringify!($struct_name), self.0)) + } + // Otherwise write the text directly; no padding is applied here. + #[cfg(not(feature = "read"))] + { + write!(f, "Unknown {}: {}", stringify!($struct_name), self.0) + } + } + } + } + }; +} + +dw!( +/// The section type field in a `.dwp` unit index. +/// +/// This is used for version 5 and later. +/// +/// See Section 7.3.5. +DwSect(u32) { + DW_SECT_INFO = 1, + DW_SECT_ABBREV = 3, + DW_SECT_LINE = 4, + DW_SECT_LOCLISTS = 5, + DW_SECT_STR_OFFSETS = 6, + DW_SECT_MACRO = 7, + DW_SECT_RNGLISTS = 8, +}); + +dw!( +/// The section type field in a `.dwp` unit index with version 2. +DwSectV2(u32) { + DW_SECT_V2_INFO = 1, + DW_SECT_V2_TYPES = 2, + DW_SECT_V2_ABBREV = 3, + DW_SECT_V2_LINE = 4, + DW_SECT_V2_LOC = 5, + DW_SECT_V2_STR_OFFSETS = 6, + DW_SECT_V2_MACINFO = 7, + DW_SECT_V2_MACRO = 8, +}); + +dw!( +/// The unit type field in a unit header. +/// +/// See Section 7.5.1, Table 7.2.
+DwUt(u8) { + DW_UT_compile = 0x01, + DW_UT_type = 0x02, + DW_UT_partial = 0x03, + DW_UT_skeleton = 0x04, + DW_UT_split_compile = 0x05, + DW_UT_split_type = 0x06, + DW_UT_lo_user = 0x80, + DW_UT_hi_user = 0xff, +}); + +dw!( +/// The opcode for a call frame instruction. +/// +/// Section 7.24: +/// > Call frame instructions are encoded in one or more bytes. The primary +/// > opcode is encoded in the high order two bits of the first byte (that is, +/// > opcode = byte >> 6). An operand or extended opcode may be encoded in the +/// > low order 6 bits. Additional operands are encoded in subsequent bytes. +DwCfa(u8) { + DW_CFA_advance_loc = 0x01 << 6, + DW_CFA_offset = 0x02 << 6, + DW_CFA_restore = 0x03 << 6, + DW_CFA_nop = 0, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + DW_CFA_MIPS_advance_loc8 = 0x1d, + DW_CFA_GNU_window_save = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + DW_CFA_GNU_negative_offset_extended = 0x2f, +}, +aliases { + DW_CFA_AARCH64_negate_ra_state = 0x2d, +}); + +dw!( +/// The child determination encodings for DIE attributes. +/// +/// See Section 7.5.3, Table 7.4. +DwChildren(u8) { + DW_CHILDREN_no = 0, + DW_CHILDREN_yes = 1, +}); + +dw!( +/// The tag encodings for DIE attributes. +/// +/// See Section 7.5.3, Table 7.3. 
+DwTag(u16) { + DW_TAG_null = 0x00, + + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, + DW_TAG_template_type_parameter = 0x2f, + DW_TAG_template_value_parameter = 0x30, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + +// DWARF 3. + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + DW_TAG_condition = 0x3f, + DW_TAG_shared_type = 0x40, + +// DWARF 4. + DW_TAG_type_unit = 0x41, + DW_TAG_rvalue_reference_type = 0x42, + DW_TAG_template_alias = 0x43, + +// DWARF 5. 
+ DW_TAG_coarray_type = 0x44, + DW_TAG_generic_subrange = 0x45, + DW_TAG_dynamic_type = 0x46, + DW_TAG_atomic_type = 0x47, + DW_TAG_call_site = 0x48, + DW_TAG_call_site_parameter = 0x49, + DW_TAG_skeleton_unit = 0x4a, + DW_TAG_immutable_type = 0x4b, + + DW_TAG_lo_user = 0x4080, + DW_TAG_hi_user = 0xffff, + +// SGI/MIPS extensions. + DW_TAG_MIPS_loop = 0x4081, + +// HP extensions. + DW_TAG_HP_array_descriptor = 0x4090, + DW_TAG_HP_Bliss_field = 0x4091, + DW_TAG_HP_Bliss_field_set = 0x4092, + +// GNU extensions. + DW_TAG_format_label = 0x4101, + DW_TAG_function_template = 0x4102, + DW_TAG_class_template = 0x4103, + DW_TAG_GNU_BINCL = 0x4104, + DW_TAG_GNU_EINCL = 0x4105, + DW_TAG_GNU_template_template_param = 0x4106, + DW_TAG_GNU_template_parameter_pack = 0x4107, + DW_TAG_GNU_formal_parameter_pack = 0x4108, + DW_TAG_GNU_call_site = 0x4109, + DW_TAG_GNU_call_site_parameter = 0x410a, + + DW_TAG_APPLE_property = 0x4200, + +// SUN extensions. + DW_TAG_SUN_function_template = 0x4201, + DW_TAG_SUN_class_template = 0x4202, + DW_TAG_SUN_struct_template = 0x4203, + DW_TAG_SUN_union_template = 0x4204, + DW_TAG_SUN_indirect_inheritance = 0x4205, + DW_TAG_SUN_codeflags = 0x4206, + DW_TAG_SUN_memop_info = 0x4207, + DW_TAG_SUN_omp_child_func = 0x4208, + DW_TAG_SUN_rtti_descriptor = 0x4209, + DW_TAG_SUN_dtor_info = 0x420a, + DW_TAG_SUN_dtor = 0x420b, + DW_TAG_SUN_f90_interface = 0x420c, + DW_TAG_SUN_fortran_vax_structure = 0x420d, + +// ALTIUM extensions. + DW_TAG_ALTIUM_circ_type = 0x5101, + DW_TAG_ALTIUM_mwa_circ_type = 0x5102, + DW_TAG_ALTIUM_rev_carry_type = 0x5103, + DW_TAG_ALTIUM_rom = 0x5111, + +// Extensions for UPC. + DW_TAG_upc_shared_type = 0x8765, + DW_TAG_upc_strict_type = 0x8766, + DW_TAG_upc_relaxed_type = 0x8767, + +// PGI (STMicroelectronics) extensions. + DW_TAG_PGI_kanji_type = 0xa000, + DW_TAG_PGI_interface_block = 0xa020, + +// Borland extensions. 
+ DW_TAG_BORLAND_property = 0xb000, + DW_TAG_BORLAND_Delphi_string = 0xb001, + DW_TAG_BORLAND_Delphi_dynamic_array = 0xb002, + DW_TAG_BORLAND_Delphi_set = 0xb003, + DW_TAG_BORLAND_Delphi_variant = 0xb004, +}); + +dw!( +/// The attribute encodings for DIE attributes. +/// +/// See Section 7.5.4, Table 7.5. +DwAt(u16) { + DW_AT_null = 0x00, + + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_bit_stride = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_item = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + +// DWARF 3. 
+ DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_byte_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + DW_AT_small = 0x5d, + DW_AT_decimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + +// DWARF 4. + DW_AT_signature = 0x69, + DW_AT_main_subprogram = 0x6a, + DW_AT_data_bit_offset = 0x6b, + DW_AT_const_expr = 0x6c, + DW_AT_enum_class = 0x6d, + DW_AT_linkage_name = 0x6e, + +// DWARF 5. + DW_AT_string_length_bit_size = 0x6f, + DW_AT_string_length_byte_size = 0x70, + DW_AT_rank = 0x71, + DW_AT_str_offsets_base = 0x72, + DW_AT_addr_base = 0x73, + DW_AT_rnglists_base = 0x74, + DW_AT_dwo_name = 0x76, + DW_AT_reference = 0x77, + DW_AT_rvalue_reference = 0x78, + DW_AT_macros = 0x79, + DW_AT_call_all_calls = 0x7a, + DW_AT_call_all_source_calls = 0x7b, + DW_AT_call_all_tail_calls = 0x7c, + DW_AT_call_return_pc = 0x7d, + DW_AT_call_value = 0x7e, + DW_AT_call_origin = 0x7f, + DW_AT_call_parameter = 0x80, + DW_AT_call_pc = 0x81, + DW_AT_call_tail_call = 0x82, + DW_AT_call_target = 0x83, + DW_AT_call_target_clobbered = 0x84, + DW_AT_call_data_location = 0x85, + DW_AT_call_data_value = 0x86, + DW_AT_noreturn = 0x87, + DW_AT_alignment = 0x88, + DW_AT_export_symbols = 0x89, + DW_AT_deleted = 0x8a, + DW_AT_defaulted = 0x8b, + DW_AT_loclists_base = 0x8c, + + DW_AT_lo_user = 0x2000, + DW_AT_hi_user = 0x3fff, + +// SGI/MIPS extensions. 
+ DW_AT_MIPS_fde = 0x2001, + DW_AT_MIPS_loop_begin = 0x2002, + DW_AT_MIPS_tail_loop_begin = 0x2003, + DW_AT_MIPS_epilog_begin = 0x2004, + DW_AT_MIPS_loop_unroll_factor = 0x2005, + DW_AT_MIPS_software_pipeline_depth = 0x2006, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_MIPS_stride = 0x2008, + DW_AT_MIPS_abstract_name = 0x2009, + DW_AT_MIPS_clone_origin = 0x200a, + DW_AT_MIPS_has_inlines = 0x200b, + DW_AT_MIPS_stride_byte = 0x200c, + DW_AT_MIPS_stride_elem = 0x200d, + DW_AT_MIPS_ptr_dopetype = 0x200e, + DW_AT_MIPS_allocatable_dopetype = 0x200f, + DW_AT_MIPS_assumed_shape_dopetype = 0x2010, + +// This one appears to have only been implemented by Open64 for +// fortran and may conflict with other extensions. + DW_AT_MIPS_assumed_size = 0x2011, + +// TODO: HP/CPQ extensions. +// These conflict with the MIPS extensions. + + DW_AT_INTEL_other_endian = 0x2026, + +// GNU extensions + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + DW_AT_GNU_guarded_by = 0x2108, + DW_AT_GNU_pt_guarded_by = 0x2109, + DW_AT_GNU_guarded = 0x210a, + DW_AT_GNU_pt_guarded = 0x210b, + DW_AT_GNU_locks_excluded = 0x210c, + DW_AT_GNU_exclusive_locks_required = 0x210d, + DW_AT_GNU_shared_locks_required = 0x210e, + DW_AT_GNU_odr_signature = 0x210f, + DW_AT_GNU_template_name = 0x2110, + DW_AT_GNU_call_site_value = 0x2111, + DW_AT_GNU_call_site_data_value = 0x2112, + DW_AT_GNU_call_site_target = 0x2113, + DW_AT_GNU_call_site_target_clobbered = 0x2114, + DW_AT_GNU_tail_call = 0x2115, + DW_AT_GNU_all_tail_call_sites = 0x2116, + DW_AT_GNU_all_call_sites = 0x2117, + DW_AT_GNU_all_source_call_sites = 0x2118, + DW_AT_GNU_macros = 0x2119, + DW_AT_GNU_deleted = 0x211a, + +// Extensions for Fission proposal. 
+ DW_AT_GNU_dwo_name = 0x2130, + DW_AT_GNU_dwo_id = 0x2131, + DW_AT_GNU_ranges_base = 0x2132, + DW_AT_GNU_addr_base = 0x2133, + DW_AT_GNU_pubnames = 0x2134, + DW_AT_GNU_pubtypes = 0x2135, + DW_AT_GNU_discriminator = 0x2136, + DW_AT_GNU_locviews = 0x2137, + DW_AT_GNU_entry_view = 0x2138, + +// Conflict with Sun. +// DW_AT_VMS_rtnbeg_pd_address = 0x2201, + +// Sun extensions. + DW_AT_SUN_template = 0x2201, + DW_AT_SUN_alignment = 0x2202, + DW_AT_SUN_vtable = 0x2203, + DW_AT_SUN_count_guarantee = 0x2204, + DW_AT_SUN_command_line = 0x2205, + DW_AT_SUN_vbase = 0x2206, + DW_AT_SUN_compile_options = 0x2207, + DW_AT_SUN_language = 0x2208, + DW_AT_SUN_browser_file = 0x2209, + DW_AT_SUN_vtable_abi = 0x2210, + DW_AT_SUN_func_offsets = 0x2211, + DW_AT_SUN_cf_kind = 0x2212, + DW_AT_SUN_vtable_index = 0x2213, + DW_AT_SUN_omp_tpriv_addr = 0x2214, + DW_AT_SUN_omp_child_func = 0x2215, + DW_AT_SUN_func_offset = 0x2216, + DW_AT_SUN_memop_type_ref = 0x2217, + DW_AT_SUN_profile_id = 0x2218, + DW_AT_SUN_memop_signature = 0x2219, + DW_AT_SUN_obj_dir = 0x2220, + DW_AT_SUN_obj_file = 0x2221, + DW_AT_SUN_original_name = 0x2222, + DW_AT_SUN_hwcprof_signature = 0x2223, + DW_AT_SUN_amd64_parmdump = 0x2224, + DW_AT_SUN_part_link_name = 0x2225, + DW_AT_SUN_link_name = 0x2226, + DW_AT_SUN_pass_with_const = 0x2227, + DW_AT_SUN_return_with_const = 0x2228, + DW_AT_SUN_import_by_name = 0x2229, + DW_AT_SUN_f90_pointer = 0x222a, + DW_AT_SUN_pass_by_ref = 0x222b, + DW_AT_SUN_f90_allocatable = 0x222c, + DW_AT_SUN_f90_assumed_shape_array = 0x222d, + DW_AT_SUN_c_vla = 0x222e, + DW_AT_SUN_return_value_ptr = 0x2230, + DW_AT_SUN_dtor_start = 0x2231, + DW_AT_SUN_dtor_length = 0x2232, + DW_AT_SUN_dtor_state_initial = 0x2233, + DW_AT_SUN_dtor_state_final = 0x2234, + DW_AT_SUN_dtor_state_deltas = 0x2235, + DW_AT_SUN_import_by_lname = 0x2236, + DW_AT_SUN_f90_use_only = 0x2237, + DW_AT_SUN_namelist_spec = 0x2238, + DW_AT_SUN_is_omp_child_func = 0x2239, + DW_AT_SUN_fortran_main_alias = 0x223a, + 
DW_AT_SUN_fortran_based = 0x223b, + + DW_AT_ALTIUM_loclist = 0x2300, + + DW_AT_use_GNAT_descriptive_type = 0x2301, + DW_AT_GNAT_descriptive_type = 0x2302, + DW_AT_GNU_numerator = 0x2303, + DW_AT_GNU_denominator = 0x2304, + DW_AT_GNU_bias = 0x2305, + + DW_AT_upc_threads_scaled = 0x3210, + +// PGI (STMicroelectronics) extensions. + DW_AT_PGI_lbase = 0x3a00, + DW_AT_PGI_soffset = 0x3a01, + DW_AT_PGI_lstride = 0x3a02, + +// Borland extensions. + DW_AT_BORLAND_property_read = 0x3b11, + DW_AT_BORLAND_property_write = 0x3b12, + DW_AT_BORLAND_property_implements = 0x3b13, + DW_AT_BORLAND_property_index = 0x3b14, + DW_AT_BORLAND_property_default = 0x3b15, + DW_AT_BORLAND_Delphi_unit = 0x3b20, + DW_AT_BORLAND_Delphi_class = 0x3b21, + DW_AT_BORLAND_Delphi_record = 0x3b22, + DW_AT_BORLAND_Delphi_metaclass = 0x3b23, + DW_AT_BORLAND_Delphi_constructor = 0x3b24, + DW_AT_BORLAND_Delphi_destructor = 0x3b25, + DW_AT_BORLAND_Delphi_anonymous_method = 0x3b26, + DW_AT_BORLAND_Delphi_interface = 0x3b27, + DW_AT_BORLAND_Delphi_ABI = 0x3b28, + DW_AT_BORLAND_Delphi_return = 0x3b29, + DW_AT_BORLAND_Delphi_frameptr = 0x3b30, + DW_AT_BORLAND_closure = 0x3b31, + +// LLVM project extensions. + DW_AT_LLVM_include_path = 0x3e00, + DW_AT_LLVM_config_macros = 0x3e01, + DW_AT_LLVM_isysroot = 0x3e02, + +// Apple extensions. + DW_AT_APPLE_optimized = 0x3fe1, + DW_AT_APPLE_flags = 0x3fe2, + DW_AT_APPLE_isa = 0x3fe3, + DW_AT_APPLE_block = 0x3fe4, + DW_AT_APPLE_major_runtime_vers = 0x3fe5, + DW_AT_APPLE_runtime_class = 0x3fe6, + DW_AT_APPLE_omit_frame_ptr = 0x3fe7, + DW_AT_APPLE_property_name = 0x3fe8, + DW_AT_APPLE_property_getter = 0x3fe9, + DW_AT_APPLE_property_setter = 0x3fea, + DW_AT_APPLE_property_attribute = 0x3feb, + DW_AT_APPLE_objc_complete_type = 0x3fec, + DW_AT_APPLE_property = 0x3fed +}); + +dw!( +/// The attribute form encodings for DIE attributes. +/// +/// See Section 7.5.6, Table 7.6. 
+DwForm(u16) { + DW_FORM_null = 0x00, + + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + +// DWARF 4. + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_ref_sig8 = 0x20, + +// DWARF 5. + DW_FORM_strx = 0x1a, + DW_FORM_addrx = 0x1b, + DW_FORM_ref_sup4 = 0x1c, + DW_FORM_strp_sup = 0x1d, + DW_FORM_data16 = 0x1e, + DW_FORM_line_strp = 0x1f, + DW_FORM_implicit_const = 0x21, + DW_FORM_loclistx = 0x22, + DW_FORM_rnglistx = 0x23, + DW_FORM_ref_sup8 = 0x24, + DW_FORM_strx1 = 0x25, + DW_FORM_strx2 = 0x26, + DW_FORM_strx3 = 0x27, + DW_FORM_strx4 = 0x28, + DW_FORM_addrx1 = 0x29, + DW_FORM_addrx2 = 0x2a, + DW_FORM_addrx3 = 0x2b, + DW_FORM_addrx4 = 0x2c, + +// Extensions for Fission proposal + DW_FORM_GNU_addr_index = 0x1f01, + DW_FORM_GNU_str_index = 0x1f02, + +// Alternate debug sections proposal (output of "dwz" tool). + DW_FORM_GNU_ref_alt = 0x1f20, + DW_FORM_GNU_strp_alt = 0x1f21 +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_encoding` attribute. +/// +/// See Section 7.8, Table 7.11. +DwAte(u8) { + DW_ATE_address = 0x01, + DW_ATE_boolean = 0x02, + DW_ATE_complex_float = 0x03, + DW_ATE_float = 0x04, + DW_ATE_signed = 0x05, + DW_ATE_signed_char = 0x06, + DW_ATE_unsigned = 0x07, + DW_ATE_unsigned_char = 0x08, + +// DWARF 3. + DW_ATE_imaginary_float = 0x09, + DW_ATE_packed_decimal = 0x0a, + DW_ATE_numeric_string = 0x0b, + DW_ATE_edited = 0x0c, + DW_ATE_signed_fixed = 0x0d, + DW_ATE_unsigned_fixed = 0x0e, + DW_ATE_decimal_float = 0x0f , + +// DWARF 4. 
+ DW_ATE_UTF = 0x10, + DW_ATE_UCS = 0x11, + DW_ATE_ASCII = 0x12, + + DW_ATE_lo_user = 0x80, + DW_ATE_hi_user = 0xff, +}); + +dw!( +/// The encodings of the constants used in location list entries. +/// +/// See Section 7.7.3, Table 7.10. +DwLle(u8) { + DW_LLE_end_of_list = 0x00, + DW_LLE_base_addressx = 0x01, + DW_LLE_startx_endx = 0x02, + DW_LLE_startx_length = 0x03, + DW_LLE_offset_pair = 0x04, + DW_LLE_default_location = 0x05, + DW_LLE_base_address = 0x06, + DW_LLE_start_end = 0x07, + DW_LLE_start_length = 0x08, + DW_LLE_GNU_view_pair = 0x09, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_decimal_sign` attribute. +/// +/// See Section 7.8, Table 7.12. +DwDs(u8) { + DW_DS_unsigned = 0x01, + DW_DS_leading_overpunch = 0x02, + DW_DS_trailing_overpunch = 0x03, + DW_DS_leading_separate = 0x04, + DW_DS_trailing_separate = 0x05, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_endianity` attribute. +/// +/// See Section 7.8, Table 7.13. +DwEnd(u8) { + DW_END_default = 0x00, + DW_END_big = 0x01, + DW_END_little = 0x02, + DW_END_lo_user = 0x40, + DW_END_hi_user = 0xff, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_accessibility` attribute. +/// +/// See Section 7.9, Table 7.14. +DwAccess(u8) { + DW_ACCESS_public = 0x01, + DW_ACCESS_protected = 0x02, + DW_ACCESS_private = 0x03, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_visibility` attribute. +/// +/// See Section 7.10, Table 7.15. +DwVis(u8) { + DW_VIS_local = 0x01, + DW_VIS_exported = 0x02, + DW_VIS_qualified = 0x03, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_virtuality` attribute. +/// +/// See Section 7.11, Table 7.16. +DwVirtuality(u8) { + DW_VIRTUALITY_none = 0x00, + DW_VIRTUALITY_virtual = 0x01, + DW_VIRTUALITY_pure_virtual = 0x02, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_language` attribute. +/// +/// See Section 7.12, Table 7.17. 
+DwLang(u16) { + DW_LANG_C89 = 0x0001, + DW_LANG_C = 0x0002, + DW_LANG_Ada83 = 0x0003, + DW_LANG_C_plus_plus = 0x0004, + DW_LANG_Cobol74 = 0x0005, + DW_LANG_Cobol85 = 0x0006, + DW_LANG_Fortran77 = 0x0007, + DW_LANG_Fortran90 = 0x0008, + DW_LANG_Pascal83 = 0x0009, + DW_LANG_Modula2 = 0x000a, + DW_LANG_Java = 0x000b, + DW_LANG_C99 = 0x000c, + DW_LANG_Ada95 = 0x000d, + DW_LANG_Fortran95 = 0x000e, + DW_LANG_PLI = 0x000f, + DW_LANG_ObjC = 0x0010, + DW_LANG_ObjC_plus_plus = 0x0011, + DW_LANG_UPC = 0x0012, + DW_LANG_D = 0x0013, + DW_LANG_Python = 0x0014, + DW_LANG_OpenCL = 0x0015, + DW_LANG_Go = 0x0016, + DW_LANG_Modula3 = 0x0017, + DW_LANG_Haskell = 0x0018, + DW_LANG_C_plus_plus_03 = 0x0019, + DW_LANG_C_plus_plus_11 = 0x001a, + DW_LANG_OCaml = 0x001b, + DW_LANG_Rust = 0x001c, + DW_LANG_C11 = 0x001d, + DW_LANG_Swift = 0x001e, + DW_LANG_Julia = 0x001f, + DW_LANG_Dylan = 0x0020, + DW_LANG_C_plus_plus_14 = 0x0021, + DW_LANG_Fortran03 = 0x0022, + DW_LANG_Fortran08 = 0x0023, + DW_LANG_RenderScript = 0x0024, + DW_LANG_BLISS = 0x0025, + DW_LANG_Kotlin = 0x0026, + DW_LANG_Zig = 0x0027, + DW_LANG_Crystal = 0x0028, + DW_LANG_C_plus_plus_17 = 0x002a, + DW_LANG_C_plus_plus_20 = 0x002b, + DW_LANG_C17 = 0x002c, + DW_LANG_Fortran18 = 0x002d, + DW_LANG_Ada2005 = 0x002e, + DW_LANG_Ada2012 = 0x002f, + + DW_LANG_lo_user = 0x8000, + DW_LANG_hi_user = 0xffff, + + DW_LANG_Mips_Assembler = 0x8001, + DW_LANG_GOOGLE_RenderScript = 0x8e57, + DW_LANG_SUN_Assembler = 0x9001, + DW_LANG_ALTIUM_Assembler = 0x9101, + DW_LANG_BORLAND_Delphi = 0xb000, +}); + +impl DwLang { + /// Get the default DW_AT_lower_bound for this language. + /// + /// Yields `Some(0)` for the zero-based languages the match lists (C89/C/C99/C11, + /// C++ up to C++14, Java, ObjC, ObjC++, UPC, D, Python, OpenCL, Go, Haskell, OCaml, + /// Rust, Swift, Dylan, RenderScript, BLISS), `Some(1)` for the one-based ones + /// (Ada83/95, Cobol74/85, Fortran77 to Fortran08, Pascal83, Modula2/3, PLI, Julia), + /// and `None` for every other code. The codes from `DW_LANG_Kotlin` through + /// `DW_LANG_Ada2012` and all vendor codes currently fall in the `None` case. + // NOTE(review): the DWARF language-code registry may define default lower bounds + // for the newer codes as well; confirm whether the match should be extended.
+ pub fn default_lower_bound(self) -> Option { + match self { + DW_LANG_C89 + | DW_LANG_C + | DW_LANG_C_plus_plus + | DW_LANG_Java + | DW_LANG_C99 + | DW_LANG_ObjC + | DW_LANG_ObjC_plus_plus + | DW_LANG_UPC + | DW_LANG_D + | DW_LANG_Python + | DW_LANG_OpenCL + | DW_LANG_Go + | DW_LANG_Haskell + | DW_LANG_C_plus_plus_03 + | DW_LANG_C_plus_plus_11 + | DW_LANG_OCaml + | DW_LANG_Rust + | DW_LANG_C11 + | DW_LANG_Swift + | DW_LANG_Dylan + | DW_LANG_C_plus_plus_14 + | DW_LANG_RenderScript + | DW_LANG_BLISS => Some(0), + DW_LANG_Ada83 | DW_LANG_Cobol74 | DW_LANG_Cobol85 | DW_LANG_Fortran77 + | DW_LANG_Fortran90 | DW_LANG_Pascal83 | DW_LANG_Modula2 | DW_LANG_Ada95 + | DW_LANG_Fortran95 | DW_LANG_PLI | DW_LANG_Modula3 | DW_LANG_Julia + | DW_LANG_Fortran03 | DW_LANG_Fortran08 => Some(1), + _ => None, + } + } +} + +dw!( +/// The encodings of the constants used in the `DW_AT_address_class` attribute. +/// +/// There is only one value that is common to all target architectures. +/// See Section 7.13. +DwAddr(u64) { + DW_ADDR_none = 0x00, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_identifier_case` attribute. +/// +/// See Section 7.14, Table 7.18. +DwId(u8) { + DW_ID_case_sensitive = 0x00, + DW_ID_up_case = 0x01, + DW_ID_down_case = 0x02, + DW_ID_case_insensitive = 0x03, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_calling_convention` attribute. +/// +/// See Section 7.15, Table 7.19. +DwCc(u8) { + DW_CC_normal = 0x01, + DW_CC_program = 0x02, + DW_CC_nocall = 0x03, + DW_CC_pass_by_reference = 0x04, + DW_CC_pass_by_value = 0x05, + DW_CC_lo_user = 0x40, + DW_CC_hi_user = 0xff, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_inline` attribute. +/// +/// See Section 7.16, Table 7.20. +DwInl(u8) { + DW_INL_not_inlined = 0x00, + DW_INL_inlined = 0x01, + DW_INL_declared_not_inlined = 0x02, + DW_INL_declared_inlined = 0x03, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_ordering` attribute. 
+/// +/// See Section 7.17, Table 7.17. +DwOrd(u8) { + DW_ORD_row_major = 0x00, + DW_ORD_col_major = 0x01, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_discr_list` attribute. +/// +/// See Section 7.18, Table 7.22. +DwDsc(u8) { + DW_DSC_label = 0x00, + DW_DSC_range = 0x01, +}); + +dw!( +/// Name index attribute encodings. +/// +/// See Section 7.19, Table 7.23. +DwIdx(u16) { + DW_IDX_compile_unit = 1, + DW_IDX_type_unit = 2, + DW_IDX_die_offset = 3, + DW_IDX_parent = 4, + DW_IDX_type_hash = 5, + DW_IDX_lo_user = 0x2000, + DW_IDX_hi_user = 0x3fff, +}); + +dw!( +/// The encodings of the constants used in the `DW_AT_defaulted` attribute. +/// +/// See Section 7.20, Table 7.24. +DwDefaulted(u8) { + DW_DEFAULTED_no = 0x00, + DW_DEFAULTED_in_class = 0x01, + DW_DEFAULTED_out_of_class = 0x02, +}); + +dw!( +/// The encodings for the standard opcodes for line number information. +/// +/// See Section 7.22, Table 7.25. +DwLns(u8) { + DW_LNS_copy = 0x01, + DW_LNS_advance_pc = 0x02, + DW_LNS_advance_line = 0x03, + DW_LNS_set_file = 0x04, + DW_LNS_set_column = 0x05, + DW_LNS_negate_stmt = 0x06, + DW_LNS_set_basic_block = 0x07, + DW_LNS_const_add_pc = 0x08, + DW_LNS_fixed_advance_pc = 0x09, + DW_LNS_set_prologue_end = 0x0a, + DW_LNS_set_epilogue_begin = 0x0b, + DW_LNS_set_isa = 0x0c, +}); + +dw!( +/// The encodings for the extended opcodes for line number information. +/// +/// See Section 7.22, Table 7.26. +DwLne(u8) { + DW_LNE_end_sequence = 0x01, + DW_LNE_set_address = 0x02, + DW_LNE_define_file = 0x03, + DW_LNE_set_discriminator = 0x04, + + DW_LNE_lo_user = 0x80, + DW_LNE_hi_user = 0xff, +}); + +dw!( +/// The encodings for the line number header entry formats. +/// +/// See Section 7.22, Table 7.27. 
+DwLnct(u16) { + DW_LNCT_path = 0x1, + DW_LNCT_directory_index = 0x2, + DW_LNCT_timestamp = 0x3, + DW_LNCT_size = 0x4, + DW_LNCT_MD5 = 0x5, + DW_LNCT_lo_user = 0x2000, + DW_LNCT_hi_user = 0x3fff, +}); + +dw!( +/// The encodings for macro information entry types. +/// +/// See Section 7.23, Table 7.28. +DwMacro(u8) { + DW_MACRO_define = 0x01, + DW_MACRO_undef = 0x02, + DW_MACRO_start_file = 0x03, + DW_MACRO_end_file = 0x04, + DW_MACRO_define_strp = 0x05, + DW_MACRO_undef_strp = 0x06, + DW_MACRO_import = 0x07, + DW_MACRO_define_sup = 0x08, + DW_MACRO_undef_sup = 0x09, + DW_MACRO_import_sup = 0x0a, + DW_MACRO_define_strx = 0x0b, + DW_MACRO_undef_strx = 0x0c, + DW_MACRO_lo_user = 0xe0, + DW_MACRO_hi_user = 0xff, +}); + +dw!( +/// Range list entry encoding values. +/// +/// See Section 7.25, Table 7.30. +DwRle(u8) { + DW_RLE_end_of_list = 0x00, + DW_RLE_base_addressx = 0x01, + DW_RLE_startx_endx = 0x02, + DW_RLE_startx_length = 0x03, + DW_RLE_offset_pair = 0x04, + DW_RLE_base_address = 0x05, + DW_RLE_start_end = 0x06, + DW_RLE_start_length = 0x07, +}); + +dw!( +/// The encodings for DWARF expression operations. +/// +/// See Section 7.7.1, Table 7.9. 
+DwOp(u8) { + DW_OP_addr = 0x03, + DW_OP_deref = 0x06, + DW_OP_const1u = 0x08, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_const8u = 0x0e, + DW_OP_const8s = 0x0f, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1a, + DW_OP_div = 0x1b, + DW_OP_minus = 0x1c, + DW_OP_mod = 0x1d, + DW_OP_mul = 0x1e, + DW_OP_neg = 0x1f, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_bra = 0x28, + DW_OP_eq = 0x29, + DW_OP_ge = 0x2a, + DW_OP_gt = 0x2b, + DW_OP_le = 0x2c, + DW_OP_lt = 0x2d, + DW_OP_ne = 0x2e, + DW_OP_skip = 0x2f, + DW_OP_lit0 = 0x30, + DW_OP_lit1 = 0x31, + DW_OP_lit2 = 0x32, + DW_OP_lit3 = 0x33, + DW_OP_lit4 = 0x34, + DW_OP_lit5 = 0x35, + DW_OP_lit6 = 0x36, + DW_OP_lit7 = 0x37, + DW_OP_lit8 = 0x38, + DW_OP_lit9 = 0x39, + DW_OP_lit10 = 0x3a, + DW_OP_lit11 = 0x3b, + DW_OP_lit12 = 0x3c, + DW_OP_lit13 = 0x3d, + DW_OP_lit14 = 0x3e, + DW_OP_lit15 = 0x3f, + DW_OP_lit16 = 0x40, + DW_OP_lit17 = 0x41, + DW_OP_lit18 = 0x42, + DW_OP_lit19 = 0x43, + DW_OP_lit20 = 0x44, + DW_OP_lit21 = 0x45, + DW_OP_lit22 = 0x46, + DW_OP_lit23 = 0x47, + DW_OP_lit24 = 0x48, + DW_OP_lit25 = 0x49, + DW_OP_lit26 = 0x4a, + DW_OP_lit27 = 0x4b, + DW_OP_lit28 = 0x4c, + DW_OP_lit29 = 0x4d, + DW_OP_lit30 = 0x4e, + DW_OP_lit31 = 0x4f, + DW_OP_reg0 = 0x50, + DW_OP_reg1 = 0x51, + DW_OP_reg2 = 0x52, + DW_OP_reg3 = 0x53, + DW_OP_reg4 = 0x54, + DW_OP_reg5 = 0x55, + DW_OP_reg6 = 0x56, + DW_OP_reg7 = 0x57, + DW_OP_reg8 = 0x58, + DW_OP_reg9 = 0x59, + DW_OP_reg10 = 0x5a, + DW_OP_reg11 = 0x5b, + DW_OP_reg12 = 0x5c, + DW_OP_reg13 = 0x5d, + DW_OP_reg14 = 0x5e, + DW_OP_reg15 = 0x5f, + DW_OP_reg16 = 0x60, + DW_OP_reg17 = 0x61, + DW_OP_reg18 
= 0x62, + DW_OP_reg19 = 0x63, + DW_OP_reg20 = 0x64, + DW_OP_reg21 = 0x65, + DW_OP_reg22 = 0x66, + DW_OP_reg23 = 0x67, + DW_OP_reg24 = 0x68, + DW_OP_reg25 = 0x69, + DW_OP_reg26 = 0x6a, + DW_OP_reg27 = 0x6b, + DW_OP_reg28 = 0x6c, + DW_OP_reg29 = 0x6d, + DW_OP_reg30 = 0x6e, + DW_OP_reg31 = 0x6f, + DW_OP_breg0 = 0x70, + DW_OP_breg1 = 0x71, + DW_OP_breg2 = 0x72, + DW_OP_breg3 = 0x73, + DW_OP_breg4 = 0x74, + DW_OP_breg5 = 0x75, + DW_OP_breg6 = 0x76, + DW_OP_breg7 = 0x77, + DW_OP_breg8 = 0x78, + DW_OP_breg9 = 0x79, + DW_OP_breg10 = 0x7a, + DW_OP_breg11 = 0x7b, + DW_OP_breg12 = 0x7c, + DW_OP_breg13 = 0x7d, + DW_OP_breg14 = 0x7e, + DW_OP_breg15 = 0x7f, + DW_OP_breg16 = 0x80, + DW_OP_breg17 = 0x81, + DW_OP_breg18 = 0x82, + DW_OP_breg19 = 0x83, + DW_OP_breg20 = 0x84, + DW_OP_breg21 = 0x85, + DW_OP_breg22 = 0x86, + DW_OP_breg23 = 0x87, + DW_OP_breg24 = 0x88, + DW_OP_breg25 = 0x89, + DW_OP_breg26 = 0x8a, + DW_OP_breg27 = 0x8b, + DW_OP_breg28 = 0x8c, + DW_OP_breg29 = 0x8d, + DW_OP_breg30 = 0x8e, + DW_OP_breg31 = 0x8f, + DW_OP_regx = 0x90, + DW_OP_fbreg = 0x91, + DW_OP_bregx = 0x92, + DW_OP_piece = 0x93, + DW_OP_deref_size = 0x94, + DW_OP_xderef_size = 0x95, + DW_OP_nop = 0x96, + DW_OP_push_object_address = 0x97, + DW_OP_call2 = 0x98, + DW_OP_call4 = 0x99, + DW_OP_call_ref = 0x9a, + DW_OP_form_tls_address = 0x9b, + DW_OP_call_frame_cfa = 0x9c, + DW_OP_bit_piece = 0x9d, + DW_OP_implicit_value = 0x9e, + DW_OP_stack_value = 0x9f, + DW_OP_implicit_pointer = 0xa0, + DW_OP_addrx = 0xa1, + DW_OP_constx = 0xa2, + DW_OP_entry_value = 0xa3, + DW_OP_const_type = 0xa4, + DW_OP_regval_type = 0xa5, + DW_OP_deref_type = 0xa6, + DW_OP_xderef_type = 0xa7, + DW_OP_convert = 0xa8, + DW_OP_reinterpret = 0xa9, + + // GNU extensions + DW_OP_GNU_push_tls_address = 0xe0, + DW_OP_GNU_implicit_pointer = 0xf2, + DW_OP_GNU_entry_value = 0xf3, + DW_OP_GNU_const_type = 0xf4, + DW_OP_GNU_regval_type = 0xf5, + DW_OP_GNU_deref_type = 0xf6, + DW_OP_GNU_convert = 0xf7, + DW_OP_GNU_reinterpret = 0xf9, + 
DW_OP_GNU_parameter_ref = 0xfa, + DW_OP_GNU_addr_index = 0xfb, + DW_OP_GNU_const_index = 0xfc, + + // Wasm extensions + DW_OP_WASM_location = 0xed, +}); + +dw!( +/// Pointer encoding used by `.eh_frame`. +/// +/// The four lower bits describe the +/// format of the pointer, the upper four bits describe how the encoding should +/// be applied. +/// +/// Defined in `` +DwEhPe(u8) { +// Format of pointer encoding. + +// "Unsigned value is encoded using the Little Endian Base 128" + DW_EH_PE_uleb128 = 0x1, +// "A 2 bytes unsigned value." + DW_EH_PE_udata2 = 0x2, +// "A 4 bytes unsigned value." + DW_EH_PE_udata4 = 0x3, +// "An 8 bytes unsigned value." + DW_EH_PE_udata8 = 0x4, +// "Signed value is encoded using the Little Endian Base 128" + DW_EH_PE_sleb128 = 0x9, +// "A 2 bytes signed value." + DW_EH_PE_sdata2 = 0x0a, +// "A 4 bytes signed value." + DW_EH_PE_sdata4 = 0x0b, +// "An 8 bytes signed value." + DW_EH_PE_sdata8 = 0x0c, + +// How the pointer encoding should be applied. + +// `DW_EH_PE_pcrel` pointers are relative to their own location. + DW_EH_PE_pcrel = 0x10, +// "Value is relative to the beginning of the .text section." + DW_EH_PE_textrel = 0x20, +// "Value is relative to the beginning of the .got or .eh_frame_hdr section." + DW_EH_PE_datarel = 0x30, +// "Value is relative to the beginning of the function." + DW_EH_PE_funcrel = 0x40, +// "Value is aligned to an address unit sized boundary." + DW_EH_PE_aligned = 0x50, + +// This bit can be set for any of the above encoding applications. When set, +// the encoded value is the address of the real pointer result, not the +// pointer result itself. +// +// This isn't defined in the DWARF or the `.eh_frame` standards, but is +// generated by both GNU/Linux and macOS tooling. + DW_EH_PE_indirect = 0x80, + +// These constants apply to both the lower and upper bits. + +// "The Value is a literal pointer whose size is determined by the +// architecture." 
+ DW_EH_PE_absptr = 0x0, +// The absence of a pointer and encoding. + DW_EH_PE_omit = 0xff, +}); + +const DW_EH_PE_FORMAT_MASK: u8 = 0b0000_1111; + +// Ignores indirection bit. +const DW_EH_PE_APPLICATION_MASK: u8 = 0b0111_0000; + +impl ops::BitOr for DwEhPe { + type Output = DwEhPe; + + fn bitor(self, rhs: DwEhPe) -> DwEhPe { + DwEhPe(self.0 | rhs.0) + } +} + +impl DwEhPe { + /// Get the pointer encoding's format. + #[inline] + pub fn format(self) -> DwEhPe { + DwEhPe(self.0 & DW_EH_PE_FORMAT_MASK) + } + + /// Get the pointer encoding's application. + #[inline] + pub fn application(self) -> DwEhPe { + DwEhPe(self.0 & DW_EH_PE_APPLICATION_MASK) + } + + /// Is this encoding the absent pointer encoding? + #[inline] + pub fn is_absent(self) -> bool { + self == DW_EH_PE_omit + } + + /// Is this coding indirect? If so, its encoded value is the address of the + /// real pointer result, not the pointer result itself. + #[inline] + pub fn is_indirect(self) -> bool { + self.0 & DW_EH_PE_indirect.0 != 0 + } + + /// Is this a known, valid pointer encoding? 
+ pub fn is_valid_encoding(self) -> bool { + if self.is_absent() { + return true; + } + + match self.format() { + DW_EH_PE_absptr | DW_EH_PE_uleb128 | DW_EH_PE_udata2 | DW_EH_PE_udata4 + | DW_EH_PE_udata8 | DW_EH_PE_sleb128 | DW_EH_PE_sdata2 | DW_EH_PE_sdata4 + | DW_EH_PE_sdata8 => {} + _ => return false, + } + + match self.application() { + DW_EH_PE_absptr | DW_EH_PE_pcrel | DW_EH_PE_textrel | DW_EH_PE_datarel + | DW_EH_PE_funcrel | DW_EH_PE_aligned => {} + _ => return false, + } + + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dw_eh_pe_format() { + let encoding = DW_EH_PE_pcrel | DW_EH_PE_uleb128; + assert_eq!(encoding.format(), DW_EH_PE_uleb128); + } + + #[test] + fn test_dw_eh_pe_application() { + let encoding = DW_EH_PE_pcrel | DW_EH_PE_uleb128; + assert_eq!(encoding.application(), DW_EH_PE_pcrel); + } + + #[test] + fn test_dw_eh_pe_is_absent() { + assert!(!DW_EH_PE_absptr.is_absent()); + assert!(DW_EH_PE_omit.is_absent()); + } + + #[test] + fn test_dw_eh_pe_is_valid_encoding_ok() { + let encoding = DW_EH_PE_uleb128 | DW_EH_PE_pcrel; + assert!(encoding.is_valid_encoding()); + assert!(DW_EH_PE_absptr.is_valid_encoding()); + assert!(DW_EH_PE_omit.is_valid_encoding()); + } + + #[test] + fn test_dw_eh_pe_is_valid_encoding_bad_format() { + let encoding = DwEhPe((DW_EH_PE_sdata8.0 + 1) | DW_EH_PE_pcrel.0); + assert!(!encoding.is_valid_encoding()); + } + + #[test] + fn test_dw_eh_pe_is_valid_encoding_bad_application() { + let encoding = DwEhPe(DW_EH_PE_sdata8.0 | (DW_EH_PE_aligned.0 + 1)); + assert!(!encoding.is_valid_encoding()); + } +} diff --git a/third_party/rust/gimli/src/endianity.rs b/third_party/rust/gimli/src/endianity.rs new file mode 100644 index 000000000000..3201551f1b8a --- /dev/null +++ b/third_party/rust/gimli/src/endianity.rs @@ -0,0 +1,256 @@ +//! Types for compile-time and run-time endianity. + +use core::convert::TryInto; +use core::fmt::Debug; + +/// A trait describing the endianity of some buffer. 
+pub trait Endianity: Debug + Default + Clone + Copy + PartialEq + Eq { + /// Return true for big endian byte order. + fn is_big_endian(self) -> bool; + + /// Return true for little endian byte order. + #[inline] + fn is_little_endian(self) -> bool { + !self.is_big_endian() + } + + /// Reads an unsigned 16 bit integer from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 2`. + #[inline] + fn read_u16(self, buf: &[u8]) -> u16 { + let bytes: &[u8; 2] = buf[..2].try_into().unwrap(); + if self.is_big_endian() { + u16::from_be_bytes(*bytes) + } else { + u16::from_le_bytes(*bytes) + } + } + + /// Reads an unsigned 32 bit integer from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 4`. + #[inline] + fn read_u32(self, buf: &[u8]) -> u32 { + let bytes: &[u8; 4] = buf[..4].try_into().unwrap(); + if self.is_big_endian() { + u32::from_be_bytes(*bytes) + } else { + u32::from_le_bytes(*bytes) + } + } + + /// Reads an unsigned 64 bit integer from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 8`. + #[inline] + fn read_u64(self, buf: &[u8]) -> u64 { + let bytes: &[u8; 8] = buf[..8].try_into().unwrap(); + if self.is_big_endian() { + u64::from_be_bytes(*bytes) + } else { + u64::from_le_bytes(*bytes) + } + } + + /// Read an unsigned n-bytes integer u64. + /// + /// # Panics + /// + /// Panics when `buf.len() < 1` or `buf.len() > 8`. + #[inline] + fn read_uint(&mut self, buf: &[u8]) -> u64 { + let mut tmp = [0; 8]; + if self.is_big_endian() { + tmp[8 - buf.len()..].copy_from_slice(buf); + } else { + tmp[..buf.len()].copy_from_slice(buf); + } + self.read_u64(&tmp) + } + + /// Reads a signed 16 bit integer from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 2`. + #[inline] + fn read_i16(self, buf: &[u8]) -> i16 { + self.read_u16(buf) as i16 + } + + /// Reads a signed 32 bit integer from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 4`. 
+ #[inline] + fn read_i32(self, buf: &[u8]) -> i32 { + self.read_u32(buf) as i32 + } + + /// Reads a signed 64 bit integer from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 8`. + #[inline] + fn read_i64(self, buf: &[u8]) -> i64 { + self.read_u64(buf) as i64 + } + + /// Reads a 32 bit floating point number from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 8`. + #[inline] + fn read_f32(self, buf: &[u8]) -> f32 { + f32::from_bits(self.read_u32(buf)) + } + + /// Reads a 32 bit floating point number from `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 8`. + #[inline] + fn read_f64(self, buf: &[u8]) -> f64 { + f64::from_bits(self.read_u64(buf)) + } + + /// Writes an unsigned 16 bit integer `n` to `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 2`. + #[inline] + fn write_u16(self, buf: &mut [u8], n: u16) { + let bytes = if self.is_big_endian() { + n.to_be_bytes() + } else { + n.to_le_bytes() + }; + buf[..2].copy_from_slice(&bytes); + } + + /// Writes an unsigned 32 bit integer `n` to `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 4`. + #[inline] + fn write_u32(self, buf: &mut [u8], n: u32) { + let bytes = if self.is_big_endian() { + n.to_be_bytes() + } else { + n.to_le_bytes() + }; + buf[..4].copy_from_slice(&bytes); + } + + /// Writes an unsigned 64 bit integer `n` to `buf`. + /// + /// # Panics + /// + /// Panics when `buf.len() < 8`. + #[inline] + fn write_u64(self, buf: &mut [u8], n: u64) { + let bytes = if self.is_big_endian() { + n.to_be_bytes() + } else { + n.to_le_bytes() + }; + buf[..8].copy_from_slice(&bytes); + } +} + +/// Byte order that is selectable at runtime. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RunTimeEndian { + /// Little endian byte order. + Little, + /// Big endian byte order. 
+ Big, +} + +impl Default for RunTimeEndian { + #[cfg(target_endian = "little")] + #[inline] + fn default() -> RunTimeEndian { + RunTimeEndian::Little + } + + #[cfg(target_endian = "big")] + #[inline] + fn default() -> RunTimeEndian { + RunTimeEndian::Big + } +} + +impl Endianity for RunTimeEndian { + #[inline] + fn is_big_endian(self) -> bool { + self != RunTimeEndian::Little + } +} + +/// Little endian byte order. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LittleEndian; + +impl Default for LittleEndian { + #[inline] + fn default() -> LittleEndian { + LittleEndian + } +} + +impl Endianity for LittleEndian { + #[inline] + fn is_big_endian(self) -> bool { + false + } +} + +/// Big endian byte order. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct BigEndian; + +impl Default for BigEndian { + #[inline] + fn default() -> BigEndian { + BigEndian + } +} + +impl Endianity for BigEndian { + #[inline] + fn is_big_endian(self) -> bool { + true + } +} + +/// The native endianity for the target platform. +#[cfg(target_endian = "little")] +pub type NativeEndian = LittleEndian; + +#[cfg(target_endian = "little")] +#[allow(non_upper_case_globals)] +#[doc(hidden)] +pub const NativeEndian: LittleEndian = LittleEndian; + +/// The native endianity for the target platform. +#[cfg(target_endian = "big")] +pub type NativeEndian = BigEndian; + +#[cfg(target_endian = "big")] +#[allow(non_upper_case_globals)] +#[doc(hidden)] +pub const NativeEndian: BigEndian = BigEndian; diff --git a/third_party/rust/gimli/src/leb128.rs b/third_party/rust/gimli/src/leb128.rs new file mode 100644 index 000000000000..de81cfdcf1e2 --- /dev/null +++ b/third_party/rust/gimli/src/leb128.rs @@ -0,0 +1,612 @@ +//! Read and write DWARF's "Little Endian Base 128" (LEB128) variable length +//! integer encoding. +//! +//! The implementation is a direct translation of the psuedocode in the DWARF 4 +//! standard's appendix C. +//! +//! Read and write signed integers: +//! +//! 
``` +//! # #[cfg(all(feature = "read", feature = "write"))] { +//! use gimli::{EndianSlice, NativeEndian, leb128}; +//! +//! let mut buf = [0; 1024]; +//! +//! // Write to anything that implements `std::io::Write`. +//! { +//! let mut writable = &mut buf[..]; +//! leb128::write::signed(&mut writable, -12345).expect("Should write number"); +//! } +//! +//! // Read from anything that implements `gimli::Reader`. +//! let mut readable = EndianSlice::new(&buf[..], NativeEndian); +//! let val = leb128::read::signed(&mut readable).expect("Should read number"); +//! assert_eq!(val, -12345); +//! # } +//! ``` +//! +//! Or read and write unsigned integers: +//! +//! ``` +//! # #[cfg(all(feature = "read", feature = "write"))] { +//! use gimli::{EndianSlice, NativeEndian, leb128}; +//! +//! let mut buf = [0; 1024]; +//! +//! { +//! let mut writable = &mut buf[..]; +//! leb128::write::unsigned(&mut writable, 98765).expect("Should write number"); +//! } +//! +//! let mut readable = EndianSlice::new(&buf[..], NativeEndian); +//! let val = leb128::read::unsigned(&mut readable).expect("Should read number"); +//! assert_eq!(val, 98765); +//! # } +//! ``` + +const CONTINUATION_BIT: u8 = 1 << 7; +#[cfg(feature = "read-core")] +const SIGN_BIT: u8 = 1 << 6; + +#[inline] +fn low_bits_of_byte(byte: u8) -> u8 { + byte & !CONTINUATION_BIT +} + +#[inline] +#[allow(dead_code)] +fn low_bits_of_u64(val: u64) -> u8 { + let byte = val & u64::from(core::u8::MAX); + low_bits_of_byte(byte as u8) +} + +/// A module for reading signed and unsigned integers that have been LEB128 +/// encoded. +#[cfg(feature = "read-core")] +pub mod read { + use super::{low_bits_of_byte, CONTINUATION_BIT, SIGN_BIT}; + use crate::read::{Error, Reader, Result}; + + /// Read bytes until the LEB128 continuation bit is not set. 
+ pub fn skip(r: &mut R) -> Result<()> { + loop { + let byte = r.read_u8()?; + if byte & CONTINUATION_BIT == 0 { + return Ok(()); + } + } + } + + /// Read an unsigned LEB128 number from the given `Reader` and + /// return it or an error if reading failed. + pub fn unsigned(r: &mut R) -> Result { + let mut result = 0; + let mut shift = 0; + + loop { + let byte = r.read_u8()?; + if shift == 63 && byte != 0x00 && byte != 0x01 { + return Err(Error::BadUnsignedLeb128); + } + + let low_bits = u64::from(low_bits_of_byte(byte)); + result |= low_bits << shift; + + if byte & CONTINUATION_BIT == 0 { + return Ok(result); + } + + shift += 7; + } + } + + /// Read an LEB128 u16 from the given `Reader` and + /// return it or an error if reading failed. + pub fn u16(r: &mut R) -> Result { + let byte = r.read_u8()?; + let mut result = u16::from(low_bits_of_byte(byte)); + if byte & CONTINUATION_BIT == 0 { + return Ok(result); + } + + let byte = r.read_u8()?; + result |= u16::from(low_bits_of_byte(byte)) << 7; + if byte & CONTINUATION_BIT == 0 { + return Ok(result); + } + + let byte = r.read_u8()?; + if byte > 0x03 { + return Err(Error::BadUnsignedLeb128); + } + result += u16::from(byte) << 14; + Ok(result) + } + + /// Read a signed LEB128 number from the given `Reader` and + /// return it or an error if reading failed. + pub fn signed(r: &mut R) -> Result { + let mut result = 0; + let mut shift = 0; + let size = 64; + let mut byte; + + loop { + byte = r.read_u8()?; + if shift == 63 && byte != 0x00 && byte != 0x7f { + return Err(Error::BadSignedLeb128); + } + + let low_bits = i64::from(low_bits_of_byte(byte)); + result |= low_bits << shift; + shift += 7; + + if byte & CONTINUATION_BIT == 0 { + break; + } + } + + if shift < size && (SIGN_BIT & byte) == SIGN_BIT { + // Sign extend the result. + result |= !0 << shift; + } + + Ok(result) + } +} + +/// A module for writing integers encoded as LEB128. 
+#[cfg(feature = "write")] +pub mod write { + use super::{low_bits_of_u64, CONTINUATION_BIT}; + use std::io; + + /// Write the given unsigned number using the LEB128 encoding to the given + /// `std::io::Write`able. Returns the number of bytes written to `w`, or an + /// error if writing failed. + pub fn unsigned(w: &mut W, mut val: u64) -> Result + where + W: io::Write, + { + let mut bytes_written = 0; + loop { + let mut byte = low_bits_of_u64(val); + val >>= 7; + if val != 0 { + // More bytes to come, so set the continuation bit. + byte |= CONTINUATION_BIT; + } + + let buf = [byte]; + w.write_all(&buf)?; + bytes_written += 1; + + if val == 0 { + return Ok(bytes_written); + } + } + } + + /// Return the size of the LEB128 encoding of the given unsigned number. + pub fn uleb128_size(mut val: u64) -> usize { + let mut size = 0; + loop { + val >>= 7; + size += 1; + if val == 0 { + return size; + } + } + } + + /// Write the given signed number using the LEB128 encoding to the given + /// `std::io::Write`able. Returns the number of bytes written to `w`, or an + /// error if writing failed. + pub fn signed(w: &mut W, mut val: i64) -> Result + where + W: io::Write, + { + let mut bytes_written = 0; + loop { + let mut byte = val as u8; + // Keep the sign bit for testing + val >>= 6; + let done = val == 0 || val == -1; + if done { + byte &= !CONTINUATION_BIT; + } else { + // Remove the sign bit + val >>= 1; + // More bytes to come, so set the continuation bit. + byte |= CONTINUATION_BIT; + } + + let buf = [byte]; + w.write_all(&buf)?; + bytes_written += 1; + + if done { + return Ok(bytes_written); + } + } + } + + /// Return the size of the LEB128 encoding of the given signed number. 
+ pub fn sleb128_size(mut val: i64) -> usize { + let mut size = 0; + loop { + val >>= 6; + let done = val == 0 || val == -1; + val >>= 1; + size += 1; + if done { + return size; + } + } + } +} + +#[cfg(test)] +#[cfg(all(feature = "read", feature = "write"))] +mod tests { + use super::{low_bits_of_byte, low_bits_of_u64, read, write, CONTINUATION_BIT}; + use crate::endianity::NativeEndian; + use crate::read::{EndianSlice, Error, ReaderOffsetId}; + + trait ResultExt { + fn map_eof(self, input: &[u8]) -> Self; + } + + impl ResultExt for Result { + fn map_eof(self, input: &[u8]) -> Self { + match self { + Err(Error::UnexpectedEof(id)) => { + let id = ReaderOffsetId(id.0 - input.as_ptr() as u64); + Err(Error::UnexpectedEof(id)) + } + r => r, + } + } + } + + #[test] + fn test_low_bits_of_byte() { + for i in 0..127 { + assert_eq!(i, low_bits_of_byte(i)); + assert_eq!(i, low_bits_of_byte(i | CONTINUATION_BIT)); + } + } + + #[test] + fn test_low_bits_of_u64() { + for i in 0u64..127 { + assert_eq!(i as u8, low_bits_of_u64(1 << 16 | i)); + assert_eq!( + i as u8, + low_bits_of_u64(i << 16 | i | (u64::from(CONTINUATION_BIT))) + ); + } + } + + // Examples from the DWARF 4 standard, section 7.6, figure 22. 
+ #[test] + fn test_read_unsigned() { + let buf = [2u8]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 2, + read::unsigned(&mut readable).expect("Should read number") + ); + + let buf = [127u8]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 127, + read::unsigned(&mut readable).expect("Should read number") + ); + + let buf = [CONTINUATION_BIT, 1]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 128, + read::unsigned(&mut readable).expect("Should read number") + ); + + let buf = [1u8 | CONTINUATION_BIT, 1]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 129, + read::unsigned(&mut readable).expect("Should read number") + ); + + let buf = [2u8 | CONTINUATION_BIT, 1]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 130, + read::unsigned(&mut readable).expect("Should read number") + ); + + let buf = [57u8 | CONTINUATION_BIT, 100]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 12857, + read::unsigned(&mut readable).expect("Should read number") + ); + } + + // Examples from the DWARF 4 standard, section 7.6, figure 23. 
+ #[test] + fn test_read_signed() { + let buf = [2u8]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!(2, read::signed(&mut readable).expect("Should read number")); + + let buf = [0x7eu8]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!(-2, read::signed(&mut readable).expect("Should read number")); + + let buf = [127u8 | CONTINUATION_BIT, 0]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 127, + read::signed(&mut readable).expect("Should read number") + ); + + let buf = [1u8 | CONTINUATION_BIT, 0x7f]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + -127, + read::signed(&mut readable).expect("Should read number") + ); + + let buf = [CONTINUATION_BIT, 1]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 128, + read::signed(&mut readable).expect("Should read number") + ); + + let buf = [CONTINUATION_BIT, 0x7f]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + -128, + read::signed(&mut readable).expect("Should read number") + ); + + let buf = [1u8 | CONTINUATION_BIT, 1]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + 129, + read::signed(&mut readable).expect("Should read number") + ); + + let buf = [0x7fu8 | CONTINUATION_BIT, 0x7e]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + -129, + read::signed(&mut readable).expect("Should read number") + ); + } + + #[test] + fn test_read_signed_63_bits() { + let buf = [ + CONTINUATION_BIT, + CONTINUATION_BIT, + CONTINUATION_BIT, + CONTINUATION_BIT, + CONTINUATION_BIT, + CONTINUATION_BIT, + CONTINUATION_BIT, + CONTINUATION_BIT, + 0x40, + ]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + -0x4000_0000_0000_0000, + read::signed(&mut readable).expect("Should read number") + ); + } + + #[test] + fn test_read_unsigned_not_enough_data() { + let buf = 
[CONTINUATION_BIT]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + read::unsigned(&mut readable).map_eof(&buf), + Err(Error::UnexpectedEof(ReaderOffsetId(1))) + ); + } + + #[test] + fn test_read_signed_not_enough_data() { + let buf = [CONTINUATION_BIT]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + read::signed(&mut readable).map_eof(&buf), + Err(Error::UnexpectedEof(ReaderOffsetId(1))) + ); + } + + #[test] + fn test_write_unsigned_not_enough_space() { + let mut buf = [0; 1]; + let mut writable = &mut buf[..]; + match write::unsigned(&mut writable, 128) { + Err(e) => assert_eq!(e.kind(), std::io::ErrorKind::WriteZero), + otherwise => panic!("Unexpected: {:?}", otherwise), + } + } + + #[test] + fn test_write_signed_not_enough_space() { + let mut buf = [0; 1]; + let mut writable = &mut buf[..]; + match write::signed(&mut writable, 128) { + Err(e) => assert_eq!(e.kind(), std::io::ErrorKind::WriteZero), + otherwise => panic!("Unexpected: {:?}", otherwise), + } + } + + #[test] + fn dogfood_signed() { + fn inner(i: i64) { + let mut buf = [0u8; 1024]; + + { + let mut writable = &mut buf[..]; + write::signed(&mut writable, i).expect("Should write signed number"); + } + + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + let result = read::signed(&mut readable).expect("Should be able to read it back again"); + assert_eq!(i, result); + } + for i in -513..513 { + inner(i); + } + inner(core::i64::MIN); + } + + #[test] + fn dogfood_unsigned() { + for i in 0..1025 { + let mut buf = [0u8; 1024]; + + { + let mut writable = &mut buf[..]; + write::unsigned(&mut writable, i).expect("Should write signed number"); + } + + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + let result = + read::unsigned(&mut readable).expect("Should be able to read it back again"); + assert_eq!(i, result); + } + } + + #[test] + fn test_read_unsigned_overflow() { + let buf = [ + 2u8 | CONTINUATION_BIT, + 2 | 
CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 1, + ]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert!(read::unsigned(&mut readable).is_err()); + } + + #[test] + fn test_read_signed_overflow() { + let buf = [ + 2u8 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 2 | CONTINUATION_BIT, + 1, + ]; + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert!(read::signed(&mut readable).is_err()); + } + + #[test] + fn test_read_multiple() { + let buf = [2u8 | CONTINUATION_BIT, 1u8, 1u8]; + + let mut readable = EndianSlice::new(&buf[..], NativeEndian); + assert_eq!( + read::unsigned(&mut readable).expect("Should read first number"), + 130u64 + ); + assert_eq!( + 
read::unsigned(&mut readable).expect("Should read first number"), + 1u64 + ); + } + + #[test] + fn test_read_u16() { + for (buf, val) in [ + (&[2][..], 2), + (&[0x7f][..], 0x7f), + (&[0x80, 1][..], 0x80), + (&[0x81, 1][..], 0x81), + (&[0x82, 1][..], 0x82), + (&[0xff, 0x7f][..], 0x3fff), + (&[0x80, 0x80, 1][..], 0x4000), + (&[0xff, 0xff, 1][..], 0x7fff), + (&[0xff, 0xff, 3][..], 0xffff), + ] + .iter() + { + let mut readable = EndianSlice::new(buf, NativeEndian); + assert_eq!(*val, read::u16(&mut readable).expect("Should read number")); + } + + for buf in [ + &[0x80][..], + &[0x80, 0x80][..], + &[0x80, 0x80, 4][..], + &[0x80, 0x80, 0x80, 3][..], + ] + .iter() + { + let mut readable = EndianSlice::new(buf, NativeEndian); + assert!(read::u16(&mut readable).is_err(), "{:?}", buf); + } + } +} diff --git a/third_party/rust/gimli/src/lib.rs b/third_party/rust/gimli/src/lib.rs new file mode 100644 index 000000000000..213e2cbddc95 --- /dev/null +++ b/third_party/rust/gimli/src/lib.rs @@ -0,0 +1,79 @@ +//! `gimli` is a library for reading and writing the +//! [DWARF debugging format](https://dwarfstd.org/). +//! +//! See the [read](./read/index.html) and [write](./write/index.html) modules +//! for examples and API documentation. +//! +//! ## Cargo Features +//! +//! Cargo features that can be enabled with `gimli`: +//! +//! * `std`: Enabled by default. Use the `std` library. Disabling this feature +//! allows using `gimli` in embedded environments that do not have access to +//! `std`. Note that even when `std` is disabled, `gimli` still requires an +//! implementation of the `alloc` crate. +//! +//! * `read`: Enabled by default. Enables the `read` module. Use of `std` is +//! optional. +//! +//! * `write`: Enabled by default. Enables the `write` module. Always uses +//! the `std` library. 
+#![deny(missing_docs)] +#![deny(missing_debug_implementations)] +// Selectively enable rust 2018 warnings +#![warn(bare_trait_objects)] +#![warn(unused_extern_crates)] +#![warn(ellipsis_inclusive_range_patterns)] +#![warn(elided_lifetimes_in_paths)] +#![warn(explicit_outlives_requirements)] +// Style. +#![allow(clippy::bool_to_int_with_if)] +#![allow(clippy::collapsible_else_if)] +#![allow(clippy::comparison_chain)] +#![allow(clippy::manual_range_contains)] +#![allow(clippy::needless_late_init)] +#![allow(clippy::too_many_arguments)] +// False positives with `fallible_iterator`. +#![allow(clippy::should_implement_trait)] +// False positives. +#![allow(clippy::derive_partial_eq_without_eq)] +#![no_std] + +#[allow(unused_imports)] +#[cfg(any(feature = "read", feature = "write"))] +#[macro_use] +extern crate alloc; + +#[cfg(any(feature = "std", feature = "write"))] +#[macro_use] +extern crate std; + +#[cfg(feature = "endian-reader")] +pub use stable_deref_trait::{CloneStableDeref, StableDeref}; + +mod common; +pub use crate::common::*; + +mod arch; +pub use crate::arch::*; + +pub mod constants; +// For backwards compat. +pub use crate::constants::*; + +mod endianity; +pub use crate::endianity::*; + +pub mod leb128; + +#[cfg(feature = "read-core")] +pub mod read; +// For backwards compat. +#[cfg(feature = "read-core")] +pub use crate::read::*; + +#[cfg(feature = "write")] +pub mod write; + +#[cfg(test)] +mod test_util; diff --git a/third_party/rust/gimli/src/read/abbrev.rs b/third_party/rust/gimli/src/read/abbrev.rs new file mode 100644 index 000000000000..b9931b2e9bfa --- /dev/null +++ b/third_party/rust/gimli/src/read/abbrev.rs @@ -0,0 +1,1092 @@ +//! Functions for parsing DWARF debugging abbreviations. 
+ +use alloc::collections::btree_map; +use alloc::sync::Arc; +use alloc::vec::Vec; +use core::convert::TryFrom; +use core::fmt::{self, Debug}; +use core::iter::FromIterator; +use core::ops::Deref; + +use crate::common::{DebugAbbrevOffset, Encoding, SectionId}; +use crate::constants; +use crate::endianity::Endianity; +use crate::read::{ + DebugInfoUnitHeadersIter, EndianSlice, Error, Reader, ReaderOffset, Result, Section, UnitHeader, +}; + +/// The `DebugAbbrev` struct represents the abbreviations describing +/// `DebuggingInformationEntry`s' attribute names and forms found in the +/// `.debug_abbrev` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugAbbrev { + debug_abbrev_section: R, +} + +impl<'input, Endian> DebugAbbrev> +where + Endian: Endianity, +{ + /// Construct a new `DebugAbbrev` instance from the data in the `.debug_abbrev` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_abbrev` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugAbbrev, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_abbrev_section_somehow = || &buf; + /// let debug_abbrev = DebugAbbrev::new(read_debug_abbrev_section_somehow(), LittleEndian); + /// ``` + pub fn new(debug_abbrev_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_abbrev_section, endian)) + } +} + +impl DebugAbbrev { + /// Parse the abbreviations at the given `offset` within this + /// `.debug_abbrev` section. + /// + /// The `offset` should generally be retrieved from a unit header. 
+ pub fn abbreviations( + &self, + debug_abbrev_offset: DebugAbbrevOffset, + ) -> Result { + let input = &mut self.debug_abbrev_section.clone(); + input.skip(debug_abbrev_offset.0)?; + Abbreviations::parse(input) + } +} + +impl DebugAbbrev { + /// Create a `DebugAbbrev` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugAbbrev + where + F: FnMut(&'a T) -> R, + { + borrow(&self.debug_abbrev_section).into() + } +} + +impl Section for DebugAbbrev { + fn id() -> SectionId { + SectionId::DebugAbbrev + } + + fn reader(&self) -> &R { + &self.debug_abbrev_section + } +} + +impl From for DebugAbbrev { + fn from(debug_abbrev_section: R) -> Self { + DebugAbbrev { + debug_abbrev_section, + } + } +} + +/// The strategy to use for caching abbreviations. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum AbbreviationsCacheStrategy { + /// Cache abbreviations that are used more than once. + /// + /// This is useful if the units in the `.debug_info` section will be parsed only once. + Duplicates, + /// Cache all abbreviations. + /// + /// This is useful if the units in the `.debug_info` section will be parsed more than once. + All, +} + +/// A cache of previously parsed `Abbreviations`. +#[derive(Debug, Default)] +pub struct AbbreviationsCache { + abbreviations: btree_map::BTreeMap>>, +} + +impl AbbreviationsCache { + /// Create an empty abbreviations cache. + pub fn new() -> Self { + Self::default() + } + + /// Parse abbreviations and store them in the cache. + /// + /// This will iterate over the given units to determine the abbreviations + /// offsets. Any existing cache entries are discarded. + /// + /// Errors during parsing abbreviations are also stored in the cache. + /// Errors during iterating over the units are ignored. 
+ pub fn populate( + &mut self, + strategy: AbbreviationsCacheStrategy, + debug_abbrev: &DebugAbbrev, + mut units: DebugInfoUnitHeadersIter, + ) { + let mut offsets = Vec::new(); + match strategy { + AbbreviationsCacheStrategy::Duplicates => { + while let Ok(Some(unit)) = units.next() { + offsets.push(unit.debug_abbrev_offset()); + } + offsets.sort_unstable_by_key(|offset| offset.0); + let mut prev_offset = R::Offset::from_u8(0); + let mut count = 0; + offsets.retain(|offset| { + if count == 0 || prev_offset != offset.0 { + prev_offset = offset.0; + count = 1; + } else { + count += 1; + } + count == 2 + }); + } + AbbreviationsCacheStrategy::All => { + while let Ok(Some(unit)) = units.next() { + offsets.push(unit.debug_abbrev_offset()); + } + offsets.sort_unstable_by_key(|offset| offset.0); + offsets.dedup(); + } + } + self.abbreviations = offsets + .into_iter() + .map(|offset| { + ( + offset.0.into_u64(), + debug_abbrev.abbreviations(offset).map(Arc::new), + ) + }) + .collect(); + } + + /// Set an entry in the abbreviations cache. + /// + /// This is only required if you want to manually populate the cache. + pub fn set( + &mut self, + offset: DebugAbbrevOffset, + abbreviations: Arc, + ) { + self.abbreviations + .insert(offset.0.into_u64(), Ok(abbreviations)); + } + + /// Parse the abbreviations at the given offset. + /// + /// This uses the cache if possible, but does not update it. + pub fn get( + &self, + debug_abbrev: &DebugAbbrev, + offset: DebugAbbrevOffset, + ) -> Result> { + match self.abbreviations.get(&offset.0.into_u64()) { + Some(entry) => entry.clone(), + None => debug_abbrev.abbreviations(offset).map(Arc::new), + } + } +} + +/// A set of type abbreviations. +/// +/// Construct an `Abbreviations` instance with the +/// [`abbreviations()`](struct.UnitHeader.html#method.abbreviations) +/// method. 
+#[derive(Debug, Default, Clone)] +pub struct Abbreviations { + vec: Vec, + map: btree_map::BTreeMap, +} + +impl Abbreviations { + /// Construct a new, empty set of abbreviations. + fn empty() -> Abbreviations { + Abbreviations { + vec: Vec::new(), + map: btree_map::BTreeMap::new(), + } + } + + /// Insert an abbreviation into the set. + /// + /// Returns `Ok` if it is the first abbreviation in the set with its code, + /// `Err` if the code is a duplicate and there already exists an + /// abbreviation in the set with the given abbreviation's code. + fn insert(&mut self, abbrev: Abbreviation) -> ::core::result::Result<(), ()> { + let code_usize = abbrev.code as usize; + if code_usize as u64 == abbrev.code { + // Optimize for sequential abbreviation codes by storing them + // in a Vec, as long as the map doesn't already contain them. + // A potential further optimization would be to allow some + // holes in the Vec, but there's no need for that yet. + if code_usize - 1 < self.vec.len() { + return Err(()); + } else if code_usize - 1 == self.vec.len() { + if !self.map.is_empty() && self.map.contains_key(&abbrev.code) { + return Err(()); + } else { + self.vec.push(abbrev); + return Ok(()); + } + } + } + match self.map.entry(abbrev.code) { + btree_map::Entry::Occupied(_) => Err(()), + btree_map::Entry::Vacant(entry) => { + entry.insert(abbrev); + Ok(()) + } + } + } + + /// Get the abbreviation associated with the given code. + #[inline] + pub fn get(&self, code: u64) -> Option<&Abbreviation> { + if let Ok(code) = usize::try_from(code) { + let index = code.checked_sub(1)?; + if index < self.vec.len() { + return Some(&self.vec[index]); + } + } + + self.map.get(&code) + } + + /// Parse a series of abbreviations, terminated by a null abbreviation. + fn parse(input: &mut R) -> Result { + let mut abbrevs = Abbreviations::empty(); + + while let Some(abbrev) = Abbreviation::parse(input)? 
{ + if abbrevs.insert(abbrev).is_err() { + return Err(Error::DuplicateAbbreviationCode); + } + } + + Ok(abbrevs) + } +} + +/// An abbreviation describes the shape of a `DebuggingInformationEntry`'s type: +/// its code, tag type, whether it has children, and its set of attributes. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Abbreviation { + code: u64, + tag: constants::DwTag, + has_children: constants::DwChildren, + attributes: Attributes, +} + +impl Abbreviation { + /// Construct a new `Abbreviation`. + /// + /// ### Panics + /// + /// Panics if `code` is `0`. + pub(crate) fn new( + code: u64, + tag: constants::DwTag, + has_children: constants::DwChildren, + attributes: Attributes, + ) -> Abbreviation { + assert_ne!(code, 0); + Abbreviation { + code, + tag, + has_children, + attributes, + } + } + + /// Get this abbreviation's code. + #[inline] + pub fn code(&self) -> u64 { + self.code + } + + /// Get this abbreviation's tag. + #[inline] + pub fn tag(&self) -> constants::DwTag { + self.tag + } + + /// Return true if this abbreviation's type has children, false otherwise. + #[inline] + pub fn has_children(&self) -> bool { + self.has_children == constants::DW_CHILDREN_yes + } + + /// Get this abbreviation's attributes. + #[inline] + pub fn attributes(&self) -> &[AttributeSpecification] { + &self.attributes[..] + } + + /// Parse an abbreviation's tag. + fn parse_tag(input: &mut R) -> Result { + let val = input.read_uleb128_u16()?; + if val == 0 { + Err(Error::AbbreviationTagZero) + } else { + Ok(constants::DwTag(val)) + } + } + + /// Parse an abbreviation's "does the type have children?" byte. + fn parse_has_children(input: &mut R) -> Result { + let val = input.read_u8()?; + let val = constants::DwChildren(val); + if val == constants::DW_CHILDREN_no || val == constants::DW_CHILDREN_yes { + Ok(val) + } else { + Err(Error::BadHasChildren) + } + } + + /// Parse a series of attribute specifications, terminated by a null attribute + /// specification. 
+ fn parse_attributes(input: &mut R) -> Result { + let mut attrs = Attributes::new(); + + while let Some(attr) = AttributeSpecification::parse(input)? { + attrs.push(attr); + } + + Ok(attrs) + } + + /// Parse an abbreviation. Return `None` for the null abbreviation, `Some` + /// for an actual abbreviation. + fn parse(input: &mut R) -> Result> { + let code = input.read_uleb128()?; + if code == 0 { + return Ok(None); + } + + let tag = Self::parse_tag(input)?; + let has_children = Self::parse_has_children(input)?; + let attributes = Self::parse_attributes(input)?; + let abbrev = Abbreviation::new(code, tag, has_children, attributes); + Ok(Some(abbrev)) + } +} + +/// A list of attributes found in an `Abbreviation` +#[derive(Clone)] +pub(crate) enum Attributes { + Inline { + buf: [AttributeSpecification; MAX_ATTRIBUTES_INLINE], + len: usize, + }, + Heap(Vec), +} + +// Length of 5 based on benchmark results for both x86-64 and i686. +const MAX_ATTRIBUTES_INLINE: usize = 5; + +impl Attributes { + /// Returns a new empty list of attributes + fn new() -> Attributes { + let default = + AttributeSpecification::new(constants::DW_AT_null, constants::DW_FORM_null, None); + Attributes::Inline { + buf: [default; 5], + len: 0, + } + } + + /// Pushes a new value onto this list of attributes. 
+ fn push(&mut self, attr: AttributeSpecification) { + match self { + Attributes::Heap(list) => list.push(attr), + Attributes::Inline { + buf, + len: MAX_ATTRIBUTES_INLINE, + } => { + let mut list = buf.to_vec(); + list.push(attr); + *self = Attributes::Heap(list); + } + Attributes::Inline { buf, len } => { + buf[*len] = attr; + *len += 1; + } + } + } +} + +impl Debug for Attributes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (**self).fmt(f) + } +} + +impl PartialEq for Attributes { + fn eq(&self, other: &Attributes) -> bool { + **self == **other + } +} + +impl Eq for Attributes {} + +impl Deref for Attributes { + type Target = [AttributeSpecification]; + fn deref(&self) -> &[AttributeSpecification] { + match self { + Attributes::Inline { buf, len } => &buf[..*len], + Attributes::Heap(list) => list, + } + } +} + +impl FromIterator for Attributes { + fn from_iter(iter: I) -> Attributes + where + I: IntoIterator, + { + let mut list = Attributes::new(); + for item in iter { + list.push(item); + } + list + } +} + +impl From> for Attributes { + fn from(list: Vec) -> Attributes { + Attributes::Heap(list) + } +} + +/// The description of an attribute in an abbreviated type. It is a pair of name +/// and form. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct AttributeSpecification { + name: constants::DwAt, + form: constants::DwForm, + implicit_const_value: i64, +} + +impl AttributeSpecification { + /// Construct a new `AttributeSpecification` from the given name and form + /// and implicit const value. 
+ #[inline] + pub fn new( + name: constants::DwAt, + form: constants::DwForm, + implicit_const_value: Option, + ) -> AttributeSpecification { + debug_assert!( + (form == constants::DW_FORM_implicit_const && implicit_const_value.is_some()) + || (form != constants::DW_FORM_implicit_const && implicit_const_value.is_none()) + ); + AttributeSpecification { + name, + form, + implicit_const_value: implicit_const_value.unwrap_or(0), + } + } + + /// Get the attribute's name. + #[inline] + pub fn name(&self) -> constants::DwAt { + self.name + } + + /// Get the attribute's form. + #[inline] + pub fn form(&self) -> constants::DwForm { + self.form + } + + /// Get the attribute's implicit const value. + #[inline] + pub fn implicit_const_value(&self) -> Option { + if self.form == constants::DW_FORM_implicit_const { + Some(self.implicit_const_value) + } else { + None + } + } + + /// Return the size of the attribute, in bytes. + /// + /// Note that because some attributes are variably sized, the size cannot + /// always be known without parsing, in which case we return `None`. + pub fn size(&self, header: &UnitHeader) -> Option { + get_attribute_size(self.form, header.encoding()).map(usize::from) + } + + /// Parse an attribute's form. + fn parse_form(input: &mut R) -> Result { + let val = input.read_uleb128_u16()?; + if val == 0 { + Err(Error::AttributeFormZero) + } else { + Ok(constants::DwForm(val)) + } + } + + /// Parse an attribute specification. Returns `None` for the null attribute + /// specification, `Some` for an actual attribute specification. + fn parse(input: &mut R) -> Result> { + let name = input.read_uleb128_u16()?; + if name == 0 { + // Parse the null attribute specification. 
+ let form = input.read_uleb128_u16()?; + return if form == 0 { + Ok(None) + } else { + Err(Error::ExpectedZero) + }; + } + + let name = constants::DwAt(name); + let form = Self::parse_form(input)?; + let implicit_const_value = if form == constants::DW_FORM_implicit_const { + Some(input.read_sleb128()?) + } else { + None + }; + let spec = AttributeSpecification::new(name, form, implicit_const_value); + Ok(Some(spec)) + } +} + +#[inline] +pub(crate) fn get_attribute_size(form: constants::DwForm, encoding: Encoding) -> Option { + match form { + constants::DW_FORM_addr => Some(encoding.address_size), + + constants::DW_FORM_implicit_const | constants::DW_FORM_flag_present => Some(0), + + constants::DW_FORM_data1 + | constants::DW_FORM_flag + | constants::DW_FORM_strx1 + | constants::DW_FORM_ref1 + | constants::DW_FORM_addrx1 => Some(1), + + constants::DW_FORM_data2 + | constants::DW_FORM_ref2 + | constants::DW_FORM_addrx2 + | constants::DW_FORM_strx2 => Some(2), + + constants::DW_FORM_addrx3 | constants::DW_FORM_strx3 => Some(3), + + constants::DW_FORM_data4 + | constants::DW_FORM_ref_sup4 + | constants::DW_FORM_ref4 + | constants::DW_FORM_strx4 + | constants::DW_FORM_addrx4 => Some(4), + + constants::DW_FORM_data8 + | constants::DW_FORM_ref8 + | constants::DW_FORM_ref_sig8 + | constants::DW_FORM_ref_sup8 => Some(8), + + constants::DW_FORM_data16 => Some(16), + + constants::DW_FORM_sec_offset + | constants::DW_FORM_GNU_ref_alt + | constants::DW_FORM_strp + | constants::DW_FORM_strp_sup + | constants::DW_FORM_GNU_strp_alt + | constants::DW_FORM_line_strp => Some(encoding.format.word_size()), + + constants::DW_FORM_ref_addr => { + // This is an offset, but DWARF version 2 specifies that DW_FORM_ref_addr + // has the same size as an address on the target system. This was changed + // in DWARF version 3. + Some(if encoding.version == 2 { + encoding.address_size + } else { + encoding.format.word_size() + }) + } + + // Variably sized forms. 
+ constants::DW_FORM_block + | constants::DW_FORM_block1 + | constants::DW_FORM_block2 + | constants::DW_FORM_block4 + | constants::DW_FORM_exprloc + | constants::DW_FORM_ref_udata + | constants::DW_FORM_string + | constants::DW_FORM_sdata + | constants::DW_FORM_udata + | constants::DW_FORM_indirect => None, + + // We don't know the size of unknown forms. + _ => None, + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::constants; + use crate::endianity::LittleEndian; + use crate::read::{EndianSlice, Error}; + use crate::test_util::GimliSectionMethods; + #[cfg(target_pointer_width = "32")] + use core::u32; + use test_assembler::Section; + + pub trait AbbrevSectionMethods { + fn abbrev(self, code: u64, tag: constants::DwTag, children: constants::DwChildren) -> Self; + fn abbrev_null(self) -> Self; + fn abbrev_attr(self, name: constants::DwAt, form: constants::DwForm) -> Self; + fn abbrev_attr_implicit_const(self, name: constants::DwAt, value: i64) -> Self; + fn abbrev_attr_null(self) -> Self; + } + + impl AbbrevSectionMethods for Section { + fn abbrev(self, code: u64, tag: constants::DwTag, children: constants::DwChildren) -> Self { + self.uleb(code).uleb(tag.0.into()).D8(children.0) + } + + fn abbrev_null(self) -> Self { + self.D8(0) + } + + fn abbrev_attr(self, name: constants::DwAt, form: constants::DwForm) -> Self { + self.uleb(name.0.into()).uleb(form.0.into()) + } + + fn abbrev_attr_implicit_const(self, name: constants::DwAt, value: i64) -> Self { + self.uleb(name.0.into()) + .uleb(constants::DW_FORM_implicit_const.0.into()) + .sleb(value) + } + + fn abbrev_attr_null(self) -> Self { + self.D8(0).D8(0) + } + } + + #[test] + fn test_debug_abbrev_ok() { + let extra_start = [1, 2, 3, 4]; + let expected_rest = [5, 6, 7, 8]; + #[rustfmt::skip] + let buf = Section::new() + .append_bytes(&extra_start) + .abbrev(2, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no) + .abbrev_attr(constants::DW_AT_name, constants::DW_FORM_string) + 
.abbrev_attr_null() + .abbrev(1, constants::DW_TAG_compile_unit, constants::DW_CHILDREN_yes) + .abbrev_attr(constants::DW_AT_producer, constants::DW_FORM_strp) + .abbrev_attr(constants::DW_AT_language, constants::DW_FORM_data2) + .abbrev_attr_null() + .abbrev_null() + .append_bytes(&expected_rest) + .get_contents() + .unwrap(); + + let abbrev1 = Abbreviation::new( + 1, + constants::DW_TAG_compile_unit, + constants::DW_CHILDREN_yes, + vec![ + AttributeSpecification::new( + constants::DW_AT_producer, + constants::DW_FORM_strp, + None, + ), + AttributeSpecification::new( + constants::DW_AT_language, + constants::DW_FORM_data2, + None, + ), + ] + .into(), + ); + + let abbrev2 = Abbreviation::new( + 2, + constants::DW_TAG_subprogram, + constants::DW_CHILDREN_no, + vec![AttributeSpecification::new( + constants::DW_AT_name, + constants::DW_FORM_string, + None, + )] + .into(), + ); + + let debug_abbrev = DebugAbbrev::new(&buf, LittleEndian); + let debug_abbrev_offset = DebugAbbrevOffset(extra_start.len()); + let abbrevs = debug_abbrev + .abbreviations(debug_abbrev_offset) + .expect("Should parse abbreviations"); + assert_eq!(abbrevs.get(1), Some(&abbrev1)); + assert_eq!(abbrevs.get(2), Some(&abbrev2)); + } + + #[test] + fn test_abbreviations_insert() { + fn abbrev(code: u16) -> Abbreviation { + Abbreviation::new( + code.into(), + constants::DwTag(code), + constants::DW_CHILDREN_no, + vec![].into(), + ) + } + + fn assert_abbrev(abbrevs: &Abbreviations, code: u16) { + let abbrev = abbrevs.get(code.into()).unwrap(); + assert_eq!(abbrev.tag(), constants::DwTag(code)); + } + + // Sequential insert. + let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(1)).unwrap(); + abbrevs.insert(abbrev(2)).unwrap(); + assert_eq!(abbrevs.vec.len(), 2); + assert!(abbrevs.map.is_empty()); + assert_abbrev(&abbrevs, 1); + assert_abbrev(&abbrevs, 2); + + // Out of order insert. 
+ let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(2)).unwrap(); + abbrevs.insert(abbrev(3)).unwrap(); + assert!(abbrevs.vec.is_empty()); + assert_abbrev(&abbrevs, 2); + assert_abbrev(&abbrevs, 3); + + // Mixed order insert. + let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(1)).unwrap(); + abbrevs.insert(abbrev(3)).unwrap(); + abbrevs.insert(abbrev(2)).unwrap(); + assert_eq!(abbrevs.vec.len(), 2); + assert_abbrev(&abbrevs, 1); + assert_abbrev(&abbrevs, 2); + assert_abbrev(&abbrevs, 3); + + // Duplicate code in vec. + let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(1)).unwrap(); + abbrevs.insert(abbrev(2)).unwrap(); + assert_eq!(abbrevs.insert(abbrev(1)), Err(())); + assert_eq!(abbrevs.insert(abbrev(2)), Err(())); + + // Duplicate code in map when adding to map. + let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(2)).unwrap(); + assert_eq!(abbrevs.insert(abbrev(2)), Err(())); + + // Duplicate code in map when adding to vec. + let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(2)).unwrap(); + abbrevs.insert(abbrev(1)).unwrap(); + assert_eq!(abbrevs.insert(abbrev(2)), Err(())); + + // 32-bit usize conversions. + let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(2)).unwrap(); + } + + #[test] + #[cfg(target_pointer_width = "32")] + fn test_abbreviations_insert_32() { + fn abbrev(code: u64) -> Abbreviation { + Abbreviation::new( + code, + constants::DwTag(code as u16), + constants::DW_CHILDREN_no, + vec![].into(), + ) + } + + fn assert_abbrev(abbrevs: &Abbreviations, code: u64) { + let abbrev = abbrevs.get(code).unwrap(); + assert_eq!(abbrev.tag(), constants::DwTag(code as u16)); + } + + let mut abbrevs = Abbreviations::empty(); + abbrevs.insert(abbrev(1)).unwrap(); + + let wrap_code = (u32::MAX as u64 + 1) + 1; + // `get` should not treat the wrapped code as `1`. + assert_eq!(abbrevs.get(wrap_code), None); + // `insert` should not treat the wrapped code as `1`. 
+ abbrevs.insert(abbrev(wrap_code)).unwrap(); + assert_abbrev(&abbrevs, 1); + assert_abbrev(&abbrevs, wrap_code); + } + + #[test] + fn test_parse_abbreviations_ok() { + let expected_rest = [1, 2, 3, 4]; + #[rustfmt::skip] + let buf = Section::new() + .abbrev(2, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no) + .abbrev_attr(constants::DW_AT_name, constants::DW_FORM_string) + .abbrev_attr_null() + .abbrev(1, constants::DW_TAG_compile_unit, constants::DW_CHILDREN_yes) + .abbrev_attr(constants::DW_AT_producer, constants::DW_FORM_strp) + .abbrev_attr(constants::DW_AT_language, constants::DW_FORM_data2) + .abbrev_attr_null() + .abbrev_null() + .append_bytes(&expected_rest) + .get_contents() + .unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let abbrev1 = Abbreviation::new( + 1, + constants::DW_TAG_compile_unit, + constants::DW_CHILDREN_yes, + vec![ + AttributeSpecification::new( + constants::DW_AT_producer, + constants::DW_FORM_strp, + None, + ), + AttributeSpecification::new( + constants::DW_AT_language, + constants::DW_FORM_data2, + None, + ), + ] + .into(), + ); + + let abbrev2 = Abbreviation::new( + 2, + constants::DW_TAG_subprogram, + constants::DW_CHILDREN_no, + vec![AttributeSpecification::new( + constants::DW_AT_name, + constants::DW_FORM_string, + None, + )] + .into(), + ); + + let abbrevs = Abbreviations::parse(rest).expect("Should parse abbreviations"); + assert_eq!(abbrevs.get(1), Some(&abbrev1)); + assert_eq!(abbrevs.get(2), Some(&abbrev2)); + assert_eq!(*rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_abbreviations_duplicate() { + let expected_rest = [1, 2, 3, 4]; + #[rustfmt::skip] + let buf = Section::new() + .abbrev(1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no) + .abbrev_attr(constants::DW_AT_name, constants::DW_FORM_string) + .abbrev_attr_null() + .abbrev(1, constants::DW_TAG_compile_unit, constants::DW_CHILDREN_yes) + .abbrev_attr(constants::DW_AT_producer, 
constants::DW_FORM_strp) + .abbrev_attr(constants::DW_AT_language, constants::DW_FORM_data2) + .abbrev_attr_null() + .abbrev_null() + .append_bytes(&expected_rest) + .get_contents() + .unwrap(); + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match Abbreviations::parse(buf) { + Err(Error::DuplicateAbbreviationCode) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_abbreviation_tag_ok() { + let buf = [0x01, 0x02]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let tag = Abbreviation::parse_tag(rest).expect("Should parse tag"); + assert_eq!(tag, constants::DW_TAG_array_type); + assert_eq!(*rest, EndianSlice::new(&buf[1..], LittleEndian)); + } + + #[test] + fn test_parse_abbreviation_tag_zero() { + let buf = [0x00]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + match Abbreviation::parse_tag(buf) { + Err(Error::AbbreviationTagZero) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_abbreviation_has_children() { + let buf = [0x00, 0x01, 0x02]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let val = Abbreviation::parse_has_children(rest).expect("Should parse children"); + assert_eq!(val, constants::DW_CHILDREN_no); + let val = Abbreviation::parse_has_children(rest).expect("Should parse children"); + assert_eq!(val, constants::DW_CHILDREN_yes); + match Abbreviation::parse_has_children(rest) { + Err(Error::BadHasChildren) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_abbreviation_ok() { + let expected_rest = [0x01, 0x02, 0x03, 0x04]; + let buf = Section::new() + .abbrev(1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no) + .abbrev_attr(constants::DW_AT_name, constants::DW_FORM_string) + .abbrev_attr_null() + .append_bytes(&expected_rest) + .get_contents() + .unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let expect = 
Some(Abbreviation::new( + 1, + constants::DW_TAG_subprogram, + constants::DW_CHILDREN_no, + vec![AttributeSpecification::new( + constants::DW_AT_name, + constants::DW_FORM_string, + None, + )] + .into(), + )); + + let abbrev = Abbreviation::parse(rest).expect("Should parse abbreviation"); + assert_eq!(abbrev, expect); + assert_eq!(*rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_abbreviation_implicit_const_ok() { + let expected_rest = [0x01, 0x02, 0x03, 0x04]; + let buf = Section::new() + .abbrev(1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no) + .abbrev_attr_implicit_const(constants::DW_AT_name, -42) + .abbrev_attr_null() + .append_bytes(&expected_rest) + .get_contents() + .unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let expect = Some(Abbreviation::new( + 1, + constants::DW_TAG_subprogram, + constants::DW_CHILDREN_no, + vec![AttributeSpecification::new( + constants::DW_AT_name, + constants::DW_FORM_implicit_const, + Some(-42), + )] + .into(), + )); + + let abbrev = Abbreviation::parse(rest).expect("Should parse abbreviation"); + assert_eq!(abbrev, expect); + assert_eq!(*rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_abbreviation_implicit_const_no_const() { + let buf = Section::new() + .abbrev(1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no) + .abbrev_attr(constants::DW_AT_name, constants::DW_FORM_implicit_const) + .get_contents() + .unwrap(); + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match Abbreviation::parse(buf) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + #[test] + fn test_parse_null_abbreviation_ok() { + let expected_rest = [0x01, 0x02, 0x03, 0x04]; + let buf = Section::new() + .abbrev_null() + .append_bytes(&expected_rest) + .get_contents() + .unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let abbrev = 
Abbreviation::parse(rest).expect("Should parse null abbreviation"); + assert!(abbrev.is_none()); + assert_eq!(*rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_attribute_form_ok() { + let buf = [0x01, 0x02]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let tag = AttributeSpecification::parse_form(rest).expect("Should parse form"); + assert_eq!(tag, constants::DW_FORM_addr); + assert_eq!(*rest, EndianSlice::new(&buf[1..], LittleEndian)); + } + + #[test] + fn test_parse_attribute_form_zero() { + let buf = [0x00]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + match AttributeSpecification::parse_form(buf) { + Err(Error::AttributeFormZero) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_null_attribute_specification_ok() { + let buf = [0x00, 0x00, 0x01]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let attr = + AttributeSpecification::parse(rest).expect("Should parse null attribute specification"); + assert!(attr.is_none()); + assert_eq!(*rest, EndianSlice::new(&buf[2..], LittleEndian)); + } + + #[test] + fn test_parse_attribute_specifications_name_zero() { + let buf = [0x00, 0x01, 0x00, 0x00]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + match AttributeSpecification::parse(buf) { + Err(Error::ExpectedZero) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_attribute_specifications_form_zero() { + let buf = [0x01, 0x00, 0x00, 0x00]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + match AttributeSpecification::parse(buf) { + Err(Error::AttributeFormZero) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_get_abbrev_zero() { + let mut abbrevs = Abbreviations::empty(); + abbrevs + .insert(Abbreviation::new( + 1, + constants::DwTag(1), + constants::DW_CHILDREN_no, + vec![].into(), + )) + .unwrap(); + 
assert!(abbrevs.get(0).is_none()); + } +} diff --git a/third_party/rust/gimli/src/read/addr.rs b/third_party/rust/gimli/src/read/addr.rs new file mode 100644 index 000000000000..fc2fbabd126a --- /dev/null +++ b/third_party/rust/gimli/src/read/addr.rs @@ -0,0 +1,118 @@ +use crate::common::{DebugAddrBase, DebugAddrIndex, SectionId}; +use crate::read::{Reader, ReaderOffset, Result, Section}; + +/// The raw contents of the `.debug_addr` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugAddr { + section: R, +} + +impl DebugAddr { + // TODO: add an iterator over the sets of addresses in the section. + // This is not needed for common usage of the section though. + + /// Returns the address at the given `base` and `index`. + /// + /// A set of addresses in the `.debug_addr` section consists of a header + /// followed by a series of addresses. + /// + /// The `base` must be the `DW_AT_addr_base` value from the compilation unit DIE. + /// This is an offset that points to the first address following the header. + /// + /// The `index` is the value of a `DW_FORM_addrx` attribute. + /// + /// The `address_size` must be the size of the address for the compilation unit. + /// This value must also match the header. However, note that we do not parse the + /// header to validate this, since locating the header is unreliable, and the GNU + /// extensions do not emit it. + pub fn get_address( + &self, + address_size: u8, + base: DebugAddrBase, + index: DebugAddrIndex, + ) -> Result { + let input = &mut self.section.clone(); + input.skip(base.0)?; + input.skip(R::Offset::from_u64( + index.0.into_u64() * u64::from(address_size), + )?)?; + input.read_address(address_size) + } +} + +impl DebugAddr { + /// Create a `DebugAddr` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. 
+ pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugAddr + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugAddr { + fn id() -> SectionId { + SectionId::DebugAddr + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugAddr { + fn from(section: R) -> Self { + DebugAddr { section } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::read::EndianSlice; + use crate::test_util::GimliSectionMethods; + use crate::{Format, LittleEndian}; + use test_assembler::{Endian, Label, LabelMaker, Section}; + + #[test] + fn test_get_address() { + for format in [Format::Dwarf32, Format::Dwarf64] { + for address_size in [4, 8] { + let zero = Label::new(); + let length = Label::new(); + let start = Label::new(); + let first = Label::new(); + let end = Label::new(); + let mut section = Section::with_endian(Endian::Little) + .mark(&zero) + .initial_length(format, &length, &start) + .D16(5) + .D8(address_size) + .D8(0) + .mark(&first); + for i in 0..20 { + section = section.word(address_size, 1000 + i); + } + section = section.mark(&end); + length.set_const((&end - &start) as u64); + + let section = section.get_contents().unwrap(); + let debug_addr = DebugAddr::from(EndianSlice::new(§ion, LittleEndian)); + let base = DebugAddrBase((&first - &zero) as usize); + + assert_eq!( + debug_addr.get_address(address_size, base, DebugAddrIndex(0)), + Ok(1000) + ); + assert_eq!( + debug_addr.get_address(address_size, base, DebugAddrIndex(19)), + Ok(1019) + ); + } + } + } +} diff --git a/third_party/rust/gimli/src/read/aranges.rs b/third_party/rust/gimli/src/read/aranges.rs new file mode 100644 index 000000000000..12bcec7e61e3 --- /dev/null +++ b/third_party/rust/gimli/src/read/aranges.rs @@ -0,0 +1,650 @@ +use crate::common::{DebugArangesOffset, DebugInfoOffset, Encoding, SectionId}; +use crate::endianity::Endianity; +use crate::read::{EndianSlice, Error, Range, Reader, ReaderOffset, Result, Section}; + +/// The 
`DebugAranges` struct represents the DWARF address range information +/// found in the `.debug_aranges` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugAranges { + section: R, +} + +impl<'input, Endian> DebugAranges> +where + Endian: Endianity, +{ + /// Construct a new `DebugAranges` instance from the data in the `.debug_aranges` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_aranges` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugAranges, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_aranges_section = || &buf; + /// let debug_aranges = + /// DebugAranges::new(read_debug_aranges_section(), LittleEndian); + /// ``` + pub fn new(section: &'input [u8], endian: Endian) -> Self { + DebugAranges { + section: EndianSlice::new(section, endian), + } + } +} + +impl DebugAranges { + /// Iterate the sets of entries in the `.debug_aranges` section. + /// + /// Each set of entries belongs to a single unit. + pub fn headers(&self) -> ArangeHeaderIter { + ArangeHeaderIter { + input: self.section.clone(), + offset: DebugArangesOffset(R::Offset::from_u8(0)), + } + } + + /// Get the header at the given offset. + pub fn header(&self, offset: DebugArangesOffset) -> Result> { + let mut input = self.section.clone(); + input.skip(offset.0)?; + ArangeHeader::parse(&mut input, offset) + } +} + +impl DebugAranges { + /// Create a `DebugAranges` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. 
+ pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugAranges + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugAranges { + fn id() -> SectionId { + SectionId::DebugAranges + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugAranges { + fn from(section: R) -> Self { + DebugAranges { section } + } +} + +/// An iterator over the headers of a `.debug_aranges` section. +#[derive(Clone, Debug)] +pub struct ArangeHeaderIter { + input: R, + offset: DebugArangesOffset, +} + +impl ArangeHeaderIter { + /// Advance the iterator to the next header. + pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + + let len = self.input.len(); + match ArangeHeader::parse(&mut self.input, self.offset) { + Ok(header) => { + self.offset.0 += len - self.input.len(); + Ok(Some(header)) + } + Err(e) => { + self.input.empty(); + Err(e) + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for ArangeHeaderIter { + type Item = ArangeHeader; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + ArangeHeaderIter::next(self) + } +} + +/// A header for a set of entries in the `.debug_arange` section. +/// +/// These entries all belong to a single unit. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ArangeHeader::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + offset: DebugArangesOffset, + encoding: Encoding, + length: Offset, + debug_info_offset: DebugInfoOffset, + segment_size: u8, + entries: R, +} + +impl ArangeHeader +where + R: Reader, + Offset: ReaderOffset, +{ + fn parse(input: &mut R, offset: DebugArangesOffset) -> Result { + let (length, format) = input.read_initial_length()?; + let mut rest = input.split(length)?; + + // Check the version. 
The DWARF 5 spec says that this is always 2, but version 3 + // has been observed in the wild, potentially due to a bug; see + // https://github.com/gimli-rs/gimli/issues/559 for more information. + // lldb allows versions 2 through 5, possibly by mistake. + let version = rest.read_u16()?; + if version != 2 && version != 3 { + return Err(Error::UnknownVersion(u64::from(version))); + } + + let debug_info_offset = rest.read_offset(format).map(DebugInfoOffset)?; + let address_size = rest.read_u8()?; + let segment_size = rest.read_u8()?; + + // unit_length + version + offset + address_size + segment_size + let header_length = format.initial_length_size() + 2 + format.word_size() + 1 + 1; + + // The first tuple following the header in each set begins at an offset that is + // a multiple of the size of a single tuple (that is, the size of a segment selector + // plus twice the size of an address). + let tuple_length = address_size + .checked_mul(2) + .and_then(|x| x.checked_add(segment_size)) + .ok_or(Error::InvalidAddressRange)?; + if tuple_length == 0 { + return Err(Error::InvalidAddressRange); + } + let padding = if header_length % tuple_length == 0 { + 0 + } else { + tuple_length - header_length % tuple_length + }; + rest.skip(R::Offset::from_u8(padding))?; + + let encoding = Encoding { + format, + version, + address_size, + // TODO: segment_size + }; + Ok(ArangeHeader { + offset, + encoding, + length, + debug_info_offset, + segment_size, + entries: rest, + }) + } + + /// Return the offset of this header within the `.debug_aranges` section. + #[inline] + pub fn offset(&self) -> DebugArangesOffset { + self.offset + } + + /// Return the length of this set of entries, including the header. + #[inline] + pub fn length(&self) -> Offset { + self.length + } + + /// Return the encoding parameters for this set of entries. + #[inline] + pub fn encoding(&self) -> Encoding { + self.encoding + } + + /// Return the segment size for this set of entries. 
+ #[inline] + pub fn segment_size(&self) -> u8 { + self.segment_size + } + + /// Return the offset into the .debug_info section for this set of arange entries. + #[inline] + pub fn debug_info_offset(&self) -> DebugInfoOffset { + self.debug_info_offset + } + + /// Return the arange entries in this set. + #[inline] + pub fn entries(&self) -> ArangeEntryIter { + ArangeEntryIter { + input: self.entries.clone(), + encoding: self.encoding, + segment_size: self.segment_size, + } + } +} + +/// An iterator over the aranges from a `.debug_aranges` section. +/// +/// Can be [used with +/// `FallibleIterator`](./index.html#using-with-fallibleiterator). +#[derive(Debug, Clone)] +pub struct ArangeEntryIter { + input: R, + encoding: Encoding, + segment_size: u8, +} + +impl ArangeEntryIter { + /// Advance the iterator and return the next arange. + /// + /// Returns the newly parsed arange as `Ok(Some(arange))`. Returns `Ok(None)` + /// when iteration is complete and all aranges have already been parsed and + /// yielded. If an error occurs while parsing the next arange, then this error + /// is returned as `Err(e)`, and all subsequent calls return `Ok(None)`. + pub fn next(&mut self) -> Result> { + if self.input.is_empty() { + return Ok(None); + } + + match ArangeEntry::parse(&mut self.input, self.encoding, self.segment_size) { + Ok(Some(entry)) => Ok(Some(entry)), + Ok(None) => { + self.input.empty(); + Ok(None) + } + Err(e) => { + self.input.empty(); + Err(e) + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for ArangeEntryIter { + type Item = ArangeEntry; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + ArangeEntryIter::next(self) + } +} + +/// A single parsed arange. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct ArangeEntry { + segment: Option, + address: u64, + length: u64, +} + +impl ArangeEntry { + /// Parse a single arange. 
Return `None` for the null arange, `Some` for an actual arange. + fn parse( + input: &mut R, + encoding: Encoding, + segment_size: u8, + ) -> Result> { + let address_size = encoding.address_size; + + let tuple_length = R::Offset::from_u8(2 * address_size + segment_size); + if tuple_length > input.len() { + input.empty(); + return Ok(None); + } + + let segment = if segment_size != 0 { + input.read_address(segment_size)? + } else { + 0 + }; + let address = input.read_address(address_size)?; + let length = input.read_address(address_size)?; + + match (segment, address, length) { + // This is meant to be a null terminator, but in practice it can occur + // before the end, possibly due to a linker omitting a function and + // leaving an unrelocated entry. + (0, 0, 0) => Self::parse(input, encoding, segment_size), + _ => Ok(Some(ArangeEntry { + segment: if segment_size != 0 { + Some(segment) + } else { + None + }, + address, + length, + })), + } + } + + /// Return the segment selector of this arange. + #[inline] + pub fn segment(&self) -> Option { + self.segment + } + + /// Return the beginning address of this arange. + #[inline] + pub fn address(&self) -> u64 { + self.address + } + + /// Return the length of this arange. + #[inline] + pub fn length(&self) -> u64 { + self.length + } + + /// Return the range. + #[inline] + pub fn range(&self) -> Range { + Range { + begin: self.address, + end: self.address.wrapping_add(self.length), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::common::{DebugInfoOffset, Format}; + use crate::endianity::LittleEndian; + use crate::read::EndianSlice; + + #[test] + fn test_iterate_headers() { + #[rustfmt::skip] + let buf = [ + // 32-bit length = 28. + 0x1c, 0x00, 0x00, 0x00, + // Version. + 0x02, 0x00, + // Offset. + 0x01, 0x02, 0x03, 0x04, + // Address size. + 0x04, + // Segment size. + 0x00, + // Dummy padding and arange tuples. 
+ 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // 32-bit length = 36. + 0x24, 0x00, 0x00, 0x00, + // Version. + 0x02, 0x00, + // Offset. + 0x11, 0x12, 0x13, 0x14, + // Address size. + 0x04, + // Segment size. + 0x00, + // Dummy padding and arange tuples. + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + + let debug_aranges = DebugAranges::new(&buf, LittleEndian); + let mut headers = debug_aranges.headers(); + + let header = headers + .next() + .expect("should parse header ok") + .expect("should have a header"); + assert_eq!(header.offset(), DebugArangesOffset(0)); + assert_eq!(header.debug_info_offset(), DebugInfoOffset(0x0403_0201)); + + let header = headers + .next() + .expect("should parse header ok") + .expect("should have a header"); + assert_eq!(header.offset(), DebugArangesOffset(0x20)); + assert_eq!(header.debug_info_offset(), DebugInfoOffset(0x1413_1211)); + } + + #[test] + fn test_parse_header_ok() { + #[rustfmt::skip] + let buf = [ + // 32-bit length = 32. + 0x20, 0x00, 0x00, 0x00, + // Version. + 0x02, 0x00, + // Offset. + 0x01, 0x02, 0x03, 0x04, + // Address size. + 0x08, + // Segment size. + 0x04, + // Length to here = 12, tuple length = 20. + // Padding to tuple length multiple = 4. + 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy arange tuple data. + 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy next arange. 
+ 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let header = + ArangeHeader::parse(rest, DebugArangesOffset(0x10)).expect("should parse header ok"); + + assert_eq!( + *rest, + EndianSlice::new(&buf[buf.len() - 16..], LittleEndian) + ); + assert_eq!( + header, + ArangeHeader { + offset: DebugArangesOffset(0x10), + encoding: Encoding { + format: Format::Dwarf32, + version: 2, + address_size: 8, + }, + length: 0x20, + debug_info_offset: DebugInfoOffset(0x0403_0201), + segment_size: 4, + entries: EndianSlice::new(&buf[buf.len() - 32..buf.len() - 16], LittleEndian), + } + ); + } + + #[test] + fn test_parse_header_overflow_error() { + #[rustfmt::skip] + let buf = [ + // 32-bit length = 32. + 0x20, 0x00, 0x00, 0x00, + // Version. + 0x02, 0x00, + // Offset. + 0x01, 0x02, 0x03, 0x04, + // Address size. + 0xff, + // Segment size. + 0xff, + // Length to here = 12, tuple length = 20. + // Padding to tuple length multiple = 4. + 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy arange tuple data. + 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy next arange. + 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let error = ArangeHeader::parse(rest, DebugArangesOffset(0x10)) + .expect_err("should fail to parse header"); + assert_eq!(error, Error::InvalidAddressRange); + } + + #[test] + fn test_parse_header_div_by_zero_error() { + #[rustfmt::skip] + let buf = [ + // 32-bit length = 32. + 0x20, 0x00, 0x00, 0x00, + // Version. + 0x02, 0x00, + // Offset. + 0x01, 0x02, 0x03, 0x04, + // Address size = 0. Could cause a division by zero if we aren't + // careful. + 0x00, + // Segment size. + 0x00, + // Length to here = 12, tuple length = 20. 
+ // Padding to tuple length multiple = 4. + 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy arange tuple data. + 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy next arange. + 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let error = ArangeHeader::parse(rest, DebugArangesOffset(0x10)) + .expect_err("should fail to parse header"); + assert_eq!(error, Error::InvalidAddressRange); + } + + #[test] + fn test_parse_entry_ok() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 2, + address_size: 4, + }; + let segment_size = 0; + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let entry = + ArangeEntry::parse(rest, encoding, segment_size).expect("should parse entry ok"); + assert_eq!(*rest, EndianSlice::new(&buf[buf.len() - 1..], LittleEndian)); + assert_eq!( + entry, + Some(ArangeEntry { + segment: None, + address: 0x0403_0201, + length: 0x0807_0605, + }) + ); + } + + #[test] + fn test_parse_entry_segment() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 2, + address_size: 4, + }; + let segment_size = 8; + #[rustfmt::skip] + let buf = [ + // Segment. + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + // Address. + 0x01, 0x02, 0x03, 0x04, + // Length. + 0x05, 0x06, 0x07, 0x08, + // Next tuple. 
+ 0x09 + ]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let entry = + ArangeEntry::parse(rest, encoding, segment_size).expect("should parse entry ok"); + assert_eq!(*rest, EndianSlice::new(&buf[buf.len() - 1..], LittleEndian)); + assert_eq!( + entry, + Some(ArangeEntry { + segment: Some(0x1817_1615_1413_1211), + address: 0x0403_0201, + length: 0x0807_0605, + }) + ); + } + + #[test] + fn test_parse_entry_zero() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 2, + address_size: 4, + }; + let segment_size = 0; + #[rustfmt::skip] + let buf = [ + // Zero tuple. + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // Address. + 0x01, 0x02, 0x03, 0x04, + // Length. + 0x05, 0x06, 0x07, 0x08, + // Next tuple. + 0x09 + ]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let entry = + ArangeEntry::parse(rest, encoding, segment_size).expect("should parse entry ok"); + assert_eq!(*rest, EndianSlice::new(&buf[buf.len() - 1..], LittleEndian)); + assert_eq!( + entry, + Some(ArangeEntry { + segment: None, + address: 0x0403_0201, + length: 0x0807_0605, + }) + ); + } +} diff --git a/third_party/rust/gimli/src/read/cfi.rs b/third_party/rust/gimli/src/read/cfi.rs new file mode 100644 index 000000000000..5aa88468cf05 --- /dev/null +++ b/third_party/rust/gimli/src/read/cfi.rs @@ -0,0 +1,7921 @@ +#[cfg(feature = "read")] +use alloc::boxed::Box; + +use core::cmp::Ordering; +use core::fmt::{self, Debug}; +use core::iter::FromIterator; +use core::mem; +use core::num::Wrapping; + +use super::util::{ArrayLike, ArrayVec}; +use crate::common::{ + DebugFrameOffset, EhFrameOffset, Encoding, Format, Register, SectionId, Vendor, +}; +use crate::constants::{self, DwEhPe}; +use crate::endianity::Endianity; +use crate::read::{ + EndianSlice, Error, Expression, Reader, ReaderOffset, Result, Section, StoreOnHeap, +}; + +/// `DebugFrame` contains the `.debug_frame` section's frame unwinding +/// information required to unwind to and recover registers from older 
frames on +/// the stack. For example, this is useful for a debugger that wants to print +/// locals in a backtrace. +/// +/// Most interesting methods are defined in the +/// [`UnwindSection`](trait.UnwindSection.html) trait. +/// +/// ### Differences between `.debug_frame` and `.eh_frame` +/// +/// While the `.debug_frame` section's information has a lot of overlap with the +/// `.eh_frame` section's information, the `.eh_frame` information tends to only +/// encode the subset of information needed for exception handling. Often, only +/// one of `.eh_frame` or `.debug_frame` will be present in an object file. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct DebugFrame { + section: R, + address_size: u8, + segment_size: u8, + vendor: Vendor, +} + +impl DebugFrame { + /// Set the size of a target address in bytes. + /// + /// This defaults to the native word size. + /// This is only used if the CIE version is less than 4. + pub fn set_address_size(&mut self, address_size: u8) { + self.address_size = address_size + } + + /// Set the size of a segment selector in bytes. + /// + /// This defaults to 0. + /// This is only used if the CIE version is less than 4. + pub fn set_segment_size(&mut self, segment_size: u8) { + self.segment_size = segment_size + } + + /// Set the vendor extensions to use. + /// + /// This defaults to `Vendor::Default`. + pub fn set_vendor(&mut self, vendor: Vendor) { + self.vendor = vendor; + } +} + +impl<'input, Endian> DebugFrame> +where + Endian: Endianity, +{ + /// Construct a new `DebugFrame` instance from the data in the + /// `.debug_frame` section. + /// + /// It is the caller's responsibility to read the section and present it as + /// a `&[u8]` slice. That means using some ELF loader on Linux, a Mach-O + /// loader on macOS, etc. 
+ /// + /// ``` + /// use gimli::{DebugFrame, NativeEndian}; + /// + /// // Use with `.debug_frame` + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_frame_section_somehow = || &buf; + /// let debug_frame = DebugFrame::new(read_debug_frame_section_somehow(), NativeEndian); + /// ``` + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl Section for DebugFrame { + fn id() -> SectionId { + SectionId::DebugFrame + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugFrame { + fn from(section: R) -> Self { + // Default to no segments and native word size. + DebugFrame { + section, + address_size: mem::size_of::() as u8, + segment_size: 0, + vendor: Vendor::Default, + } + } +} + +/// `EhFrameHdr` contains the information about the `.eh_frame_hdr` section. +/// +/// A pointer to the start of the `.eh_frame` data, and optionally, a binary +/// search table of pointers to the `.eh_frame` records that are found in this section. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct EhFrameHdr(R); + +/// `ParsedEhFrameHdr` contains the parsed information from the `.eh_frame_hdr` section. +#[derive(Clone, Debug)] +pub struct ParsedEhFrameHdr { + address_size: u8, + section: R, + + eh_frame_ptr: Pointer, + fde_count: u64, + table_enc: DwEhPe, + table: R, +} + +impl<'input, Endian> EhFrameHdr> +where + Endian: Endianity, +{ + /// Constructs a new `EhFrameHdr` instance from the data in the `.eh_frame_hdr` section. + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl EhFrameHdr { + /// Parses this `EhFrameHdr` to a `ParsedEhFrameHdr`. 
+ pub fn parse(&self, bases: &BaseAddresses, address_size: u8) -> Result> { + let mut reader = self.0.clone(); + let version = reader.read_u8()?; + if version != 1 { + return Err(Error::UnknownVersion(u64::from(version))); + } + + let eh_frame_ptr_enc = parse_pointer_encoding(&mut reader)?; + let fde_count_enc = parse_pointer_encoding(&mut reader)?; + let table_enc = parse_pointer_encoding(&mut reader)?; + + let parameters = PointerEncodingParameters { + bases: &bases.eh_frame_hdr, + func_base: None, + address_size, + section: &self.0, + }; + + // Omitting this pointer is not valid (defeats the purpose of .eh_frame_hdr entirely) + if eh_frame_ptr_enc == constants::DW_EH_PE_omit { + return Err(Error::CannotParseOmitPointerEncoding); + } + let eh_frame_ptr = parse_encoded_pointer(eh_frame_ptr_enc, ¶meters, &mut reader)?; + + let fde_count; + if fde_count_enc == constants::DW_EH_PE_omit || table_enc == constants::DW_EH_PE_omit { + fde_count = 0 + } else { + fde_count = parse_encoded_pointer(fde_count_enc, ¶meters, &mut reader)?.direct()?; + } + + Ok(ParsedEhFrameHdr { + address_size, + section: self.0.clone(), + + eh_frame_ptr, + fde_count, + table_enc, + table: reader, + }) + } +} + +impl Section for EhFrameHdr { + fn id() -> SectionId { + SectionId::EhFrameHdr + } + + fn reader(&self) -> &R { + &self.0 + } +} + +impl From for EhFrameHdr { + fn from(section: R) -> Self { + EhFrameHdr(section) + } +} + +impl ParsedEhFrameHdr { + /// Returns the address of the binary's `.eh_frame` section. + pub fn eh_frame_ptr(&self) -> Pointer { + self.eh_frame_ptr + } + + /// Retrieves the CFI binary search table, if there is one. + pub fn table(&self) -> Option> { + // There are two big edge cases here: + // * You search the table for an invalid address. As this is just a binary + // search table, we always have to return a valid result for that (unless + // you specify an address that is lower than the first address in the + // table). 
Since this means that you have to recheck that the FDE contains + // your address anyways, we just return the first FDE even when the address + // is too low. After all, we're just doing a normal binary search. + // * This falls apart when the table is empty - there is no entry we could + // return. We conclude that an empty table is not really a table at all. + if self.fde_count == 0 { + None + } else { + Some(EhHdrTable { hdr: self }) + } + } +} + +/// An iterator for `.eh_frame_hdr` section's binary search table. +/// +/// Each table entry consists of a tuple containing an `initial_location` and `address`. +/// The `initial location` represents the first address that the targeted FDE +/// is able to decode. The `address` is the address of the FDE in the `.eh_frame` section. +/// The `address` can be converted with `EhHdrTable::pointer_to_offset` and `EhFrame::fde_from_offset` to an FDE. +#[derive(Debug)] +pub struct EhHdrTableIter<'a, 'bases, R: Reader> { + hdr: &'a ParsedEhFrameHdr, + table: R, + bases: &'bases BaseAddresses, + remain: u64, +} + +impl<'a, 'bases, R: Reader> EhHdrTableIter<'a, 'bases, R> { + /// Yield the next entry in the `EhHdrTableIter`. 
+ pub fn next(&mut self) -> Result> { + if self.remain == 0 { + return Ok(None); + } + + let parameters = PointerEncodingParameters { + bases: &self.bases.eh_frame_hdr, + func_base: None, + address_size: self.hdr.address_size, + section: &self.hdr.section, + }; + + self.remain -= 1; + let from = parse_encoded_pointer(self.hdr.table_enc, ¶meters, &mut self.table)?; + let to = parse_encoded_pointer(self.hdr.table_enc, ¶meters, &mut self.table)?; + Ok(Some((from, to))) + } + /// Yield the nth entry in the `EhHdrTableIter` + pub fn nth(&mut self, n: usize) -> Result> { + use core::convert::TryFrom; + let size = match self.hdr.table_enc.format() { + constants::DW_EH_PE_uleb128 | constants::DW_EH_PE_sleb128 => { + return Err(Error::VariableLengthSearchTable); + } + constants::DW_EH_PE_sdata2 | constants::DW_EH_PE_udata2 => 2, + constants::DW_EH_PE_sdata4 | constants::DW_EH_PE_udata4 => 4, + constants::DW_EH_PE_sdata8 | constants::DW_EH_PE_udata8 => 8, + _ => return Err(Error::UnknownPointerEncoding(self.hdr.table_enc)), + }; + + let row_size = size * 2; + let n = u64::try_from(n).map_err(|_| Error::UnsupportedOffset)?; + self.remain = self.remain.saturating_sub(n); + self.table.skip(R::Offset::from_u64(n * row_size)?)?; + self.next() + } +} + +#[cfg(feature = "fallible-iterator")] +impl<'a, 'bases, R: Reader> fallible_iterator::FallibleIterator for EhHdrTableIter<'a, 'bases, R> { + type Item = (Pointer, Pointer); + type Error = Error; + fn next(&mut self) -> Result> { + EhHdrTableIter::next(self) + } + + fn size_hint(&self) -> (usize, Option) { + use core::convert::TryInto; + ( + self.remain.try_into().unwrap_or(0), + self.remain.try_into().ok(), + ) + } + + fn nth(&mut self, n: usize) -> Result> { + EhHdrTableIter::nth(self, n) + } +} + +/// The CFI binary search table that is an optional part of the `.eh_frame_hdr` section. 
+#[derive(Debug, Clone)] +pub struct EhHdrTable<'a, R: Reader> { + hdr: &'a ParsedEhFrameHdr, +} + +impl<'a, R: Reader + 'a> EhHdrTable<'a, R> { + /// Return an iterator that can walk the `.eh_frame_hdr` table. + /// + /// Each table entry consists of a tuple containing an `initial_location` and `address`. + /// The `initial location` represents the first address that the targeted FDE + /// is able to decode. The `address` is the address of the FDE in the `.eh_frame` section. + /// The `address` can be converted with `EhHdrTable::pointer_to_offset` and `EhFrame::fde_from_offset` to an FDE. + pub fn iter<'bases>(&self, bases: &'bases BaseAddresses) -> EhHdrTableIter<'_, 'bases, R> { + EhHdrTableIter { + hdr: self.hdr, + bases, + remain: self.hdr.fde_count, + table: self.hdr.table.clone(), + } + } + /// *Probably* returns a pointer to the FDE for the given address. + /// + /// This performs a binary search, so if there is no FDE for the given address, + /// this function **will** return a pointer to any other FDE that's close by. + /// + /// To be sure, you **must** call `contains` on the FDE. 
+ pub fn lookup(&self, address: u64, bases: &BaseAddresses) -> Result { + let size = match self.hdr.table_enc.format() { + constants::DW_EH_PE_uleb128 | constants::DW_EH_PE_sleb128 => { + return Err(Error::VariableLengthSearchTable); + } + constants::DW_EH_PE_sdata2 | constants::DW_EH_PE_udata2 => 2, + constants::DW_EH_PE_sdata4 | constants::DW_EH_PE_udata4 => 4, + constants::DW_EH_PE_sdata8 | constants::DW_EH_PE_udata8 => 8, + _ => return Err(Error::UnknownPointerEncoding(self.hdr.table_enc)), + }; + + let row_size = size * 2; + + let mut len = self.hdr.fde_count; + + let mut reader = self.hdr.table.clone(); + + let parameters = PointerEncodingParameters { + bases: &bases.eh_frame_hdr, + func_base: None, + address_size: self.hdr.address_size, + section: &self.hdr.section, + }; + + while len > 1 { + let head = reader.split(R::Offset::from_u64((len / 2) * row_size)?)?; + let tail = reader.clone(); + + let pivot = + parse_encoded_pointer(self.hdr.table_enc, ¶meters, &mut reader)?.direct()?; + + match pivot.cmp(&address) { + Ordering::Equal => { + reader = tail; + break; + } + Ordering::Less => { + reader = tail; + len = len - (len / 2); + } + Ordering::Greater => { + reader = head; + len /= 2; + } + } + } + + reader.skip(R::Offset::from_u64(size)?)?; + + parse_encoded_pointer(self.hdr.table_enc, ¶meters, &mut reader) + } + + /// Convert a `Pointer` to a section offset. + /// + /// This does not support indirect pointers. + pub fn pointer_to_offset(&self, ptr: Pointer) -> Result> { + let ptr = ptr.direct()?; + let eh_frame_ptr = self.hdr.eh_frame_ptr().direct()?; + + // Calculate the offset in the EhFrame section + R::Offset::from_u64(ptr - eh_frame_ptr).map(EhFrameOffset) + } + + /// Returns a parsed FDE for the given address, or `NoUnwindInfoForAddress` + /// if there are none. + /// + /// You must provide a function to get its associated CIE. See + /// `PartialFrameDescriptionEntry::parse` for more information. 
+ /// + /// # Example + /// + /// ``` + /// # use gimli::{BaseAddresses, EhFrame, ParsedEhFrameHdr, EndianSlice, NativeEndian, Error, UnwindSection}; + /// # fn foo() -> Result<(), Error> { + /// # let eh_frame: EhFrame> = unreachable!(); + /// # let eh_frame_hdr: ParsedEhFrameHdr> = unimplemented!(); + /// # let addr = 0; + /// # let bases = unimplemented!(); + /// let table = eh_frame_hdr.table().unwrap(); + /// let fde = table.fde_for_address(&eh_frame, &bases, addr, EhFrame::cie_from_offset)?; + /// # Ok(()) + /// # } + /// ``` + pub fn fde_for_address( + &self, + frame: &EhFrame, + bases: &BaseAddresses, + address: u64, + get_cie: F, + ) -> Result> + where + F: FnMut( + &EhFrame, + &BaseAddresses, + EhFrameOffset, + ) -> Result>, + { + let fdeptr = self.lookup(address, bases)?; + let offset = self.pointer_to_offset(fdeptr)?; + let entry = frame.fde_from_offset(bases, offset, get_cie)?; + if entry.contains(address) { + Ok(entry) + } else { + Err(Error::NoUnwindInfoForAddress) + } + } + + #[inline] + #[doc(hidden)] + #[deprecated(note = "Method renamed to fde_for_address; use that instead.")] + pub fn lookup_and_parse( + &self, + address: u64, + bases: &BaseAddresses, + frame: EhFrame, + get_cie: F, + ) -> Result> + where + F: FnMut( + &EhFrame, + &BaseAddresses, + EhFrameOffset, + ) -> Result>, + { + self.fde_for_address(&frame, bases, address, get_cie) + } + + /// Returns the frame unwind information for the given address, + /// or `NoUnwindInfoForAddress` if there are none. + /// + /// You must provide a function to get the associated CIE. See + /// `PartialFrameDescriptionEntry::parse` for more information. 
+ pub fn unwind_info_for_address<'ctx, F, A: UnwindContextStorage>( + &self, + frame: &EhFrame, + bases: &BaseAddresses, + ctx: &'ctx mut UnwindContext, + address: u64, + get_cie: F, + ) -> Result<&'ctx UnwindTableRow> + where + F: FnMut( + &EhFrame, + &BaseAddresses, + EhFrameOffset, + ) -> Result>, + { + let fde = self.fde_for_address(frame, bases, address, get_cie)?; + fde.unwind_info_for_address(frame, bases, ctx, address) + } +} + +/// `EhFrame` contains the frame unwinding information needed during exception +/// handling found in the `.eh_frame` section. +/// +/// Most interesting methods are defined in the +/// [`UnwindSection`](trait.UnwindSection.html) trait. +/// +/// See +/// [`DebugFrame`](./struct.DebugFrame.html#differences-between-debug_frame-and-eh_frame) +/// for some discussion on the differences between `.debug_frame` and +/// `.eh_frame`. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct EhFrame { + section: R, + address_size: u8, + vendor: Vendor, +} + +impl EhFrame { + /// Set the size of a target address in bytes. + /// + /// This defaults to the native word size. + pub fn set_address_size(&mut self, address_size: u8) { + self.address_size = address_size + } + + /// Set the vendor extensions to use. + /// + /// This defaults to `Vendor::Default`. + pub fn set_vendor(&mut self, vendor: Vendor) { + self.vendor = vendor; + } +} + +impl<'input, Endian> EhFrame> +where + Endian: Endianity, +{ + /// Construct a new `EhFrame` instance from the data in the + /// `.eh_frame` section. + /// + /// It is the caller's responsibility to read the section and present it as + /// a `&[u8]` slice. That means using some ELF loader on Linux, a Mach-O + /// loader on macOS, etc. 
+ /// + /// ``` + /// use gimli::{EhFrame, EndianSlice, NativeEndian}; + /// + /// // Use with `.eh_frame` + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_eh_frame_section_somehow = || &buf; + /// let eh_frame = EhFrame::new(read_eh_frame_section_somehow(), NativeEndian); + /// ``` + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl Section for EhFrame { + fn id() -> SectionId { + SectionId::EhFrame + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for EhFrame { + fn from(section: R) -> Self { + // Default to native word size. + EhFrame { + section, + address_size: mem::size_of::() as u8, + vendor: Vendor::Default, + } + } +} + +// This has to be `pub` to silence a warning (that is deny(..)'d by default) in +// rustc. Eventually, not having this `pub` will become a hard error. +#[doc(hidden)] +#[allow(missing_docs)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CieOffsetEncoding { + U32, + U64, +} + +/// An offset into an `UnwindSection`. +// +// Needed to avoid conflicting implementations of `Into`. +pub trait UnwindOffset: Copy + Debug + Eq + From +where + T: ReaderOffset, +{ + /// Convert an `UnwindOffset` into a `T`. + fn into(self) -> T; +} + +impl UnwindOffset for DebugFrameOffset +where + T: ReaderOffset, +{ + #[inline] + fn into(self) -> T { + self.0 + } +} + +impl UnwindOffset for EhFrameOffset +where + T: ReaderOffset, +{ + #[inline] + fn into(self) -> T { + self.0 + } +} + +/// This trait completely encapsulates everything that is different between +/// `.eh_frame` and `.debug_frame`, as well as all the bits that can change +/// between DWARF versions. +#[doc(hidden)] +pub trait _UnwindSectionPrivate { + /// Get the underlying section data. + fn section(&self) -> &R; + + /// Returns true if the given length value should be considered an + /// end-of-entries sentinel. 
+ fn length_value_is_end_of_entries(length: R::Offset) -> bool; + + /// Return true if the given offset if the CIE sentinel, false otherwise. + fn is_cie(format: Format, id: u64) -> bool; + + /// Return the CIE offset/ID encoding used by this unwind section with the + /// given DWARF format. + fn cie_offset_encoding(format: Format) -> CieOffsetEncoding; + + /// For `.eh_frame`, CIE offsets are relative to the current position. For + /// `.debug_frame`, they are relative to the start of the section. We always + /// internally store them relative to the section, so we handle translating + /// `.eh_frame`'s relative offsets in this method. If the offset calculation + /// underflows, return `None`. + fn resolve_cie_offset(&self, base: R::Offset, offset: R::Offset) -> Option; + + /// Does this version of this unwind section encode address and segment + /// sizes in its CIEs? + fn has_address_and_segment_sizes(version: u8) -> bool; + + /// The address size to use if `has_address_and_segment_sizes` returns false. + fn address_size(&self) -> u8; + + /// The segment size to use if `has_address_and_segment_sizes` returns false. + fn segment_size(&self) -> u8; + + /// The vendor extensions to use. + fn vendor(&self) -> Vendor; +} + +/// A section holding unwind information: either `.debug_frame` or +/// `.eh_frame`. See [`DebugFrame`](./struct.DebugFrame.html) and +/// [`EhFrame`](./struct.EhFrame.html) respectively. +pub trait UnwindSection: Clone + Debug + _UnwindSectionPrivate { + /// The offset type associated with this CFI section. Either + /// `DebugFrameOffset` or `EhFrameOffset`. + type Offset: UnwindOffset; + + /// Iterate over the `CommonInformationEntry`s and `FrameDescriptionEntry`s + /// in this `.debug_frame` section. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). 
+ fn entries<'bases>(&self, bases: &'bases BaseAddresses) -> CfiEntriesIter<'bases, Self, R> { + CfiEntriesIter { + section: self.clone(), + bases, + input: self.section().clone(), + } + } + + /// Parse the `CommonInformationEntry` at the given offset. + fn cie_from_offset( + &self, + bases: &BaseAddresses, + offset: Self::Offset, + ) -> Result> { + let offset = UnwindOffset::into(offset); + let input = &mut self.section().clone(); + input.skip(offset)?; + CommonInformationEntry::parse(bases, self, input) + } + + /// Parse the `PartialFrameDescriptionEntry` at the given offset. + fn partial_fde_from_offset<'bases>( + &self, + bases: &'bases BaseAddresses, + offset: Self::Offset, + ) -> Result> { + let offset = UnwindOffset::into(offset); + let input = &mut self.section().clone(); + input.skip(offset)?; + PartialFrameDescriptionEntry::parse_partial(self, bases, input) + } + + /// Parse the `FrameDescriptionEntry` at the given offset. + fn fde_from_offset( + &self, + bases: &BaseAddresses, + offset: Self::Offset, + get_cie: F, + ) -> Result> + where + F: FnMut(&Self, &BaseAddresses, Self::Offset) -> Result>, + { + let partial = self.partial_fde_from_offset(bases, offset)?; + partial.parse(get_cie) + } + + /// Find the `FrameDescriptionEntry` for the given address. + /// + /// If found, the FDE is returned. If not found, + /// `Err(gimli::Error::NoUnwindInfoForAddress)` is returned. + /// If parsing fails, the error is returned. + /// + /// You must provide a function to get its associated CIE. See + /// `PartialFrameDescriptionEntry::parse` for more information. + /// + /// Note: this iterates over all FDEs. If available, it is possible + /// to do a binary search with `EhFrameHdr::fde_for_address` instead. + fn fde_for_address( + &self, + bases: &BaseAddresses, + address: u64, + mut get_cie: F, + ) -> Result> + where + F: FnMut(&Self, &BaseAddresses, Self::Offset) -> Result>, + { + let mut entries = self.entries(bases); + while let Some(entry) = entries.next()? 
{ + match entry { + CieOrFde::Cie(_) => {} + CieOrFde::Fde(partial) => { + let fde = partial.parse(&mut get_cie)?; + if fde.contains(address) { + return Ok(fde); + } + } + } + } + Err(Error::NoUnwindInfoForAddress) + } + + /// Find the frame unwind information for the given address. + /// + /// If found, the unwind information is returned. If not found, + /// `Err(gimli::Error::NoUnwindInfoForAddress)` is returned. If parsing or + /// CFI evaluation fails, the error is returned. + /// + /// ``` + /// use gimli::{BaseAddresses, EhFrame, EndianSlice, NativeEndian, UnwindContext, + /// UnwindSection}; + /// + /// # fn foo() -> gimli::Result<()> { + /// # let read_eh_frame_section = || unimplemented!(); + /// // Get the `.eh_frame` section from the object file. Alternatively, + /// // use `EhFrame` with the `.eh_frame` section of the object file. + /// let eh_frame = EhFrame::new(read_eh_frame_section(), NativeEndian); + /// + /// # let get_frame_pc = || unimplemented!(); + /// // Get the address of the PC for a frame you'd like to unwind. + /// let address = get_frame_pc(); + /// + /// // This context is reusable, which cuts down on heap allocations. + /// let ctx = UnwindContext::new(); + /// + /// // Optionally provide base addresses for any relative pointers. If a + /// // base address isn't provided and a pointer is found that is relative to + /// // it, we will return an `Err`. 
+ /// # let address_of_text_section_in_memory = unimplemented!(); + /// # let address_of_got_section_in_memory = unimplemented!(); + /// let bases = BaseAddresses::default() + /// .set_text(address_of_text_section_in_memory) + /// .set_got(address_of_got_section_in_memory); + /// + /// let unwind_info = eh_frame.unwind_info_for_address( + /// &bases, + /// &mut ctx, + /// address, + /// EhFrame::cie_from_offset, + /// )?; + /// + /// # let do_stuff_with = |_| unimplemented!(); + /// do_stuff_with(unwind_info); + /// # let _ = ctx; + /// # unreachable!() + /// # } + /// ``` + #[inline] + fn unwind_info_for_address<'ctx, F, A: UnwindContextStorage>( + &self, + bases: &BaseAddresses, + ctx: &'ctx mut UnwindContext, + address: u64, + get_cie: F, + ) -> Result<&'ctx UnwindTableRow> + where + F: FnMut(&Self, &BaseAddresses, Self::Offset) -> Result>, + { + let fde = self.fde_for_address(bases, address, get_cie)?; + fde.unwind_info_for_address(self, bases, ctx, address) + } +} + +impl _UnwindSectionPrivate for DebugFrame { + fn section(&self) -> &R { + &self.section + } + + fn length_value_is_end_of_entries(_: R::Offset) -> bool { + false + } + + fn is_cie(format: Format, id: u64) -> bool { + match format { + Format::Dwarf32 => id == 0xffff_ffff, + Format::Dwarf64 => id == 0xffff_ffff_ffff_ffff, + } + } + + fn cie_offset_encoding(format: Format) -> CieOffsetEncoding { + match format { + Format::Dwarf32 => CieOffsetEncoding::U32, + Format::Dwarf64 => CieOffsetEncoding::U64, + } + } + + fn resolve_cie_offset(&self, _: R::Offset, offset: R::Offset) -> Option { + Some(offset) + } + + fn has_address_and_segment_sizes(version: u8) -> bool { + version == 4 + } + + fn address_size(&self) -> u8 { + self.address_size + } + + fn segment_size(&self) -> u8 { + self.segment_size + } + + fn vendor(&self) -> Vendor { + self.vendor + } +} + +impl UnwindSection for DebugFrame { + type Offset = DebugFrameOffset; +} + +impl _UnwindSectionPrivate for EhFrame { + fn section(&self) -> &R { + 
&self.section + } + + fn length_value_is_end_of_entries(length: R::Offset) -> bool { + length.into_u64() == 0 + } + + fn is_cie(_: Format, id: u64) -> bool { + id == 0 + } + + fn cie_offset_encoding(_format: Format) -> CieOffsetEncoding { + // `.eh_frame` offsets are always 4 bytes, regardless of the DWARF + // format. + CieOffsetEncoding::U32 + } + + fn resolve_cie_offset(&self, base: R::Offset, offset: R::Offset) -> Option { + base.checked_sub(offset) + } + + fn has_address_and_segment_sizes(_version: u8) -> bool { + false + } + + fn address_size(&self) -> u8 { + self.address_size + } + + fn segment_size(&self) -> u8 { + 0 + } + + fn vendor(&self) -> Vendor { + self.vendor + } +} + +impl UnwindSection for EhFrame { + type Offset = EhFrameOffset; +} + +/// Optional base addresses for the relative `DW_EH_PE_*` encoded pointers. +/// +/// During CIE/FDE parsing, if a relative pointer is encountered for a base +/// address that is unknown, an Err will be returned. +/// +/// ``` +/// use gimli::BaseAddresses; +/// +/// # fn foo() { +/// # let address_of_eh_frame_hdr_section_in_memory = unimplemented!(); +/// # let address_of_eh_frame_section_in_memory = unimplemented!(); +/// # let address_of_text_section_in_memory = unimplemented!(); +/// # let address_of_got_section_in_memory = unimplemented!(); +/// # let address_of_the_start_of_current_func = unimplemented!(); +/// let bases = BaseAddresses::default() +/// .set_eh_frame_hdr(address_of_eh_frame_hdr_section_in_memory) +/// .set_eh_frame(address_of_eh_frame_section_in_memory) +/// .set_text(address_of_text_section_in_memory) +/// .set_got(address_of_got_section_in_memory); +/// # let _ = bases; +/// # } +/// ``` +#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct BaseAddresses { + /// The base addresses to use for pointers in the `.eh_frame_hdr` section. + pub eh_frame_hdr: SectionBaseAddresses, + + /// The base addresses to use for pointers in the `.eh_frame` section. 
+ pub eh_frame: SectionBaseAddresses, +} + +/// Optional base addresses for the relative `DW_EH_PE_*` encoded pointers +/// in a particular section. +/// +/// See `BaseAddresses` for methods that are helpful in setting these addresses. +#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct SectionBaseAddresses { + /// The address of the section containing the pointer. + pub section: Option, + + /// The base address for text relative pointers. + /// This is generally the address of the `.text` section. + pub text: Option, + + /// The base address for data relative pointers. + /// + /// For pointers in the `.eh_frame_hdr` section, this is the address + /// of the `.eh_frame_hdr` section + /// + /// For pointers in the `.eh_frame` section, this is generally the + /// global pointer, such as the address of the `.got` section. + pub data: Option, +} + +impl BaseAddresses { + /// Set the `.eh_frame_hdr` section base address. + #[inline] + pub fn set_eh_frame_hdr(mut self, addr: u64) -> Self { + self.eh_frame_hdr.section = Some(addr); + self.eh_frame_hdr.data = Some(addr); + self + } + + /// Set the `.eh_frame` section base address. + #[inline] + pub fn set_eh_frame(mut self, addr: u64) -> Self { + self.eh_frame.section = Some(addr); + self + } + + /// Set the `.text` section base address. + #[inline] + pub fn set_text(mut self, addr: u64) -> Self { + self.eh_frame_hdr.text = Some(addr); + self.eh_frame.text = Some(addr); + self + } + + /// Set the `.got` section base address. + #[inline] + pub fn set_got(mut self, addr: u64) -> Self { + self.eh_frame.data = Some(addr); + self + } +} + +/// An iterator over CIE and FDE entries in a `.debug_frame` or `.eh_frame` +/// section. +/// +/// Some pointers may be encoded relative to various base addresses. Use the +/// [`BaseAddresses`](./struct.BaseAddresses.html) parameter to provide them. By +/// default, none are provided. 
If a relative pointer is encountered for a base +/// address that is unknown, an `Err` will be returned and iteration will abort. +/// +/// Can be [used with +/// `FallibleIterator`](./index.html#using-with-fallibleiterator). +/// +/// ``` +/// use gimli::{BaseAddresses, EhFrame, EndianSlice, NativeEndian, UnwindSection}; +/// +/// # fn foo() -> gimli::Result<()> { +/// # let read_eh_frame_somehow = || unimplemented!(); +/// let eh_frame = EhFrame::new(read_eh_frame_somehow(), NativeEndian); +/// +/// # let address_of_eh_frame_hdr_section_in_memory = unimplemented!(); +/// # let address_of_eh_frame_section_in_memory = unimplemented!(); +/// # let address_of_text_section_in_memory = unimplemented!(); +/// # let address_of_got_section_in_memory = unimplemented!(); +/// # let address_of_the_start_of_current_func = unimplemented!(); +/// // Provide base addresses for relative pointers. +/// let bases = BaseAddresses::default() +/// .set_eh_frame_hdr(address_of_eh_frame_hdr_section_in_memory) +/// .set_eh_frame(address_of_eh_frame_section_in_memory) +/// .set_text(address_of_text_section_in_memory) +/// .set_got(address_of_got_section_in_memory); +/// +/// let mut entries = eh_frame.entries(&bases); +/// +/// # let do_stuff_with = |_| unimplemented!(); +/// while let Some(entry) = entries.next()? { +/// do_stuff_with(entry) +/// } +/// # unreachable!() +/// # } +/// ``` +#[derive(Clone, Debug)] +pub struct CfiEntriesIter<'bases, Section, R> +where + R: Reader, + Section: UnwindSection, +{ + section: Section, + bases: &'bases BaseAddresses, + input: R, +} + +impl<'bases, Section, R> CfiEntriesIter<'bases, Section, R> +where + R: Reader, + Section: UnwindSection, +{ + /// Advance the iterator to the next entry. 
+ pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + + match parse_cfi_entry(self.bases, &self.section, &mut self.input) { + Err(e) => { + self.input.empty(); + Err(e) + } + Ok(None) => { + self.input.empty(); + Ok(None) + } + Ok(Some(entry)) => Ok(Some(entry)), + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl<'bases, Section, R> fallible_iterator::FallibleIterator for CfiEntriesIter<'bases, Section, R> +where + R: Reader, + Section: UnwindSection, +{ + type Item = CieOrFde<'bases, Section, R>; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + CfiEntriesIter::next(self) + } +} + +/// Either a `CommonInformationEntry` (CIE) or a `FrameDescriptionEntry` (FDE). +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CieOrFde<'bases, Section, R> +where + R: Reader, + Section: UnwindSection, +{ + /// This CFI entry is a `CommonInformationEntry`. + Cie(CommonInformationEntry), + /// This CFI entry is a `FrameDescriptionEntry`, however fully parsing it + /// requires parsing its CIE first, so it is left in a partially parsed + /// state. + Fde(PartialFrameDescriptionEntry<'bases, Section, R>), +} + +fn parse_cfi_entry<'bases, Section, R>( + bases: &'bases BaseAddresses, + section: &Section, + input: &mut R, +) -> Result>> +where + R: Reader, + Section: UnwindSection, +{ + let (offset, length, format) = loop { + let offset = input.offset_from(section.section()); + let (length, format) = input.read_initial_length()?; + + if Section::length_value_is_end_of_entries(length) { + return Ok(None); + } + + // Hack: skip zero padding inserted by buggy compilers/linkers. + // We require that the padding is a multiple of 32-bits, otherwise + // there is no reliable way to determine when the padding ends. This + // should be okay since CFI entries must be aligned to the address size. 
+ + if length.into_u64() != 0 || format != Format::Dwarf32 { + break (offset, length, format); + } + }; + + let mut rest = input.split(length)?; + let cie_offset_base = rest.offset_from(section.section()); + let cie_id_or_offset = match Section::cie_offset_encoding(format) { + CieOffsetEncoding::U32 => rest.read_u32().map(u64::from)?, + CieOffsetEncoding::U64 => rest.read_u64()?, + }; + + if Section::is_cie(format, cie_id_or_offset) { + let cie = CommonInformationEntry::parse_rest(offset, length, format, bases, section, rest)?; + Ok(Some(CieOrFde::Cie(cie))) + } else { + let cie_offset = R::Offset::from_u64(cie_id_or_offset)?; + let cie_offset = match section.resolve_cie_offset(cie_offset_base, cie_offset) { + None => return Err(Error::OffsetOutOfBounds), + Some(cie_offset) => cie_offset, + }; + + let fde = PartialFrameDescriptionEntry { + offset, + length, + format, + cie_offset: cie_offset.into(), + rest, + section: section.clone(), + bases, + }; + + Ok(Some(CieOrFde::Fde(fde))) + } +} + +/// We support the z-style augmentation [defined by `.eh_frame`][ehframe]. +/// +/// [ehframe]: https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] +pub struct Augmentation { + /// > A 'L' may be present at any position after the first character of the + /// > string. This character may only be present if 'z' is the first character + /// > of the string. If present, it indicates the presence of one argument in + /// > the Augmentation Data of the CIE, and a corresponding argument in the + /// > Augmentation Data of the FDE. The argument in the Augmentation Data of + /// > the CIE is 1-byte and represents the pointer encoding used for the + /// > argument in the Augmentation Data of the FDE, which is the address of a + /// > language-specific data area (LSDA). The size of the LSDA pointer is + /// > specified by the pointer encoding used. 
+ lsda: Option, + + /// > A 'P' may be present at any position after the first character of the + /// > string. This character may only be present if 'z' is the first character + /// > of the string. If present, it indicates the presence of two arguments in + /// > the Augmentation Data of the CIE. The first argument is 1-byte and + /// > represents the pointer encoding used for the second argument, which is + /// > the address of a personality routine handler. The size of the + /// > personality routine pointer is specified by the pointer encoding used. + personality: Option<(constants::DwEhPe, Pointer)>, + + /// > A 'R' may be present at any position after the first character of the + /// > string. This character may only be present if 'z' is the first character + /// > of the string. If present, The Augmentation Data shall include a 1 byte + /// > argument that represents the pointer encoding for the address pointers + /// > used in the FDE. + fde_address_encoding: Option, + + /// True if this CIE's FDEs are trampolines for signal handlers. 
+ is_signal_trampoline: bool, +} + +impl Augmentation { + fn parse( + augmentation_str: &mut R, + bases: &BaseAddresses, + address_size: u8, + section: &Section, + input: &mut R, + ) -> Result + where + R: Reader, + Section: UnwindSection, + { + debug_assert!( + !augmentation_str.is_empty(), + "Augmentation::parse should only be called if we have an augmentation" + ); + + let mut augmentation = Augmentation::default(); + + let mut parsed_first = false; + let mut data = None; + + while !augmentation_str.is_empty() { + let ch = augmentation_str.read_u8()?; + match ch { + b'z' => { + if parsed_first { + return Err(Error::UnknownAugmentation); + } + + let augmentation_length = input.read_uleb128().and_then(R::Offset::from_u64)?; + data = Some(input.split(augmentation_length)?); + } + b'L' => { + let rest = data.as_mut().ok_or(Error::UnknownAugmentation)?; + let encoding = parse_pointer_encoding(rest)?; + augmentation.lsda = Some(encoding); + } + b'P' => { + let rest = data.as_mut().ok_or(Error::UnknownAugmentation)?; + let encoding = parse_pointer_encoding(rest)?; + let parameters = PointerEncodingParameters { + bases: &bases.eh_frame, + func_base: None, + address_size, + section: section.section(), + }; + + let personality = parse_encoded_pointer(encoding, ¶meters, rest)?; + augmentation.personality = Some((encoding, personality)); + } + b'R' => { + let rest = data.as_mut().ok_or(Error::UnknownAugmentation)?; + let encoding = parse_pointer_encoding(rest)?; + augmentation.fde_address_encoding = Some(encoding); + } + b'S' => augmentation.is_signal_trampoline = true, + _ => return Err(Error::UnknownAugmentation), + } + + parsed_first = true; + } + + Ok(augmentation) + } +} + +/// Parsed augmentation data for a `FrameDescriptEntry`. 
+#[derive(Clone, Debug, Default, PartialEq, Eq)] +struct AugmentationData { + lsda: Option, +} + +impl AugmentationData { + fn parse( + augmentation: &Augmentation, + encoding_parameters: &PointerEncodingParameters<'_, R>, + input: &mut R, + ) -> Result { + // In theory, we should be iterating over the original augmentation + // string, interpreting each character, and reading the appropriate bits + // out of the augmentation data as we go. However, the only character + // that defines augmentation data in the FDE is the 'L' character, so we + // can just check for its presence directly. + + let aug_data_len = input.read_uleb128().and_then(R::Offset::from_u64)?; + let rest = &mut input.split(aug_data_len)?; + let mut augmentation_data = AugmentationData::default(); + if let Some(encoding) = augmentation.lsda { + let lsda = parse_encoded_pointer(encoding, encoding_parameters, rest)?; + augmentation_data.lsda = Some(lsda); + } + Ok(augmentation_data) + } +} + +/// > A Common Information Entry holds information that is shared among many +/// > Frame Description Entries. There is at least one CIE in every non-empty +/// > `.debug_frame` section. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CommonInformationEntry::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// The offset of this entry from the start of its containing section. + offset: Offset, + + /// > A constant that gives the number of bytes of the CIE structure, not + /// > including the length field itself (see Section 7.2.2). The size of the + /// > length field plus the value of length must be an integral multiple of + /// > the address size. + length: Offset, + + format: Format, + + /// > A version number (see Section 7.23). This number is specific to the + /// > call frame information and is independent of the DWARF version number. + version: u8, + + /// The parsed augmentation, if any. 
+ augmentation: Option, + + /// > The size of a target address in this CIE and any FDEs that use it, in + /// > bytes. If a compilation unit exists for this frame, its address size + /// > must match the address size here. + address_size: u8, + + /// "The size of a segment selector in this CIE and any FDEs that use it, in + /// bytes." + segment_size: u8, + + /// "A constant that is factored out of all advance location instructions + /// (see Section 6.4.2.1)." + code_alignment_factor: u64, + + /// > A constant that is factored out of certain offset instructions (see + /// > below). The resulting value is (operand * data_alignment_factor). + data_alignment_factor: i64, + + /// > An unsigned LEB128 constant that indicates which column in the rule + /// > table represents the return address of the function. Note that this + /// > column might not correspond to an actual machine register. + return_address_register: Register, + + /// > A sequence of rules that are interpreted to create the initial setting + /// > of each column in the table. + /// + /// > The default rule for all columns before interpretation of the initial + /// > instructions is the undefined rule. However, an ABI authoring body or a + /// > compilation system authoring body may specify an alternate default + /// > value for any or all columns. + /// + /// This is followed by `DW_CFA_nop` padding until the end of `length` bytes + /// in the input. + initial_instructions: R, +} + +impl CommonInformationEntry { + fn parse>( + bases: &BaseAddresses, + section: &Section, + input: &mut R, + ) -> Result> { + match parse_cfi_entry(bases, section, input)? 
{ + Some(CieOrFde::Cie(cie)) => Ok(cie), + Some(CieOrFde::Fde(_)) => Err(Error::NotCieId), + None => Err(Error::NoEntryAtGivenOffset), + } + } + + fn parse_rest>( + offset: R::Offset, + length: R::Offset, + format: Format, + bases: &BaseAddresses, + section: &Section, + mut rest: R, + ) -> Result> { + let version = rest.read_u8()?; + + // Version 1 of `.debug_frame` corresponds to DWARF 2, and then for + // DWARF 3 and 4, I think they decided to just match the standard's + // version. + match version { + 1 | 3 | 4 => (), + _ => return Err(Error::UnknownVersion(u64::from(version))), + } + + let mut augmentation_string = rest.read_null_terminated_slice()?; + + let (address_size, segment_size) = if Section::has_address_and_segment_sizes(version) { + let address_size = rest.read_u8()?; + let segment_size = rest.read_u8()?; + (address_size, segment_size) + } else { + (section.address_size(), section.segment_size()) + }; + + let code_alignment_factor = rest.read_uleb128()?; + let data_alignment_factor = rest.read_sleb128()?; + + let return_address_register = if version == 1 { + Register(rest.read_u8()?.into()) + } else { + rest.read_uleb128().and_then(Register::from_u64)? + }; + + let augmentation = if augmentation_string.is_empty() { + None + } else { + Some(Augmentation::parse( + &mut augmentation_string, + bases, + address_size, + section, + &mut rest, + )?) + }; + + let entry = CommonInformationEntry { + offset, + length, + format, + version, + augmentation, + address_size, + segment_size, + code_alignment_factor, + data_alignment_factor, + return_address_register, + initial_instructions: rest, + }; + + Ok(entry) + } +} + +/// # Signal Safe Methods +/// +/// These methods are guaranteed not to allocate, acquire locks, or perform any +/// other signal-unsafe operations. +impl CommonInformationEntry { + /// Get the offset of this entry from the start of its containing section. 
+ pub fn offset(&self) -> R::Offset { + self.offset + } + + /// Return the encoding parameters for this CIE. + pub fn encoding(&self) -> Encoding { + Encoding { + format: self.format, + version: u16::from(self.version), + address_size: self.address_size, + } + } + + /// The size of addresses (in bytes) in this CIE. + pub fn address_size(&self) -> u8 { + self.address_size + } + + /// Iterate over this CIE's initial instructions. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn instructions<'a, Section>( + &self, + section: &'a Section, + bases: &'a BaseAddresses, + ) -> CallFrameInstructionIter<'a, R> + where + Section: UnwindSection, + { + CallFrameInstructionIter { + input: self.initial_instructions.clone(), + address_encoding: None, + parameters: PointerEncodingParameters { + bases: &bases.eh_frame, + func_base: None, + address_size: self.address_size, + section: section.section(), + }, + vendor: section.vendor(), + } + } + + /// > A constant that gives the number of bytes of the CIE structure, not + /// > including the length field itself (see Section 7.2.2). The size of the + /// > length field plus the value of length must be an integral multiple of + /// > the address size. + pub fn entry_len(&self) -> R::Offset { + self.length + } + + /// > A version number (see Section 7.23). This number is specific to the + /// > call frame information and is independent of the DWARF version number. + pub fn version(&self) -> u8 { + self.version + } + + /// Get the augmentation data, if any exists. + /// + /// The only augmentation understood by `gimli` is that which is defined by + /// `.eh_frame`. + pub fn augmentation(&self) -> Option<&Augmentation> { + self.augmentation.as_ref() + } + + /// True if this CIE's FDEs have a LSDA. + pub fn has_lsda(&self) -> bool { + self.augmentation.map_or(false, |a| a.lsda.is_some()) + } + + /// Return the encoding of the LSDA address for this CIE's FDEs. 
+ pub fn lsda_encoding(&self) -> Option { + self.augmentation.and_then(|a| a.lsda) + } + + /// Return the encoding and address of the personality routine handler + /// for this CIE's FDEs. + pub fn personality_with_encoding(&self) -> Option<(constants::DwEhPe, Pointer)> { + self.augmentation.as_ref().and_then(|a| a.personality) + } + + /// Return the address of the personality routine handler + /// for this CIE's FDEs. + pub fn personality(&self) -> Option { + self.augmentation + .as_ref() + .and_then(|a| a.personality) + .map(|(_, p)| p) + } + + /// Return the encoding of the addresses for this CIE's FDEs. + pub fn fde_address_encoding(&self) -> Option { + self.augmentation.and_then(|a| a.fde_address_encoding) + } + + /// True if this CIE's FDEs are trampolines for signal handlers. + pub fn is_signal_trampoline(&self) -> bool { + self.augmentation.map_or(false, |a| a.is_signal_trampoline) + } + + /// > A constant that is factored out of all advance location instructions + /// > (see Section 6.4.2.1). + pub fn code_alignment_factor(&self) -> u64 { + self.code_alignment_factor + } + + /// > A constant that is factored out of certain offset instructions (see + /// > below). The resulting value is (operand * data_alignment_factor). + pub fn data_alignment_factor(&self) -> i64 { + self.data_alignment_factor + } + + /// > An unsigned ... constant that indicates which column in the rule + /// > table represents the return address of the function. Note that this + /// > column might not correspond to an actual machine register. + pub fn return_address_register(&self) -> Register { + self.return_address_register + } +} + +/// A partially parsed `FrameDescriptionEntry`. +/// +/// Fully parsing this FDE requires first parsing its CIE. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct PartialFrameDescriptionEntry<'bases, Section, R> +where + R: Reader, + Section: UnwindSection, +{ + offset: R::Offset, + length: R::Offset, + format: Format, + cie_offset: Section::Offset, + rest: R, + section: Section, + bases: &'bases BaseAddresses, +} + +impl<'bases, Section, R> PartialFrameDescriptionEntry<'bases, Section, R> +where + R: Reader, + Section: UnwindSection, +{ + fn parse_partial( + section: &Section, + bases: &'bases BaseAddresses, + input: &mut R, + ) -> Result> { + match parse_cfi_entry(bases, section, input)? { + Some(CieOrFde::Cie(_)) => Err(Error::NotFdePointer), + Some(CieOrFde::Fde(partial)) => Ok(partial), + None => Err(Error::NoEntryAtGivenOffset), + } + } + + /// Fully parse this FDE. + /// + /// You must provide a function get its associated CIE (either by parsing it + /// on demand, or looking it up in some table mapping offsets to CIEs that + /// you've already parsed, etc.) + pub fn parse(&self, get_cie: F) -> Result> + where + F: FnMut(&Section, &BaseAddresses, Section::Offset) -> Result>, + { + FrameDescriptionEntry::parse_rest( + self.offset, + self.length, + self.format, + self.cie_offset, + self.rest.clone(), + &self.section, + self.bases, + get_cie, + ) + } + + /// Get the offset of this entry from the start of its containing section. + pub fn offset(&self) -> R::Offset { + self.offset + } + + /// Get the offset of this FDE's CIE. + pub fn cie_offset(&self) -> Section::Offset { + self.cie_offset + } + + /// > A constant that gives the number of bytes of the header and + /// > instruction stream for this function, not including the length field + /// > itself (see Section 7.2.2). The size of the length field plus the value + /// > of length must be an integral multiple of the address size. + pub fn entry_len(&self) -> R::Offset { + self.length + } +} + +/// A `FrameDescriptionEntry` is a set of CFA instructions for an address range. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FrameDescriptionEntry::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// The start of this entry within its containing section. + offset: Offset, + + /// > A constant that gives the number of bytes of the header and + /// > instruction stream for this function, not including the length field + /// > itself (see Section 7.2.2). The size of the length field plus the value + /// > of length must be an integral multiple of the address size. + length: Offset, + + format: Format, + + /// "A constant offset into the .debug_frame section that denotes the CIE + /// that is associated with this FDE." + /// + /// This is the CIE at that offset. + cie: CommonInformationEntry, + + /// > The address of the first location associated with this table entry. If + /// > the segment_size field of this FDE's CIE is non-zero, the initial + /// > location is preceded by a segment selector of the given length. + initial_segment: u64, + initial_address: u64, + + /// "The number of bytes of program instructions described by this entry." + address_range: u64, + + /// The parsed augmentation data, if we have any. + augmentation: Option, + + /// "A sequence of table defining instructions that are described below." + /// + /// This is followed by `DW_CFA_nop` padding until `length` bytes of the + /// input are consumed. + instructions: R, +} + +impl FrameDescriptionEntry { + fn parse_rest( + offset: R::Offset, + length: R::Offset, + format: Format, + cie_pointer: Section::Offset, + mut rest: R, + section: &Section, + bases: &BaseAddresses, + mut get_cie: F, + ) -> Result> + where + Section: UnwindSection, + F: FnMut(&Section, &BaseAddresses, Section::Offset) -> Result>, + { + let cie = get_cie(section, bases, cie_pointer)?; + + let initial_segment = if cie.segment_size > 0 { + rest.read_address(cie.segment_size)? 
+ } else { + 0 + }; + + let mut parameters = PointerEncodingParameters { + bases: &bases.eh_frame, + func_base: None, + address_size: cie.address_size, + section: section.section(), + }; + + let (initial_address, address_range) = Self::parse_addresses(&mut rest, &cie, ¶meters)?; + parameters.func_base = Some(initial_address); + + let aug_data = if let Some(ref augmentation) = cie.augmentation { + Some(AugmentationData::parse( + augmentation, + ¶meters, + &mut rest, + )?) + } else { + None + }; + + let entry = FrameDescriptionEntry { + offset, + length, + format, + cie, + initial_segment, + initial_address, + address_range, + augmentation: aug_data, + instructions: rest, + }; + + Ok(entry) + } + + fn parse_addresses( + input: &mut R, + cie: &CommonInformationEntry, + parameters: &PointerEncodingParameters<'_, R>, + ) -> Result<(u64, u64)> { + let encoding = cie.augmentation().and_then(|a| a.fde_address_encoding); + if let Some(encoding) = encoding { + let initial_address = parse_encoded_pointer(encoding, parameters, input)?; + + // Ignore indirection. + let initial_address = initial_address.pointer(); + + // Address ranges cannot be relative to anything, so just grab the + // data format bits from the encoding. + let address_range = parse_encoded_pointer(encoding.format(), parameters, input)?; + Ok((initial_address, address_range.pointer())) + } else { + let initial_address = input.read_address(cie.address_size)?; + let address_range = input.read_address(cie.address_size)?; + Ok((initial_address, address_range)) + } + } + + /// Return the table of unwind information for this FDE. + #[inline] + pub fn rows<'a, 'ctx, Section: UnwindSection, A: UnwindContextStorage>( + &self, + section: &'a Section, + bases: &'a BaseAddresses, + ctx: &'ctx mut UnwindContext, + ) -> Result> { + UnwindTable::new(section, bases, ctx, self) + } + + /// Find the frame unwind information for the given address. 
+ /// + /// If found, the unwind information is returned along with the reset + /// context in the form `Ok((unwind_info, context))`. If not found, + /// `Err(gimli::Error::NoUnwindInfoForAddress)` is returned. If parsing or + /// CFI evaluation fails, the error is returned. + pub fn unwind_info_for_address< + 'ctx, + Section: UnwindSection, + A: UnwindContextStorage, + >( + &self, + section: &Section, + bases: &BaseAddresses, + ctx: &'ctx mut UnwindContext, + address: u64, + ) -> Result<&'ctx UnwindTableRow> { + let mut table = self.rows(section, bases, ctx)?; + while let Some(row) = table.next_row()? { + if row.contains(address) { + return Ok(table.ctx.row()); + } + } + Err(Error::NoUnwindInfoForAddress) + } +} + +/// # Signal Safe Methods +/// +/// These methods are guaranteed not to allocate, acquire locks, or perform any +/// other signal-unsafe operations. +#[allow(clippy::len_without_is_empty)] +impl FrameDescriptionEntry { + /// Get the offset of this entry from the start of its containing section. + pub fn offset(&self) -> R::Offset { + self.offset + } + + /// Get a reference to this FDE's CIE. + pub fn cie(&self) -> &CommonInformationEntry { + &self.cie + } + + /// > A constant that gives the number of bytes of the header and + /// > instruction stream for this function, not including the length field + /// > itself (see Section 7.2.2). The size of the length field plus the value + /// > of length must be an integral multiple of the address size. + pub fn entry_len(&self) -> R::Offset { + self.length + } + + /// Iterate over this FDE's instructions. + /// + /// Will not include the CIE's initial instructions, if you want those do + /// `fde.cie().instructions()` first. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). 
+ pub fn instructions<'a, Section>( + &self, + section: &'a Section, + bases: &'a BaseAddresses, + ) -> CallFrameInstructionIter<'a, R> + where + Section: UnwindSection, + { + CallFrameInstructionIter { + input: self.instructions.clone(), + address_encoding: self.cie.augmentation().and_then(|a| a.fde_address_encoding), + parameters: PointerEncodingParameters { + bases: &bases.eh_frame, + func_base: None, + address_size: self.cie.address_size, + section: section.section(), + }, + vendor: section.vendor(), + } + } + + /// The first address for which this entry has unwind information for. + pub fn initial_address(&self) -> u64 { + self.initial_address + } + + /// The number of bytes of instructions that this entry has unwind + /// information for. + pub fn len(&self) -> u64 { + self.address_range + } + + /// Return `true` if the given address is within this FDE, `false` + /// otherwise. + /// + /// This is equivalent to `entry.initial_address() <= address < + /// entry.initial_address() + entry.len()`. + pub fn contains(&self, address: u64) -> bool { + let start = self.initial_address(); + let end = start + self.len(); + start <= address && address < end + } + + /// The address of this FDE's language-specific data area (LSDA), if it has + /// any. + pub fn lsda(&self) -> Option { + self.augmentation.as_ref().and_then(|a| a.lsda) + } + + /// Return true if this FDE's function is a trampoline for a signal handler. + #[inline] + pub fn is_signal_trampoline(&self) -> bool { + self.cie().is_signal_trampoline() + } + + /// Return the address of the FDE's function's personality routine + /// handler. The personality routine does language-specific clean up when + /// unwinding the stack frames with the intent to not run them again. + #[inline] + pub fn personality(&self) -> Option { + self.cie().personality() + } +} + +/// Specification of what storage should be used for [`UnwindContext`]. 
+/// +#[cfg_attr( + feature = "read", + doc = " +Normally you would only need to use [`StoreOnHeap`], which places the stack +on the heap using [`Box`]. This is the default storage type parameter for [`UnwindContext`]. + +You may want to supply your own storage type for one of the following reasons: + + 1. In rare cases you may run into failed unwinds due to the fixed stack size + used by [`StoreOnHeap`], so you may want to try a larger `Box`. If denial + of service is not a concern, then you could also try a `Vec`-based stack which + can grow as needed. + 2. You may want to avoid heap allocations entirely. You can use a fixed-size + stack with in-line arrays, which will place the entire storage in-line into + [`UnwindContext`]. +" +)] +/// +/// Here's an implementation which uses a fixed-size stack and allocates everything in-line, +/// which will cause `UnwindContext` to be large: +/// +/// ```rust,no_run +/// # use gimli::*; +/// # +/// # fn foo<'a>(some_fde: gimli::FrameDescriptionEntry>) +/// # -> gimli::Result<()> { +/// # let eh_frame: gimli::EhFrame<_> = unreachable!(); +/// # let bases = unimplemented!(); +/// # +/// struct StoreOnStack; +/// +/// impl UnwindContextStorage for StoreOnStack { +/// type Rules = [(Register, RegisterRule); 192]; +/// type Stack = [UnwindTableRow; 4]; +/// } +/// +/// let mut ctx = UnwindContext::<_, StoreOnStack>::new_in(); +/// +/// // Initialize the context by evaluating the CIE's initial instruction program, +/// // and generate the unwind table. +/// let mut table = some_fde.rows(&eh_frame, &bases, &mut ctx)?; +/// while let Some(row) = table.next_row()? { +/// // Do stuff with each row... +/// # let _ = row; +/// } +/// # unreachable!() +/// # } +/// ``` +pub trait UnwindContextStorage: Sized { + /// The storage used for register rules in a unwind table row. + /// + /// Note that this is nested within the stack. + type Rules: ArrayLike)>; + + /// The storage used for unwind table row stack. 
+ type Stack: ArrayLike>; +} + +#[cfg(feature = "read")] +const MAX_RULES: usize = 192; +#[cfg(feature = "read")] +const MAX_UNWIND_STACK_DEPTH: usize = 4; + +#[cfg(feature = "read")] +impl UnwindContextStorage for StoreOnHeap { + type Rules = [(Register, RegisterRule); MAX_RULES]; + type Stack = Box<[UnwindTableRow; MAX_UNWIND_STACK_DEPTH]>; +} + +/// Common context needed when evaluating the call frame unwinding information. +/// +/// By default, this structure is small and allocates its internal storage +/// on the heap using [`Box`] during [`UnwindContext::new`]. +/// +/// This can be overridden by providing a custom [`UnwindContextStorage`] type parameter. +/// When using a custom storage with in-line arrays, the [`UnwindContext`] type itself +/// will be big, so in that case it's recommended to place [`UnwindContext`] on the +/// heap, e.g. using `Box::new(UnwindContext::::new_in())`. +/// +/// To avoid re-allocating the context multiple times when evaluating multiple +/// CFI programs, the same [`UnwindContext`] can be reused for multiple unwinds. +/// +/// ``` +/// use gimli::{UnwindContext, UnwindTable}; +/// +/// # fn foo<'a>(some_fde: gimli::FrameDescriptionEntry>) +/// # -> gimli::Result<()> { +/// # let eh_frame: gimli::EhFrame<_> = unreachable!(); +/// # let bases = unimplemented!(); +/// // An uninitialized context. +/// let mut ctx = UnwindContext::new(); +/// +/// // Initialize the context by evaluating the CIE's initial instruction program, +/// // and generate the unwind table. +/// let mut table = some_fde.rows(&eh_frame, &bases, &mut ctx)?; +/// while let Some(row) = table.next_row()? { +/// // Do stuff with each row... +/// # let _ = row; +/// } +/// # unreachable!() +/// # } +/// ``` +#[derive(Clone, PartialEq, Eq)] +pub struct UnwindContext = StoreOnHeap> { + // Stack of rows. The last row is the row currently being built by the + // program. There is always at least one row. 
The vast majority of CFI + // programs will only ever have one row on the stack. + stack: ArrayVec, + + // If we are evaluating an FDE's instructions, then `is_initialized` will be + // `true`. If `initial_rule` is `Some`, then the initial register rules are either + // all default rules or have just 1 non-default rule, stored in `initial_rule`. + // If it's `None`, `stack[0]` will contain the initial register rules + // described by the CIE's initial instructions. These rules are used by + // `DW_CFA_restore`. Otherwise, when we are currently evaluating a CIE's + // initial instructions, `is_initialized` will be `false` and initial rules + // cannot be read. + initial_rule: Option<(Register, RegisterRule)>, + + is_initialized: bool, +} + +impl> Debug for UnwindContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("UnwindContext") + .field("stack", &self.stack) + .field("initial_rule", &self.initial_rule) + .field("is_initialized", &self.is_initialized) + .finish() + } +} + +impl> Default for UnwindContext { + fn default() -> Self { + Self::new_in() + } +} + +#[cfg(feature = "read")] +impl UnwindContext { + /// Construct a new call frame unwinding context. + pub fn new() -> Self { + Self::new_in() + } +} + +/// # Signal Safe Methods +/// +/// These methods are guaranteed not to allocate, acquire locks, or perform any +/// other signal-unsafe operations, if an non-allocating storage is used. +impl> UnwindContext { + /// Construct a new call frame unwinding context. + pub fn new_in() -> Self { + let mut ctx = UnwindContext { + stack: Default::default(), + initial_rule: None, + is_initialized: false, + }; + ctx.reset(); + ctx + } + + /// Run the CIE's initial instructions and initialize this `UnwindContext`. 
+ fn initialize( + &mut self, + section: &Section, + bases: &BaseAddresses, + cie: &CommonInformationEntry, + ) -> Result<()> + where + R: Reader, + Section: UnwindSection, + { + // Always reset because previous initialization failure may leave dirty state. + self.reset(); + + let mut table = UnwindTable::new_for_cie(section, bases, self, cie); + while table.next_row()?.is_some() {} + + self.save_initial_rules()?; + Ok(()) + } + + fn reset(&mut self) { + self.stack.clear(); + self.stack.try_push(UnwindTableRow::default()).unwrap(); + debug_assert!(self.stack[0].is_default()); + self.initial_rule = None; + self.is_initialized = false; + } + + fn row(&self) -> &UnwindTableRow { + self.stack.last().unwrap() + } + + fn row_mut(&mut self) -> &mut UnwindTableRow { + self.stack.last_mut().unwrap() + } + + fn save_initial_rules(&mut self) -> Result<()> { + debug_assert!(!self.is_initialized); + self.initial_rule = match *self.stack.last().unwrap().registers.rules { + // All rules are default (undefined). In this case just synthesize + // an undefined rule. + [] => Some((Register(0), RegisterRule::Undefined)), + [ref rule] => Some(rule.clone()), + _ => { + let rules = self.stack.last().unwrap().clone(); + self.stack + .try_insert(0, rules) + .map_err(|_| Error::StackFull)?; + None + } + }; + self.is_initialized = true; + Ok(()) + } + + fn start_address(&self) -> u64 { + self.row().start_address + } + + fn set_start_address(&mut self, start_address: u64) { + let row = self.row_mut(); + row.start_address = start_address; + } + + fn set_register_rule(&mut self, register: Register, rule: RegisterRule) -> Result<()> { + let row = self.row_mut(); + row.registers.set(register, rule) + } + + /// Returns `None` if we have not completed evaluation of a CIE's initial + /// instructions. 
+ fn get_initial_rule(&self, register: Register) -> Option> { + if !self.is_initialized { + return None; + } + Some(match self.initial_rule { + None => self.stack[0].registers.get(register), + Some((r, ref rule)) if r == register => rule.clone(), + _ => RegisterRule::Undefined, + }) + } + + fn set_cfa(&mut self, cfa: CfaRule) { + self.row_mut().cfa = cfa; + } + + fn cfa_mut(&mut self) -> &mut CfaRule { + &mut self.row_mut().cfa + } + + fn push_row(&mut self) -> Result<()> { + let new_row = self.row().clone(); + self.stack.try_push(new_row).map_err(|_| Error::StackFull) + } + + fn pop_row(&mut self) -> Result<()> { + let min_size = if self.is_initialized && self.initial_rule.is_none() { + 2 + } else { + 1 + }; + if self.stack.len() <= min_size { + return Err(Error::PopWithEmptyStack); + } + self.stack.pop().unwrap(); + Ok(()) + } +} + +/// The `UnwindTable` iteratively evaluates a `FrameDescriptionEntry`'s +/// `CallFrameInstruction` program, yielding the each row one at a time. +/// +/// > 6.4.1 Structure of Call Frame Information +/// > +/// > DWARF supports virtual unwinding by defining an architecture independent +/// > basis for recording how procedures save and restore registers during their +/// > lifetimes. This basis must be augmented on some machines with specific +/// > information that is defined by an architecture specific ABI authoring +/// > committee, a hardware vendor, or a compiler producer. The body defining a +/// > specific augmentation is referred to below as the “augmenter.” +/// > +/// > Abstractly, this mechanism describes a very large table that has the +/// > following structure: +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// > +/// >
LOCCFAR0R1...RN
L0
L1
...
LN
+/// > +/// > The first column indicates an address for every location that contains code +/// > in a program. (In shared objects, this is an object-relative offset.) The +/// > remaining columns contain virtual unwinding rules that are associated with +/// > the indicated location. +/// > +/// > The CFA column defines the rule which computes the Canonical Frame Address +/// > value; it may be either a register and a signed offset that are added +/// > together, or a DWARF expression that is evaluated. +/// > +/// > The remaining columns are labeled by register number. This includes some +/// > registers that have special designation on some architectures such as the PC +/// > and the stack pointer register. (The actual mapping of registers for a +/// > particular architecture is defined by the augmenter.) The register columns +/// > contain rules that describe whether a given register has been saved and the +/// > rule to find the value for the register in the previous frame. +/// > +/// > ... +/// > +/// > This table would be extremely large if actually constructed as +/// > described. Most of the entries at any point in the table are identical to +/// > the ones above them. The whole table can be represented quite compactly by +/// > recording just the differences starting at the beginning address of each +/// > subroutine in the program. +#[derive(Debug)] +pub struct UnwindTable<'a, 'ctx, R: Reader, A: UnwindContextStorage = StoreOnHeap> { + code_alignment_factor: Wrapping, + data_alignment_factor: Wrapping, + next_start_address: u64, + last_end_address: u64, + returned_last_row: bool, + current_row_valid: bool, + instructions: CallFrameInstructionIter<'a, R>, + ctx: &'ctx mut UnwindContext, +} + +/// # Signal Safe Methods +/// +/// These methods are guaranteed not to allocate, acquire locks, or perform any +/// other signal-unsafe operations. 
+impl<'a, 'ctx, R: Reader, A: UnwindContextStorage> UnwindTable<'a, 'ctx, R, A> { + /// Construct a new `UnwindTable` for the given + /// `FrameDescriptionEntry`'s CFI unwinding program. + pub fn new>( + section: &'a Section, + bases: &'a BaseAddresses, + ctx: &'ctx mut UnwindContext, + fde: &FrameDescriptionEntry, + ) -> Result { + ctx.initialize(section, bases, fde.cie())?; + Ok(Self::new_for_fde(section, bases, ctx, fde)) + } + + fn new_for_fde>( + section: &'a Section, + bases: &'a BaseAddresses, + ctx: &'ctx mut UnwindContext, + fde: &FrameDescriptionEntry, + ) -> Self { + assert!(ctx.stack.len() >= 1); + UnwindTable { + code_alignment_factor: Wrapping(fde.cie().code_alignment_factor()), + data_alignment_factor: Wrapping(fde.cie().data_alignment_factor()), + next_start_address: fde.initial_address(), + last_end_address: fde.initial_address().wrapping_add(fde.len()), + returned_last_row: false, + current_row_valid: false, + instructions: fde.instructions(section, bases), + ctx, + } + } + + fn new_for_cie>( + section: &'a Section, + bases: &'a BaseAddresses, + ctx: &'ctx mut UnwindContext, + cie: &CommonInformationEntry, + ) -> Self { + assert!(ctx.stack.len() >= 1); + UnwindTable { + code_alignment_factor: Wrapping(cie.code_alignment_factor()), + data_alignment_factor: Wrapping(cie.data_alignment_factor()), + next_start_address: 0, + last_end_address: 0, + returned_last_row: false, + current_row_valid: false, + instructions: cie.instructions(section, bases), + ctx, + } + } + + /// Evaluate call frame instructions until the next row of the table is + /// completed, and return it. + /// + /// Unfortunately, this cannot be used with `FallibleIterator` because of + /// the restricted lifetime of the yielded item. 
+ pub fn next_row(&mut self) -> Result>> { + assert!(self.ctx.stack.len() >= 1); + self.ctx.set_start_address(self.next_start_address); + self.current_row_valid = false; + + loop { + match self.instructions.next() { + Err(e) => return Err(e), + + Ok(None) => { + if self.returned_last_row { + return Ok(None); + } + + let row = self.ctx.row_mut(); + row.end_address = self.last_end_address; + + self.returned_last_row = true; + self.current_row_valid = true; + return Ok(Some(row)); + } + + Ok(Some(instruction)) => { + if self.evaluate(instruction)? { + self.current_row_valid = true; + return Ok(Some(self.ctx.row())); + } + } + }; + } + } + + /// Returns the current row with the lifetime of the context. + pub fn into_current_row(self) -> Option<&'ctx UnwindTableRow> { + if self.current_row_valid { + Some(self.ctx.row()) + } else { + None + } + } + + /// Evaluate one call frame instruction. Return `Ok(true)` if the row is + /// complete, `Ok(false)` otherwise. + fn evaluate(&mut self, instruction: CallFrameInstruction) -> Result { + use crate::CallFrameInstruction::*; + + match instruction { + // Instructions that complete the current row and advance the + // address for the next row. + SetLoc { address } => { + if address < self.ctx.start_address() { + return Err(Error::InvalidAddressRange); + } + + self.next_start_address = address; + self.ctx.row_mut().end_address = self.next_start_address; + return Ok(true); + } + AdvanceLoc { delta } => { + let delta = Wrapping(u64::from(delta)) * self.code_alignment_factor; + self.next_start_address = (Wrapping(self.ctx.start_address()) + delta).0; + self.ctx.row_mut().end_address = self.next_start_address; + return Ok(true); + } + + // Instructions that modify the CFA. 
+ DefCfa { register, offset } => { + self.ctx.set_cfa(CfaRule::RegisterAndOffset { + register, + offset: offset as i64, + }); + } + DefCfaSf { + register, + factored_offset, + } => { + let data_align = self.data_alignment_factor; + self.ctx.set_cfa(CfaRule::RegisterAndOffset { + register, + offset: (Wrapping(factored_offset) * data_align).0, + }); + } + DefCfaRegister { register } => { + if let CfaRule::RegisterAndOffset { + register: ref mut reg, + .. + } = *self.ctx.cfa_mut() + { + *reg = register; + } else { + return Err(Error::CfiInstructionInInvalidContext); + } + } + DefCfaOffset { offset } => { + if let CfaRule::RegisterAndOffset { + offset: ref mut off, + .. + } = *self.ctx.cfa_mut() + { + *off = offset as i64; + } else { + return Err(Error::CfiInstructionInInvalidContext); + } + } + DefCfaOffsetSf { factored_offset } => { + if let CfaRule::RegisterAndOffset { + offset: ref mut off, + .. + } = *self.ctx.cfa_mut() + { + let data_align = self.data_alignment_factor; + *off = (Wrapping(factored_offset) * data_align).0; + } else { + return Err(Error::CfiInstructionInInvalidContext); + } + } + DefCfaExpression { expression } => { + self.ctx.set_cfa(CfaRule::Expression(expression)); + } + + // Instructions that define register rules. 
+ Undefined { register } => { + self.ctx + .set_register_rule(register, RegisterRule::Undefined)?; + } + SameValue { register } => { + self.ctx + .set_register_rule(register, RegisterRule::SameValue)?; + } + Offset { + register, + factored_offset, + } => { + let offset = Wrapping(factored_offset as i64) * self.data_alignment_factor; + self.ctx + .set_register_rule(register, RegisterRule::Offset(offset.0))?; + } + OffsetExtendedSf { + register, + factored_offset, + } => { + let offset = Wrapping(factored_offset) * self.data_alignment_factor; + self.ctx + .set_register_rule(register, RegisterRule::Offset(offset.0))?; + } + ValOffset { + register, + factored_offset, + } => { + let offset = Wrapping(factored_offset as i64) * self.data_alignment_factor; + self.ctx + .set_register_rule(register, RegisterRule::ValOffset(offset.0))?; + } + ValOffsetSf { + register, + factored_offset, + } => { + let offset = Wrapping(factored_offset) * self.data_alignment_factor; + self.ctx + .set_register_rule(register, RegisterRule::ValOffset(offset.0))?; + } + Register { + dest_register, + src_register, + } => { + self.ctx + .set_register_rule(dest_register, RegisterRule::Register(src_register))?; + } + Expression { + register, + expression, + } => { + let expression = RegisterRule::Expression(expression); + self.ctx.set_register_rule(register, expression)?; + } + ValExpression { + register, + expression, + } => { + let expression = RegisterRule::ValExpression(expression); + self.ctx.set_register_rule(register, expression)?; + } + Restore { register } => { + let initial_rule = if let Some(rule) = self.ctx.get_initial_rule(register) { + rule + } else { + // Can't restore the initial rule when we are + // evaluating the initial rules! + return Err(Error::CfiInstructionInInvalidContext); + }; + + self.ctx.set_register_rule(register, initial_rule)?; + } + + // Row push and pop instructions. 
+ RememberState => { + self.ctx.push_row()?; + } + RestoreState => { + // Pop state while preserving current location. + let start_address = self.ctx.start_address(); + self.ctx.pop_row()?; + self.ctx.set_start_address(start_address); + } + + // GNU Extension. Save the size somewhere so the unwinder can use + // it when restoring IP + ArgsSize { size } => { + self.ctx.row_mut().saved_args_size = size; + } + + // AArch64 extension. + NegateRaState => { + let register = crate::AArch64::RA_SIGN_STATE; + let value = match self.ctx.row().register(register) { + RegisterRule::Undefined => 0, + RegisterRule::Constant(value) => value, + _ => return Err(Error::CfiInstructionInInvalidContext), + }; + self.ctx + .set_register_rule(register, RegisterRule::Constant(value ^ 1))?; + } + + // No operation. + Nop => {} + }; + + Ok(false) + } +} + +// We tend to have very few register rules: usually only a couple. Even if we +// have a rule for every register, on x86-64 with SSE and everything we're +// talking about ~100 rules. So rather than keeping the rules in a hash map, or +// a vector indexed by register number (which would lead to filling lots of +// empty entries), we store them as a vec of (register number, register rule) +// pairs. +// +// Additionally, because every register's default rule is implicitly +// `RegisterRule::Undefined`, we never store a register's rule in this vec if it +// is undefined and save a little bit more space and do a little fewer +// comparisons that way. +// +// The maximum number of rules preallocated by libunwind is 97 for AArch64, 128 +// for ARM, and even 188 for MIPS. It is extremely unlikely to encounter this +// many register rules in practice. 
+// +// See: +// - https://github.com/libunwind/libunwind/blob/11fd461095ea98f4b3e3a361f5a8a558519363fa/include/tdep-x86_64/dwarf-config.h#L36 +// - https://github.com/libunwind/libunwind/blob/11fd461095ea98f4b3e3a361f5a8a558519363fa/include/tdep-aarch64/dwarf-config.h#L32 +// - https://github.com/libunwind/libunwind/blob/11fd461095ea98f4b3e3a361f5a8a558519363fa/include/tdep-arm/dwarf-config.h#L31 +// - https://github.com/libunwind/libunwind/blob/11fd461095ea98f4b3e3a361f5a8a558519363fa/include/tdep-mips/dwarf-config.h#L31 +struct RegisterRuleMap = StoreOnHeap> { + rules: ArrayVec, +} + +impl> Debug for RegisterRuleMap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RegisterRuleMap") + .field("rules", &self.rules) + .finish() + } +} + +impl> Clone for RegisterRuleMap { + fn clone(&self) -> Self { + Self { + rules: self.rules.clone(), + } + } +} + +impl> Default for RegisterRuleMap { + fn default() -> Self { + RegisterRuleMap { + rules: Default::default(), + } + } +} + +/// # Signal Safe Methods +/// +/// These methods are guaranteed not to allocate, acquire locks, or perform any +/// other signal-unsafe operations. 
+impl> RegisterRuleMap { + fn is_default(&self) -> bool { + self.rules.is_empty() + } + + fn get(&self, register: Register) -> RegisterRule { + self.rules + .iter() + .find(|rule| rule.0 == register) + .map(|r| { + debug_assert!(r.1.is_defined()); + r.1.clone() + }) + .unwrap_or(RegisterRule::Undefined) + } + + fn set(&mut self, register: Register, rule: RegisterRule) -> Result<()> { + if !rule.is_defined() { + let idx = self + .rules + .iter() + .enumerate() + .find(|&(_, r)| r.0 == register) + .map(|(i, _)| i); + if let Some(idx) = idx { + self.rules.swap_remove(idx); + } + return Ok(()); + } + + for &mut (reg, ref mut old_rule) in &mut *self.rules { + debug_assert!(old_rule.is_defined()); + if reg == register { + *old_rule = rule; + return Ok(()); + } + } + + self.rules + .try_push((register, rule)) + .map_err(|_| Error::TooManyRegisterRules) + } + + fn iter(&self) -> RegisterRuleIter<'_, T> { + RegisterRuleIter(self.rules.iter()) + } +} + +impl<'a, R, S: UnwindContextStorage> FromIterator<&'a (Register, RegisterRule)> + for RegisterRuleMap +where + R: 'a + ReaderOffset, +{ + fn from_iter(iter: T) -> Self + where + T: IntoIterator)>, + { + let iter = iter.into_iter(); + let mut rules = RegisterRuleMap::default(); + for &(reg, ref rule) in iter.filter(|r| r.1.is_defined()) { + rules.set(reg, rule.clone()).expect( + "This is only used in tests, impl isn't exposed publicly. + If you trip this, fix your test", + ); + } + rules + } +} + +impl> PartialEq for RegisterRuleMap +where + T: ReaderOffset + PartialEq, +{ + fn eq(&self, rhs: &Self) -> bool { + for &(reg, ref rule) in &*self.rules { + debug_assert!(rule.is_defined()); + if *rule != rhs.get(reg) { + return false; + } + } + + for &(reg, ref rhs_rule) in &*rhs.rules { + debug_assert!(rhs_rule.is_defined()); + if *rhs_rule != self.get(reg) { + return false; + } + } + + true + } +} + +impl> Eq for RegisterRuleMap where T: ReaderOffset + Eq {} + +/// An unordered iterator for register rules. 
+#[derive(Debug, Clone)] +pub struct RegisterRuleIter<'iter, T>(::core::slice::Iter<'iter, (Register, RegisterRule)>) +where + T: ReaderOffset; + +impl<'iter, T: ReaderOffset> Iterator for RegisterRuleIter<'iter, T> { + type Item = &'iter (Register, RegisterRule); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +/// A row in the virtual unwind table that describes how to find the values of +/// the registers in the *previous* frame for a range of PC addresses. +#[derive(PartialEq, Eq)] +pub struct UnwindTableRow = StoreOnHeap> { + start_address: u64, + end_address: u64, + saved_args_size: u64, + cfa: CfaRule, + registers: RegisterRuleMap, +} + +impl> Debug for UnwindTableRow { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("UnwindTableRow") + .field("start_address", &self.start_address) + .field("end_address", &self.end_address) + .field("saved_args_size", &self.saved_args_size) + .field("cfa", &self.cfa) + .field("registers", &self.registers) + .finish() + } +} + +impl> Clone for UnwindTableRow { + fn clone(&self) -> Self { + Self { + start_address: self.start_address, + end_address: self.end_address, + saved_args_size: self.saved_args_size, + cfa: self.cfa.clone(), + registers: self.registers.clone(), + } + } +} + +impl> Default for UnwindTableRow { + fn default() -> Self { + UnwindTableRow { + start_address: 0, + end_address: 0, + saved_args_size: 0, + cfa: Default::default(), + registers: Default::default(), + } + } +} + +impl> UnwindTableRow { + fn is_default(&self) -> bool { + self.start_address == 0 + && self.end_address == 0 + && self.cfa.is_default() + && self.registers.is_default() + } + + /// Get the starting PC address that this row applies to. + pub fn start_address(&self) -> u64 { + self.start_address + } + + /// Get the end PC address where this row's register rules become + /// unapplicable. 
+ /// + /// In other words, this row describes how to recover the last frame's + /// registers for all PCs where `row.start_address() <= PC < + /// row.end_address()`. This row does NOT describe how to recover registers + /// when `PC == row.end_address()`. + pub fn end_address(&self) -> u64 { + self.end_address + } + + /// Return `true` if the given `address` is within this row's address range, + /// `false` otherwise. + pub fn contains(&self, address: u64) -> bool { + self.start_address <= address && address < self.end_address + } + + /// Returns the amount of args currently on the stack. + /// + /// When unwinding, if the personality function requested a change in IP, + /// the SP needs to be adjusted by saved_args_size. + pub fn saved_args_size(&self) -> u64 { + self.saved_args_size + } + + /// Get the canonical frame address (CFA) recovery rule for this row. + pub fn cfa(&self) -> &CfaRule { + &self.cfa + } + + /// Get the register recovery rule for the given register number. + /// + /// The register number mapping is architecture dependent. For example, in + /// the x86-64 ABI the register number mapping is defined in Figure 3.36: + /// + /// > Figure 3.36: DWARF Register Number Mapping + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// > + /// >
Register Name Number Abbreviation
General Purpose Register RAX 0 %rax
General Purpose Register RDX 1 %rdx
General Purpose Register RCX 2 %rcx
General Purpose Register RBX 3 %rbx
General Purpose Register RSI 4 %rsi
General Purpose Register RDI 5 %rdi
General Purpose Register RBP 6 %rbp
Stack Pointer Register RSP 7 %rsp
Extended Integer Registers 8-15 8-15 %r8-%r15
Return Address RA 16
Vector Registers 0–7 17-24 %xmm0–%xmm7
Extended Vector Registers 8–15 25-32 %xmm8–%xmm15
Floating Point Registers 0–7 33-40 %st0–%st7
MMX Registers 0–7 41-48 %mm0–%mm7
Flag Register 49 %rFLAGS
Segment Register ES 50 %es
Segment Register CS 51 %cs
Segment Register SS 52 %ss
Segment Register DS 53 %ds
Segment Register FS 54 %fs
Segment Register GS 55 %gs
Reserved 56-57
FS Base address 58 %fs.base
GS Base address 59 %gs.base
Reserved 60-61
Task Register 62 %tr
LDT Register 63 %ldtr
128-bit Media Control and Status 64 %mxcsr
x87 Control Word 65 %fcw
x87 Status Word 66 %fsw
Upper Vector Registers 16–31 67-82 %xmm16–%xmm31
Reserved 83-117
Vector Mask Registers 0–7 118-125 %k0–%k7
Reserved 126-129
+ pub fn register(&self, register: Register) -> RegisterRule { + self.registers.get(register) + } + + /// Iterate over all defined register `(number, rule)` pairs. + /// + /// The rules are not iterated in any guaranteed order. Any register that + /// does not make an appearance in the iterator implicitly has the rule + /// `RegisterRule::Undefined`. + /// + /// ``` + /// # use gimli::{EndianSlice, LittleEndian, UnwindTableRow}; + /// # fn foo<'input>(unwind_table_row: UnwindTableRow) { + /// for &(register, ref rule) in unwind_table_row.registers() { + /// // ... + /// # drop(register); drop(rule); + /// } + /// # } + /// ``` + pub fn registers(&self) -> RegisterRuleIter<'_, T> { + self.registers.iter() + } +} + +/// The canonical frame address (CFA) recovery rules. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CfaRule { + /// The CFA is given offset from the given register's value. + RegisterAndOffset { + /// The register containing the base value. + register: Register, + /// The offset from the register's base value. + offset: i64, + }, + /// The CFA is obtained by evaluating this `Reader` as a DWARF expression + /// program. + Expression(UnwindExpression), +} + +impl Default for CfaRule { + fn default() -> Self { + CfaRule::RegisterAndOffset { + register: Register(0), + offset: 0, + } + } +} + +impl CfaRule { + fn is_default(&self) -> bool { + match *self { + CfaRule::RegisterAndOffset { register, offset } => { + register == Register(0) && offset == 0 + } + _ => false, + } + } +} + +/// An entry in the abstract CFI table that describes how to find the value of a +/// register. +/// +/// "The register columns contain rules that describe whether a given register +/// has been saved and the rule to find the value for the register in the +/// previous frame." +#[derive(Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum RegisterRule { + /// > A register that has this rule has no recoverable value in the previous + /// > frame. 
(By convention, it is not preserved by a callee.) + Undefined, + + /// > This register has not been modified from the previous frame. (By + /// > convention, it is preserved by the callee, but the callee has not + /// > modified it.) + SameValue, + + /// "The previous value of this register is saved at the address CFA+N where + /// CFA is the current CFA value and N is a signed offset." + Offset(i64), + + /// "The previous value of this register is the value CFA+N where CFA is the + /// current CFA value and N is a signed offset." + ValOffset(i64), + + /// "The previous value of this register is stored in another register + /// numbered R." + Register(Register), + + /// "The previous value of this register is located at the address produced + /// by executing the DWARF expression." + Expression(UnwindExpression), + + /// "The previous value of this register is the value produced by executing + /// the DWARF expression." + ValExpression(UnwindExpression), + + /// "The rule is defined externally to this specification by the augmenter." + Architectural, + + /// This is a pseudo-register with a constant value. + Constant(u64), +} + +impl RegisterRule { + fn is_defined(&self) -> bool { + !matches!(*self, RegisterRule::Undefined) + } +} + +/// A parsed call frame instruction. +#[derive(Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum CallFrameInstruction { + // 6.4.2.1 Row Creation Methods + /// > 1. DW_CFA_set_loc + /// > + /// > The DW_CFA_set_loc instruction takes a single operand that represents + /// > a target address. The required action is to create a new table row + /// > using the specified address as the location. All other values in the + /// > new row are initially identical to the current row. The new location + /// > value is always greater than the current one. If the segment_size + /// > field of this FDE's CIE is non- zero, the initial location is preceded + /// > by a segment selector of the given length. + SetLoc { + /// The target address. 
+ address: u64, + }, + + /// The `AdvanceLoc` instruction is used for all of `DW_CFA_advance_loc` and + /// `DW_CFA_advance_loc{1,2,4}`. + /// + /// > 2. DW_CFA_advance_loc + /// > + /// > The DW_CFA_advance instruction takes a single operand (encoded with + /// > the opcode) that represents a constant delta. The required action is + /// > to create a new table row with a location value that is computed by + /// > taking the current entry’s location value and adding the value of + /// > delta * code_alignment_factor. All other values in the new row are + /// > initially identical to the current row. + AdvanceLoc { + /// The delta to be added to the current address. + delta: u32, + }, + + // 6.4.2.2 CFA Definition Methods + /// > 1. DW_CFA_def_cfa + /// > + /// > The DW_CFA_def_cfa instruction takes two unsigned LEB128 operands + /// > representing a register number and a (non-factored) offset. The + /// > required action is to define the current CFA rule to use the provided + /// > register and offset. + DefCfa { + /// The target register's number. + register: Register, + /// The non-factored offset. + offset: u64, + }, + + /// > 2. DW_CFA_def_cfa_sf + /// > + /// > The DW_CFA_def_cfa_sf instruction takes two operands: an unsigned + /// > LEB128 value representing a register number and a signed LEB128 + /// > factored offset. This instruction is identical to DW_CFA_def_cfa + /// > except that the second operand is signed and factored. The resulting + /// > offset is factored_offset * data_alignment_factor. + DefCfaSf { + /// The target register's number. + register: Register, + /// The factored offset. + factored_offset: i64, + }, + + /// > 3. DW_CFA_def_cfa_register + /// > + /// > The DW_CFA_def_cfa_register instruction takes a single unsigned LEB128 + /// > operand representing a register number. The required action is to + /// > define the current CFA rule to use the provided register (but to keep + /// > the old offset). 
This operation is valid only if the current CFA rule + /// > is defined to use a register and offset. + DefCfaRegister { + /// The target register's number. + register: Register, + }, + + /// > 4. DW_CFA_def_cfa_offset + /// > + /// > The DW_CFA_def_cfa_offset instruction takes a single unsigned LEB128 + /// > operand representing a (non-factored) offset. The required action is + /// > to define the current CFA rule to use the provided offset (but to keep + /// > the old register). This operation is valid only if the current CFA + /// > rule is defined to use a register and offset. + DefCfaOffset { + /// The non-factored offset. + offset: u64, + }, + + /// > 5. DW_CFA_def_cfa_offset_sf + /// > + /// > The DW_CFA_def_cfa_offset_sf instruction takes a signed LEB128 operand + /// > representing a factored offset. This instruction is identical to + /// > DW_CFA_def_cfa_offset except that the operand is signed and + /// > factored. The resulting offset is factored_offset * + /// > data_alignment_factor. This operation is valid only if the current CFA + /// > rule is defined to use a register and offset. + DefCfaOffsetSf { + /// The factored offset. + factored_offset: i64, + }, + + /// > 6. DW_CFA_def_cfa_expression + /// > + /// > The DW_CFA_def_cfa_expression instruction takes a single operand + /// > encoded as a DW_FORM_exprloc value representing a DWARF + /// > expression. The required action is to establish that expression as the + /// > means by which the current CFA is computed. + DefCfaExpression { + /// The location of the DWARF expression. + expression: UnwindExpression, + }, + + // 6.4.2.3 Register Rule Instructions + /// > 1. DW_CFA_undefined + /// > + /// > The DW_CFA_undefined instruction takes a single unsigned LEB128 + /// > operand that represents a register number. The required action is to + /// > set the rule for the specified register to “undefined.” + Undefined { + /// The target register's number. + register: Register, + }, + + /// > 2. 
DW_CFA_same_value + /// > + /// > The DW_CFA_same_value instruction takes a single unsigned LEB128 + /// > operand that represents a register number. The required action is to + /// > set the rule for the specified register to “same value.” + SameValue { + /// The target register's number. + register: Register, + }, + + /// The `Offset` instruction represents both `DW_CFA_offset` and + /// `DW_CFA_offset_extended`. + /// + /// > 3. DW_CFA_offset + /// > + /// > The DW_CFA_offset instruction takes two operands: a register number + /// > (encoded with the opcode) and an unsigned LEB128 constant representing + /// > a factored offset. The required action is to change the rule for the + /// > register indicated by the register number to be an offset(N) rule + /// > where the value of N is factored offset * data_alignment_factor. + Offset { + /// The target register's number. + register: Register, + /// The factored offset. + factored_offset: u64, + }, + + /// > 5. DW_CFA_offset_extended_sf + /// > + /// > The DW_CFA_offset_extended_sf instruction takes two operands: an + /// > unsigned LEB128 value representing a register number and a signed + /// > LEB128 factored offset. This instruction is identical to + /// > DW_CFA_offset_extended except that the second operand is signed and + /// > factored. The resulting offset is factored_offset * + /// > data_alignment_factor. + OffsetExtendedSf { + /// The target register's number. + register: Register, + /// The factored offset. + factored_offset: i64, + }, + + /// > 6. DW_CFA_val_offset + /// > + /// > The DW_CFA_val_offset instruction takes two unsigned LEB128 operands + /// > representing a register number and a factored offset. The required + /// > action is to change the rule for the register indicated by the + /// > register number to be a val_offset(N) rule where the value of N is + /// > factored_offset * data_alignment_factor. + ValOffset { + /// The target register's number. 
+ register: Register, + /// The factored offset. + factored_offset: u64, + }, + + /// > 7. DW_CFA_val_offset_sf + /// > + /// > The DW_CFA_val_offset_sf instruction takes two operands: an unsigned + /// > LEB128 value representing a register number and a signed LEB128 + /// > factored offset. This instruction is identical to DW_CFA_val_offset + /// > except that the second operand is signed and factored. The resulting + /// > offset is factored_offset * data_alignment_factor. + ValOffsetSf { + /// The target register's number. + register: Register, + /// The factored offset. + factored_offset: i64, + }, + + /// > 8. DW_CFA_register + /// > + /// > The DW_CFA_register instruction takes two unsigned LEB128 operands + /// > representing register numbers. The required action is to set the rule + /// > for the first register to be register(R) where R is the second + /// > register. + Register { + /// The number of the register whose rule is being changed. + dest_register: Register, + /// The number of the register where the other register's value can be + /// found. + src_register: Register, + }, + + /// > 9. DW_CFA_expression + /// > + /// > The DW_CFA_expression instruction takes two operands: an unsigned + /// > LEB128 value representing a register number, and a DW_FORM_block value + /// > representing a DWARF expression. The required action is to change the + /// > rule for the register indicated by the register number to be an + /// > expression(E) rule where E is the DWARF expression. That is, the DWARF + /// > expression computes the address. The value of the CFA is pushed on the + /// > DWARF evaluation stack prior to execution of the DWARF expression. + Expression { + /// The target register's number. + register: Register, + /// The location of the DWARF expression. + expression: UnwindExpression, + }, + + /// > 10. 
DW_CFA_val_expression + /// > + /// > The DW_CFA_val_expression instruction takes two operands: an unsigned + /// > LEB128 value representing a register number, and a DW_FORM_block value + /// > representing a DWARF expression. The required action is to change the + /// > rule for the register indicated by the register number to be a + /// > val_expression(E) rule where E is the DWARF expression. That is, the + /// > DWARF expression computes the value of the given register. The value + /// > of the CFA is pushed on the DWARF evaluation stack prior to execution + /// > of the DWARF expression. + ValExpression { + /// The target register's number. + register: Register, + /// The location of the DWARF expression. + expression: UnwindExpression, + }, + + /// The `Restore` instruction represents both `DW_CFA_restore` and + /// `DW_CFA_restore_extended`. + /// + /// > 11. DW_CFA_restore + /// > + /// > The DW_CFA_restore instruction takes a single operand (encoded with + /// > the opcode) that represents a register number. The required action is + /// > to change the rule for the indicated register to the rule assigned it + /// > by the initial_instructions in the CIE. + Restore { + /// The register to be reset. + register: Register, + }, + + // 6.4.2.4 Row State Instructions + /// > 1. DW_CFA_remember_state + /// > + /// > The DW_CFA_remember_state instruction takes no operands. The required + /// > action is to push the set of rules for every register onto an implicit + /// > stack. + RememberState, + + /// > 2. DW_CFA_restore_state + /// > + /// > The DW_CFA_restore_state instruction takes no operands. The required + /// > action is to pop the set of rules off the implicit stack and place + /// > them in the current row. + RestoreState, + + /// > DW_CFA_GNU_args_size + /// > + /// > GNU Extension + /// > + /// > The DW_CFA_GNU_args_size instruction takes an unsigned LEB128 operand + /// > representing an argument size. 
This instruction specifies the total of + /// > the size of the arguments which have been pushed onto the stack. + ArgsSize { + /// The size of the arguments which have been pushed onto the stack + size: u64, + }, + + /// > DW_CFA_AARCH64_negate_ra_state + /// > + /// > AArch64 Extension + /// > + /// > The DW_CFA_AARCH64_negate_ra_state operation negates bit 0 of the + /// > RA_SIGN_STATE pseudo-register. It does not take any operands. The + /// > DW_CFA_AARCH64_negate_ra_state must not be mixed with other DWARF Register + /// > Rule Instructions on the RA_SIGN_STATE pseudo-register in one Common + /// > Information Entry (CIE) and Frame Descriptor Entry (FDE) program sequence. + NegateRaState, + + // 6.4.2.5 Padding Instruction + /// > 1. DW_CFA_nop + /// > + /// > The DW_CFA_nop instruction has no operands and no required actions. It + /// > is used as padding to make a CIE or FDE an appropriate size. + Nop, +} + +const CFI_INSTRUCTION_HIGH_BITS_MASK: u8 = 0b1100_0000; +const CFI_INSTRUCTION_LOW_BITS_MASK: u8 = !CFI_INSTRUCTION_HIGH_BITS_MASK; + +impl CallFrameInstruction { + fn parse>( + input: &mut R, + address_encoding: Option, + parameters: &PointerEncodingParameters<'_, R>, + vendor: Vendor, + ) -> Result> { + let instruction = input.read_u8()?; + let high_bits = instruction & CFI_INSTRUCTION_HIGH_BITS_MASK; + + if high_bits == constants::DW_CFA_advance_loc.0 { + let delta = instruction & CFI_INSTRUCTION_LOW_BITS_MASK; + return Ok(CallFrameInstruction::AdvanceLoc { + delta: u32::from(delta), + }); + } + + if high_bits == constants::DW_CFA_offset.0 { + let register = Register((instruction & CFI_INSTRUCTION_LOW_BITS_MASK).into()); + let offset = input.read_uleb128()?; + return Ok(CallFrameInstruction::Offset { + register, + factored_offset: offset, + }); + } + + if high_bits == constants::DW_CFA_restore.0 { + let register = Register((instruction & CFI_INSTRUCTION_LOW_BITS_MASK).into()); + return Ok(CallFrameInstruction::Restore { register }); + } + + 
debug_assert_eq!(high_bits, 0); + let instruction = constants::DwCfa(instruction); + + match instruction { + constants::DW_CFA_nop => Ok(CallFrameInstruction::Nop), + + constants::DW_CFA_set_loc => { + let address = if let Some(encoding) = address_encoding { + parse_encoded_pointer(encoding, parameters, input)?.direct()? + } else { + input.read_address(parameters.address_size)? + }; + Ok(CallFrameInstruction::SetLoc { address }) + } + + constants::DW_CFA_advance_loc1 => { + let delta = input.read_u8()?; + Ok(CallFrameInstruction::AdvanceLoc { + delta: u32::from(delta), + }) + } + + constants::DW_CFA_advance_loc2 => { + let delta = input.read_u16()?; + Ok(CallFrameInstruction::AdvanceLoc { + delta: u32::from(delta), + }) + } + + constants::DW_CFA_advance_loc4 => { + let delta = input.read_u32()?; + Ok(CallFrameInstruction::AdvanceLoc { delta }) + } + + constants::DW_CFA_offset_extended => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let offset = input.read_uleb128()?; + Ok(CallFrameInstruction::Offset { + register, + factored_offset: offset, + }) + } + + constants::DW_CFA_restore_extended => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + Ok(CallFrameInstruction::Restore { register }) + } + + constants::DW_CFA_undefined => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + Ok(CallFrameInstruction::Undefined { register }) + } + + constants::DW_CFA_same_value => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + Ok(CallFrameInstruction::SameValue { register }) + } + + constants::DW_CFA_register => { + let dest = input.read_uleb128().and_then(Register::from_u64)?; + let src = input.read_uleb128().and_then(Register::from_u64)?; + Ok(CallFrameInstruction::Register { + dest_register: dest, + src_register: src, + }) + } + + constants::DW_CFA_remember_state => Ok(CallFrameInstruction::RememberState), + + constants::DW_CFA_restore_state => Ok(CallFrameInstruction::RestoreState), 
+ + constants::DW_CFA_def_cfa => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let offset = input.read_uleb128()?; + Ok(CallFrameInstruction::DefCfa { register, offset }) + } + + constants::DW_CFA_def_cfa_register => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + Ok(CallFrameInstruction::DefCfaRegister { register }) + } + + constants::DW_CFA_def_cfa_offset => { + let offset = input.read_uleb128()?; + Ok(CallFrameInstruction::DefCfaOffset { offset }) + } + + constants::DW_CFA_def_cfa_expression => { + let length = input.read_uleb128().and_then(R::Offset::from_u64)?; + let offset = input.offset_from(parameters.section); + input.skip(length)?; + Ok(CallFrameInstruction::DefCfaExpression { + expression: UnwindExpression { offset, length }, + }) + } + + constants::DW_CFA_expression => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let length = input.read_uleb128().and_then(R::Offset::from_u64)?; + let offset = input.offset_from(parameters.section); + input.skip(length)?; + Ok(CallFrameInstruction::Expression { + register, + expression: UnwindExpression { offset, length }, + }) + } + + constants::DW_CFA_offset_extended_sf => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let offset = input.read_sleb128()?; + Ok(CallFrameInstruction::OffsetExtendedSf { + register, + factored_offset: offset, + }) + } + + constants::DW_CFA_def_cfa_sf => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let offset = input.read_sleb128()?; + Ok(CallFrameInstruction::DefCfaSf { + register, + factored_offset: offset, + }) + } + + constants::DW_CFA_def_cfa_offset_sf => { + let offset = input.read_sleb128()?; + Ok(CallFrameInstruction::DefCfaOffsetSf { + factored_offset: offset, + }) + } + + constants::DW_CFA_val_offset => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let offset = input.read_uleb128()?; + Ok(CallFrameInstruction::ValOffset { + 
register, + factored_offset: offset, + }) + } + + constants::DW_CFA_val_offset_sf => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let offset = input.read_sleb128()?; + Ok(CallFrameInstruction::ValOffsetSf { + register, + factored_offset: offset, + }) + } + + constants::DW_CFA_val_expression => { + let register = input.read_uleb128().and_then(Register::from_u64)?; + let length = input.read_uleb128().and_then(R::Offset::from_u64)?; + let offset = input.offset_from(parameters.section); + input.skip(length)?; + Ok(CallFrameInstruction::ValExpression { + register, + expression: UnwindExpression { offset, length }, + }) + } + + constants::DW_CFA_GNU_args_size => { + let size = input.read_uleb128()?; + Ok(CallFrameInstruction::ArgsSize { size }) + } + + constants::DW_CFA_AARCH64_negate_ra_state if vendor == Vendor::AArch64 => { + Ok(CallFrameInstruction::NegateRaState) + } + + otherwise => Err(Error::UnknownCallFrameInstruction(otherwise)), + } + } +} + +/// A lazy iterator parsing call frame instructions. +/// +/// Can be [used with +/// `FallibleIterator`](./index.html#using-with-fallibleiterator). +#[derive(Clone, Debug)] +pub struct CallFrameInstructionIter<'a, R: Reader> { + input: R, + address_encoding: Option, + parameters: PointerEncodingParameters<'a, R>, + vendor: Vendor, +} + +impl<'a, R: Reader> CallFrameInstructionIter<'a, R> { + /// Parse the next call frame instruction. 
+ pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + + match CallFrameInstruction::parse( + &mut self.input, + self.address_encoding, + &self.parameters, + self.vendor, + ) { + Ok(instruction) => Ok(Some(instruction)), + Err(e) => { + self.input.empty(); + Err(e) + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl<'a, R: Reader> fallible_iterator::FallibleIterator for CallFrameInstructionIter<'a, R> { + type Item = CallFrameInstruction; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + CallFrameInstructionIter::next(self) + } +} + +/// The location of a DWARF expression within an unwind section. +/// +/// This is stored as an offset and length within the section instead of as a +/// `Reader` to avoid lifetime issues when reusing [`UnwindContext`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct UnwindExpression { + /// The offset of the expression within the section. + pub offset: T, + /// The length of the expression. + pub length: T, +} + +impl UnwindExpression { + /// Get the expression from the section. + /// + /// The offset and length were previously validated when the + /// `UnwindExpression` was created, so this should not fail. + pub fn get, S: UnwindSection>( + &self, + section: &S, + ) -> Result> { + let input = &mut section.section().clone(); + input.skip(self.offset)?; + let data = input.split(self.length)?; + Ok(Expression(data)) + } +} + +/// Parse a `DW_EH_PE_*` pointer encoding. +#[doc(hidden)] +#[inline] +fn parse_pointer_encoding(input: &mut R) -> Result { + let eh_pe = input.read_u8()?; + let eh_pe = constants::DwEhPe(eh_pe); + + if eh_pe.is_valid_encoding() { + Ok(eh_pe) + } else { + Err(Error::UnknownPointerEncoding(eh_pe)) + } +} + +/// A decoded pointer. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Pointer { + /// This value is the decoded pointer value. 
+ Direct(u64), + + /// This value is *not* the pointer value, but points to the address of + /// where the real pointer value lives. In other words, deref this pointer + /// to get the real pointer value. + /// + /// Chase this pointer at your own risk: do you trust the DWARF data it came + /// from? + Indirect(u64), +} + +impl Default for Pointer { + #[inline] + fn default() -> Self { + Pointer::Direct(0) + } +} + +impl Pointer { + #[inline] + fn new(encoding: constants::DwEhPe, address: u64) -> Pointer { + if encoding.is_indirect() { + Pointer::Indirect(address) + } else { + Pointer::Direct(address) + } + } + + /// Return the direct pointer value. + #[inline] + pub fn direct(self) -> Result { + match self { + Pointer::Direct(p) => Ok(p), + Pointer::Indirect(_) => Err(Error::UnsupportedPointerEncoding), + } + } + + /// Return the pointer value, discarding indirectness information. + #[inline] + pub fn pointer(self) -> u64 { + match self { + Pointer::Direct(p) | Pointer::Indirect(p) => p, + } + } +} + +#[derive(Clone, Debug)] +struct PointerEncodingParameters<'a, R: Reader> { + bases: &'a SectionBaseAddresses, + func_base: Option, + address_size: u8, + section: &'a R, +} + +fn parse_encoded_pointer( + encoding: constants::DwEhPe, + parameters: &PointerEncodingParameters<'_, R>, + input: &mut R, +) -> Result { + // TODO: check this once only in parse_pointer_encoding + if !encoding.is_valid_encoding() { + return Err(Error::UnknownPointerEncoding(encoding)); + } + + if encoding == constants::DW_EH_PE_omit { + return Err(Error::CannotParseOmitPointerEncoding); + } + + let base = match encoding.application() { + constants::DW_EH_PE_absptr => 0, + constants::DW_EH_PE_pcrel => { + if let Some(section_base) = parameters.bases.section { + let offset_from_section = input.offset_from(parameters.section); + section_base.wrapping_add(offset_from_section.into_u64()) + } else { + return Err(Error::PcRelativePointerButSectionBaseIsUndefined); + } + } + constants::DW_EH_PE_textrel 
=> { + if let Some(text) = parameters.bases.text { + text + } else { + return Err(Error::TextRelativePointerButTextBaseIsUndefined); + } + } + constants::DW_EH_PE_datarel => { + if let Some(data) = parameters.bases.data { + data + } else { + return Err(Error::DataRelativePointerButDataBaseIsUndefined); + } + } + constants::DW_EH_PE_funcrel => { + if let Some(func) = parameters.func_base { + func + } else { + return Err(Error::FuncRelativePointerInBadContext); + } + } + constants::DW_EH_PE_aligned => return Err(Error::UnsupportedPointerEncoding), + _ => unreachable!(), + }; + + let offset = match encoding.format() { + // Unsigned variants. + constants::DW_EH_PE_absptr => input.read_address(parameters.address_size), + constants::DW_EH_PE_uleb128 => input.read_uleb128(), + constants::DW_EH_PE_udata2 => input.read_u16().map(u64::from), + constants::DW_EH_PE_udata4 => input.read_u32().map(u64::from), + constants::DW_EH_PE_udata8 => input.read_u64(), + + // Signed variants. Here we sign extend the values (happens by + // default when casting a signed integer to a larger range integer + // in Rust), return them as u64, and rely on wrapping addition to do + // the right thing when adding these offsets to their bases. + constants::DW_EH_PE_sleb128 => input.read_sleb128().map(|a| a as u64), + constants::DW_EH_PE_sdata2 => input.read_i16().map(|a| a as u64), + constants::DW_EH_PE_sdata4 => input.read_i32().map(|a| a as u64), + constants::DW_EH_PE_sdata8 => input.read_i64().map(|a| a as u64), + + // That was all of the valid encoding formats. 
+ _ => unreachable!(), + }?; + + Ok(Pointer::new(encoding, base.wrapping_add(offset))) +} + +#[cfg(test)] +mod tests { + use super::*; + use super::{parse_cfi_entry, AugmentationData, RegisterRuleMap, UnwindContext}; + use crate::common::Format; + use crate::constants; + use crate::endianity::{BigEndian, Endianity, LittleEndian, NativeEndian}; + use crate::read::{ + EndianSlice, Error, Pointer, ReaderOffsetId, Result, Section as ReadSection, + }; + use crate::test_util::GimliSectionMethods; + use alloc::boxed::Box; + use alloc::vec::Vec; + use core::marker::PhantomData; + use core::mem; + use core::u64; + use test_assembler::{Endian, Label, LabelMaker, LabelOrNum, Section, ToLabelOrNum}; + + // Ensure each test tries to read the same section kind that it wrote. + #[derive(Clone, Copy)] + struct SectionKind

(PhantomData
); + + impl SectionKind { + fn endian<'input, E>(self) -> Endian + where + E: Endianity, + T: UnwindSection>, + T::Offset: UnwindOffset, + { + if E::default().is_big_endian() { + Endian::Big + } else { + Endian::Little + } + } + + fn section<'input, E>(self, contents: &'input [u8]) -> T + where + E: Endianity, + T: UnwindSection> + ReadSection>, + T::Offset: UnwindOffset, + { + EndianSlice::new(contents, E::default()).into() + } + } + + fn debug_frame_le<'a>() -> SectionKind>> { + SectionKind(PhantomData) + } + + fn debug_frame_be<'a>() -> SectionKind>> { + SectionKind(PhantomData) + } + + fn eh_frame_le<'a>() -> SectionKind>> { + SectionKind(PhantomData) + } + + fn parse_fde( + section: Section, + input: &mut R, + get_cie: F, + ) -> Result> + where + R: Reader, + Section: UnwindSection, + O: UnwindOffset, + F: FnMut(&Section, &BaseAddresses, O) -> Result>, + { + let bases = Default::default(); + match parse_cfi_entry(&bases, §ion, input) { + Ok(Some(CieOrFde::Fde(partial))) => partial.parse(get_cie), + Ok(_) => Err(Error::NoEntryAtGivenOffset), + Err(e) => Err(e), + } + } + + // Mixin methods for `Section` to help define binary test data. 
+ + trait CfiSectionMethods: GimliSectionMethods { + fn cie<'aug, 'input, E, T>( + self, + _kind: SectionKind, + augmentation: Option<&'aug str>, + cie: &mut CommonInformationEntry>, + ) -> Self + where + E: Endianity, + T: UnwindSection>, + T::Offset: UnwindOffset; + fn fde<'a, 'input, E, T, L>( + self, + _kind: SectionKind, + cie_offset: L, + fde: &mut FrameDescriptionEntry>, + ) -> Self + where + E: Endianity, + T: UnwindSection>, + T::Offset: UnwindOffset, + L: ToLabelOrNum<'a, u64>; + } + + impl CfiSectionMethods for Section { + fn cie<'aug, 'input, E, T>( + self, + _kind: SectionKind, + augmentation: Option<&'aug str>, + cie: &mut CommonInformationEntry>, + ) -> Self + where + E: Endianity, + T: UnwindSection>, + T::Offset: UnwindOffset, + { + cie.offset = self.size() as _; + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + let section = match cie.format { + Format::Dwarf32 => self.D32(&length).mark(&start).D32(0xffff_ffff), + Format::Dwarf64 => { + let section = self.D32(0xffff_ffff); + section.D64(&length).mark(&start).D64(0xffff_ffff_ffff_ffff) + } + }; + + let mut section = section.D8(cie.version); + + if let Some(augmentation) = augmentation { + section = section.append_bytes(augmentation.as_bytes()); + } + + // Null terminator for augmentation string. 
+ let section = section.D8(0); + + let section = if T::has_address_and_segment_sizes(cie.version) { + section.D8(cie.address_size).D8(cie.segment_size) + } else { + section + }; + + let section = section + .uleb(cie.code_alignment_factor) + .sleb(cie.data_alignment_factor) + .uleb(cie.return_address_register.0.into()) + .append_bytes(cie.initial_instructions.slice()) + .mark(&end); + + cie.length = (&end - &start) as usize; + length.set_const(cie.length as u64); + + section + } + + fn fde<'a, 'input, E, T, L>( + self, + _kind: SectionKind, + cie_offset: L, + fde: &mut FrameDescriptionEntry>, + ) -> Self + where + E: Endianity, + T: UnwindSection>, + T::Offset: UnwindOffset, + L: ToLabelOrNum<'a, u64>, + { + fde.offset = self.size() as _; + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + assert_eq!(fde.format, fde.cie.format); + + let section = match T::cie_offset_encoding(fde.format) { + CieOffsetEncoding::U32 => { + let section = self.D32(&length).mark(&start); + match cie_offset.to_labelornum() { + LabelOrNum::Label(ref l) => section.D32(l), + LabelOrNum::Num(o) => section.D32(o as u32), + } + } + CieOffsetEncoding::U64 => { + let section = self.D32(0xffff_ffff); + section.D64(&length).mark(&start).D64(cie_offset) + } + }; + + let section = match fde.cie.segment_size { + 0 => section, + 4 => section.D32(fde.initial_segment as u32), + 8 => section.D64(fde.initial_segment), + x => panic!("Unsupported test segment size: {}", x), + }; + + let section = match fde.cie.address_size { + 4 => section + .D32(fde.initial_address() as u32) + .D32(fde.len() as u32), + 8 => section.D64(fde.initial_address()).D64(fde.len()), + x => panic!("Unsupported address size: {}", x), + }; + + let section = if let Some(ref augmentation) = fde.augmentation { + let cie_aug = fde + .cie + .augmentation + .expect("FDE has augmentation, but CIE doesn't"); + + if let Some(lsda) = augmentation.lsda { + // We only support writing `DW_EH_PE_absptr` here. 
+ assert_eq!( + cie_aug + .lsda + .expect("FDE has lsda, but CIE doesn't") + .format(), + constants::DW_EH_PE_absptr + ); + + // Augmentation data length + let section = section.uleb(u64::from(fde.cie.address_size)); + match fde.cie.address_size { + 4 => section.D32({ + let x: u64 = lsda.pointer(); + x as u32 + }), + 8 => section.D64({ + let x: u64 = lsda.pointer(); + x + }), + x => panic!("Unsupported address size: {}", x), + } + } else { + // Even if we don't have any augmentation data, if there is + // an augmentation defined, we need to put the length in. + section.uleb(0) + } + } else { + section + }; + + let section = section.append_bytes(fde.instructions.slice()).mark(&end); + + fde.length = (&end - &start) as usize; + length.set_const(fde.length as u64); + + section + } + } + + trait ResultExt { + fn map_eof(self, input: &[u8]) -> Self; + } + + impl ResultExt for Result { + fn map_eof(self, input: &[u8]) -> Self { + match self { + Err(Error::UnexpectedEof(id)) => { + let id = ReaderOffsetId(id.0 - input.as_ptr() as u64); + Err(Error::UnexpectedEof(id)) + } + r => r, + } + } + } + + fn assert_parse_cie<'input, E>( + kind: SectionKind>>, + section: Section, + address_size: u8, + expected: Result<( + EndianSlice<'input, E>, + CommonInformationEntry>, + )>, + ) where + E: Endianity, + { + let section = section.get_contents().unwrap(); + let mut debug_frame = kind.section(§ion); + debug_frame.set_address_size(address_size); + let input = &mut EndianSlice::new(§ion, E::default()); + let bases = Default::default(); + let result = CommonInformationEntry::parse(&bases, &debug_frame, input); + let result = result.map(|cie| (*input, cie)).map_eof(§ion); + assert_eq!(result, expected); + } + + #[test] + fn test_parse_cie_incomplete_length_32() { + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()).L16(5); + assert_parse_cie( + kind, + section, + 8, + Err(Error::UnexpectedEof(ReaderOffsetId(0))), + ); + } + + #[test] + fn 
test_parse_cie_incomplete_length_64() { + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + .L32(0xffff_ffff) + .L32(12345); + assert_parse_cie( + kind, + section, + 8, + Err(Error::UnexpectedEof(ReaderOffsetId(4))), + ); + } + + #[test] + fn test_parse_cie_incomplete_id_32() { + let kind = debug_frame_be(); + let section = Section::with_endian(kind.endian()) + // The length is not large enough to contain the ID. + .B32(3) + .B32(0xffff_ffff); + assert_parse_cie( + kind, + section, + 8, + Err(Error::UnexpectedEof(ReaderOffsetId(4))), + ); + } + + #[test] + fn test_parse_cie_bad_id_32() { + let kind = debug_frame_be(); + let section = Section::with_endian(kind.endian()) + // Initial length + .B32(4) + // Not the CIE Id. + .B32(0xbad1_bad2); + assert_parse_cie(kind, section, 8, Err(Error::NotCieId)); + } + + #[test] + fn test_parse_cie_32_bad_version() { + let mut cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 99, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 2, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&[], LittleEndian), + }; + + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()).cie(kind, None, &mut cie); + assert_parse_cie(kind, section, 4, Err(Error::UnknownVersion(99))); + } + + #[test] + fn test_parse_cie_unknown_augmentation() { + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + let augmentation = "replicant"; + let expected_rest = [1, 2, 3]; + + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + // Initial length + .L32(&length) + .mark(&start) + // CIE Id + .L32(0xffff_ffff) + // Version + .D8(4) + // Augmentation + .append_bytes(augmentation.as_bytes()) + // Null terminator + .D8(0) + // Extra augmented data that we can't understand. 
+ .L32(1) + .L32(2) + .L32(3) + .L32(4) + .L32(5) + .L32(6) + .mark(&end) + .append_bytes(&expected_rest); + + let expected_length = (&end - &start) as u64; + length.set_const(expected_length); + + assert_parse_cie(kind, section, 8, Err(Error::UnknownAugmentation)); + } + + fn test_parse_cie(format: Format, version: u8, address_size: u8) { + let expected_rest = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expected_instrs: Vec<_> = (0..4).map(|_| constants::DW_CFA_nop.0).collect(); + + let mut cie = CommonInformationEntry { + offset: 0, + length: 0, + format, + version, + augmentation: None, + address_size, + segment_size: 0, + code_alignment_factor: 16, + data_alignment_factor: 32, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&expected_instrs, LittleEndian), + }; + + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + .cie(kind, None, &mut cie) + .append_bytes(&expected_rest); + + assert_parse_cie( + kind, + section, + address_size, + Ok((EndianSlice::new(&expected_rest, LittleEndian), cie)), + ); + } + + #[test] + fn test_parse_cie_32_ok() { + test_parse_cie(Format::Dwarf32, 1, 4); + test_parse_cie(Format::Dwarf32, 1, 8); + test_parse_cie(Format::Dwarf32, 4, 4); + test_parse_cie(Format::Dwarf32, 4, 8); + } + + #[test] + fn test_parse_cie_64_ok() { + test_parse_cie(Format::Dwarf64, 1, 4); + test_parse_cie(Format::Dwarf64, 1, 8); + test_parse_cie(Format::Dwarf64, 4, 4); + test_parse_cie(Format::Dwarf64, 4, 8); + } + + #[test] + fn test_parse_cie_length_too_big() { + let expected_instrs: Vec<_> = (0..13).map(|_| constants::DW_CFA_nop.0).collect(); + + let mut cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 0, + data_alignment_factor: 0, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&expected_instrs, LittleEndian), + }; + + let kind = 
debug_frame_le(); + let section = Section::with_endian(kind.endian()).cie(kind, None, &mut cie); + + let mut contents = section.get_contents().unwrap(); + + // Overwrite the length to be too big. + contents[0] = 0; + contents[1] = 0; + contents[2] = 0; + contents[3] = 255; + + let debug_frame = DebugFrame::new(&contents, LittleEndian); + let bases = Default::default(); + assert_eq!( + CommonInformationEntry::parse( + &bases, + &debug_frame, + &mut EndianSlice::new(&contents, LittleEndian) + ) + .map_eof(&contents), + Err(Error::UnexpectedEof(ReaderOffsetId(4))) + ); + } + + #[test] + fn test_parse_fde_incomplete_length_32() { + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()).L16(5); + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, LittleEndian); + assert_eq!( + parse_fde(debug_frame, rest, UnwindSection::cie_from_offset).map_eof(§ion), + Err(Error::UnexpectedEof(ReaderOffsetId(0))) + ); + } + + #[test] + fn test_parse_fde_incomplete_length_64() { + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + .L32(0xffff_ffff) + .L32(12345); + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, LittleEndian); + assert_eq!( + parse_fde(debug_frame, rest, UnwindSection::cie_from_offset).map_eof(§ion), + Err(Error::UnexpectedEof(ReaderOffsetId(4))) + ); + } + + #[test] + fn test_parse_fde_incomplete_cie_pointer_32() { + let kind = debug_frame_be(); + let section = Section::with_endian(kind.endian()) + // The length is not large enough to contain the CIE pointer. 
+ .B32(3) + .B32(1994); + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, BigEndian); + assert_eq!( + parse_fde(debug_frame, rest, UnwindSection::cie_from_offset).map_eof(§ion), + Err(Error::UnexpectedEof(ReaderOffsetId(4))) + ); + } + + #[test] + fn test_parse_fde_32_ok() { + let expected_rest = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let cie_offset = 0xbad0_bad1; + let expected_instrs: Vec<_> = (0..7).map(|_| constants::DW_CFA_nop.0).collect(); + + let cie = CommonInformationEntry { + offset: 0, + length: 100, + format: Format::Dwarf32, + version: 4, + augmentation: None, + // DWARF32 with a 64 bit address size! Holy moly! + address_size: 8, + segment_size: 0, + code_alignment_factor: 3, + data_alignment_factor: 2, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&[], LittleEndian), + }; + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 39, + augmentation: None, + instructions: EndianSlice::new(&expected_instrs, LittleEndian), + }; + + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + .fde(kind, cie_offset, &mut fde) + .append_bytes(&expected_rest); + + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, LittleEndian); + + let get_cie = |_: &_, _: &_, offset| { + assert_eq!(offset, DebugFrameOffset(cie_offset as usize)); + Ok(cie.clone()) + }; + + assert_eq!(parse_fde(debug_frame, rest, get_cie), Ok(fde)); + assert_eq!(*rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_fde_32_with_segment_ok() { + let expected_rest = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let cie_offset = 0xbad0_bad1; + let expected_instrs: Vec<_> = (0..92).map(|_| constants::DW_CFA_nop.0).collect(); + + let cie = 
CommonInformationEntry { + offset: 0, + length: 100, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 4, + code_alignment_factor: 3, + data_alignment_factor: 2, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&[], LittleEndian), + }; + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0xbadb_ad11, + initial_address: 0xfeed_beef, + address_range: 999, + augmentation: None, + instructions: EndianSlice::new(&expected_instrs, LittleEndian), + }; + + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + .fde(kind, cie_offset, &mut fde) + .append_bytes(&expected_rest); + + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, LittleEndian); + + let get_cie = |_: &_, _: &_, offset| { + assert_eq!(offset, DebugFrameOffset(cie_offset as usize)); + Ok(cie.clone()) + }; + + assert_eq!(parse_fde(debug_frame, rest, get_cie), Ok(fde)); + assert_eq!(*rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_fde_64_ok() { + let expected_rest = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let cie_offset = 0xbad0_bad1; + let expected_instrs: Vec<_> = (0..7).map(|_| constants::DW_CFA_nop.0).collect(); + + let cie = CommonInformationEntry { + offset: 0, + length: 100, + format: Format::Dwarf64, + version: 4, + augmentation: None, + address_size: 8, + segment_size: 0, + code_alignment_factor: 3, + data_alignment_factor: 2, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&[], LittleEndian), + }; + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf64, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 999, + augmentation: None, + instructions: EndianSlice::new(&expected_instrs, LittleEndian), + }; + + 
let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + .fde(kind, cie_offset, &mut fde) + .append_bytes(&expected_rest); + + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, LittleEndian); + + let get_cie = |_: &_, _: &_, offset| { + assert_eq!(offset, DebugFrameOffset(cie_offset as usize)); + Ok(cie.clone()) + }; + + assert_eq!(parse_fde(debug_frame, rest, get_cie), Ok(fde)); + assert_eq!(*rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_entry_on_cie_32_ok() { + let expected_rest = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expected_instrs: Vec<_> = (0..4).map(|_| constants::DW_CFA_nop.0).collect(); + + let mut cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 16, + data_alignment_factor: 32, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&expected_instrs, BigEndian), + }; + + let kind = debug_frame_be(); + let section = Section::with_endian(kind.endian()) + .cie(kind, None, &mut cie) + .append_bytes(&expected_rest); + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, BigEndian); + + let bases = Default::default(); + assert_eq!( + parse_cfi_entry(&bases, &debug_frame, rest), + Ok(Some(CieOrFde::Cie(cie))) + ); + assert_eq!(*rest, EndianSlice::new(&expected_rest, BigEndian)); + } + + #[test] + fn test_parse_cfi_entry_on_fde_32_ok() { + let cie_offset = 0x1234_5678; + let expected_rest = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expected_instrs: Vec<_> = (0..4).map(|_| constants::DW_CFA_nop.0).collect(); + + let cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 
16, + data_alignment_factor: 32, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&[], BigEndian), + }; + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 39, + augmentation: None, + instructions: EndianSlice::new(&expected_instrs, BigEndian), + }; + + let kind = debug_frame_be(); + let section = Section::with_endian(kind.endian()) + .fde(kind, cie_offset, &mut fde) + .append_bytes(&expected_rest); + + let section = section.get_contents().unwrap(); + let debug_frame = kind.section(§ion); + let rest = &mut EndianSlice::new(§ion, BigEndian); + + let bases = Default::default(); + match parse_cfi_entry(&bases, &debug_frame, rest) { + Ok(Some(CieOrFde::Fde(partial))) => { + assert_eq!(*rest, EndianSlice::new(&expected_rest, BigEndian)); + + assert_eq!(partial.length, fde.length); + assert_eq!(partial.format, fde.format); + assert_eq!(partial.cie_offset, DebugFrameOffset(cie_offset as usize)); + + let get_cie = |_: &_, _: &_, offset| { + assert_eq!(offset, DebugFrameOffset(cie_offset as usize)); + Ok(cie.clone()) + }; + + assert_eq!(partial.parse(get_cie), Ok(fde)); + } + otherwise => panic!("Unexpected result: {:#?}", otherwise), + } + } + + #[test] + fn test_cfi_entries_iter() { + let expected_instrs1: Vec<_> = (0..4).map(|_| constants::DW_CFA_nop.0).collect(); + + let expected_instrs2: Vec<_> = (0..8).map(|_| constants::DW_CFA_nop.0).collect(); + + let expected_instrs3: Vec<_> = (0..12).map(|_| constants::DW_CFA_nop.0).collect(); + + let expected_instrs4: Vec<_> = (0..16).map(|_| constants::DW_CFA_nop.0).collect(); + + let mut cie1 = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 2, + return_address_register: Register(3), + initial_instructions: 
EndianSlice::new(&expected_instrs1, BigEndian), + }; + + let mut cie2 = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 3, + data_alignment_factor: 2, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&expected_instrs2, BigEndian), + }; + + let cie1_location = Label::new(); + let cie2_location = Label::new(); + + // Write the CIEs first so that their length gets set before we clone + // them into the FDEs and our equality assertions down the line end up + // with all the CIEs always having he correct length. + let kind = debug_frame_be(); + let section = Section::with_endian(kind.endian()) + .mark(&cie1_location) + .cie(kind, None, &mut cie1) + .mark(&cie2_location) + .cie(kind, None, &mut cie2); + + let mut fde1 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie1.clone(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 39, + augmentation: None, + instructions: EndianSlice::new(&expected_instrs3, BigEndian), + }; + + let mut fde2 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie2.clone(), + initial_segment: 0, + initial_address: 0xfeed_face, + address_range: 9000, + augmentation: None, + instructions: EndianSlice::new(&expected_instrs4, BigEndian), + }; + + let section = + section + .fde(kind, &cie1_location, &mut fde1) + .fde(kind, &cie2_location, &mut fde2); + + section.start().set_const(0); + + let cie1_offset = cie1_location.value().unwrap() as usize; + let cie2_offset = cie2_location.value().unwrap() as usize; + + let contents = section.get_contents().unwrap(); + let debug_frame = kind.section(&contents); + + let bases = Default::default(); + let mut entries = debug_frame.entries(&bases); + + assert_eq!(entries.next(), Ok(Some(CieOrFde::Cie(cie1.clone())))); + assert_eq!(entries.next(), 
Ok(Some(CieOrFde::Cie(cie2.clone())))); + + match entries.next() { + Ok(Some(CieOrFde::Fde(partial))) => { + assert_eq!(partial.length, fde1.length); + assert_eq!(partial.format, fde1.format); + assert_eq!(partial.cie_offset, DebugFrameOffset(cie1_offset)); + + let get_cie = |_: &_, _: &_, offset| { + assert_eq!(offset, DebugFrameOffset(cie1_offset)); + Ok(cie1.clone()) + }; + assert_eq!(partial.parse(get_cie), Ok(fde1)); + } + otherwise => panic!("Unexpected result: {:#?}", otherwise), + } + + match entries.next() { + Ok(Some(CieOrFde::Fde(partial))) => { + assert_eq!(partial.length, fde2.length); + assert_eq!(partial.format, fde2.format); + assert_eq!(partial.cie_offset, DebugFrameOffset(cie2_offset)); + + let get_cie = |_: &_, _: &_, offset| { + assert_eq!(offset, DebugFrameOffset(cie2_offset)); + Ok(cie2.clone()) + }; + assert_eq!(partial.parse(get_cie), Ok(fde2)); + } + otherwise => panic!("Unexpected result: {:#?}", otherwise), + } + + assert_eq!(entries.next(), Ok(None)); + } + + #[test] + fn test_parse_cie_from_offset() { + let filler = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let instrs: Vec<_> = (0..5).map(|_| constants::DW_CFA_nop.0).collect(); + + let mut cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf64, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 4, + data_alignment_factor: 8, + return_address_register: Register(12), + initial_instructions: EndianSlice::new(&instrs, LittleEndian), + }; + + let cie_location = Label::new(); + + let kind = debug_frame_le(); + let section = Section::with_endian(kind.endian()) + .append_bytes(&filler) + .mark(&cie_location) + .cie(kind, None, &mut cie) + .append_bytes(&filler); + + section.start().set_const(0); + + let cie_offset = DebugFrameOffset(cie_location.value().unwrap() as usize); + + let contents = section.get_contents().unwrap(); + let debug_frame = kind.section(&contents); + let bases = Default::default(); + + 
assert_eq!(debug_frame.cie_from_offset(&bases, cie_offset), Ok(cie)); + } + + fn parse_cfi_instruction( + input: &mut R, + address_size: u8, + ) -> Result> { + let section = input.clone(); + let parameters = &PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size, + section: §ion, + }; + CallFrameInstruction::parse(input, None, parameters, Vendor::Default) + } + + #[test] + fn test_parse_cfi_instruction_advance_loc() { + let expected_rest = [1, 2, 3, 4]; + let expected_delta = 42; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_advance_loc.0 | expected_delta) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::AdvanceLoc { + delta: u32::from(expected_delta), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_offset() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 3; + let expected_offset = 1997; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_offset.0 | expected_reg) + .uleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Offset { + register: Register(expected_reg.into()), + factored_offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_restore() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 3; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_restore.0 | expected_reg) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut 
EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Restore { + register: Register(expected_reg.into()), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_nop() { + let expected_rest = [1, 2, 3, 4]; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_nop.0) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Nop) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_set_loc() { + let expected_rest = [1, 2, 3, 4]; + let expected_addr = 0xdead_beef; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_set_loc.0) + .L64(expected_addr) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::SetLoc { + address: expected_addr, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_set_loc_encoding() { + let text_base = 0xfeed_face; + let addr_offset = 0xbeef; + let expected_addr = text_base + addr_offset; + let expected_rest = [1, 2, 3, 4]; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_set_loc.0) + .L64(addr_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + let parameters = &PointerEncodingParameters { + bases: &BaseAddresses::default().set_text(text_base).eh_frame, + func_base: None, + address_size: 8, + section: &EndianSlice::new(&[], LittleEndian), + }; + assert_eq!( + 
CallFrameInstruction::parse( + input, + Some(constants::DW_EH_PE_textrel), + parameters, + Vendor::Default + ), + Ok(CallFrameInstruction::SetLoc { + address: expected_addr, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_advance_loc1() { + let expected_rest = [1, 2, 3, 4]; + let expected_delta = 8; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_advance_loc1.0) + .D8(expected_delta) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::AdvanceLoc { + delta: u32::from(expected_delta), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_advance_loc2() { + let expected_rest = [1, 2, 3, 4]; + let expected_delta = 500; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_advance_loc2.0) + .L16(expected_delta) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::AdvanceLoc { + delta: u32::from(expected_delta), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_advance_loc4() { + let expected_rest = [1, 2, 3, 4]; + let expected_delta = 1 << 20; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_advance_loc4.0) + .L32(expected_delta) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::AdvanceLoc { + delta: expected_delta, + }) + ); + assert_eq!(*input, 
EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_offset_extended() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 7; + let expected_offset = 33; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_offset_extended.0) + .uleb(expected_reg.into()) + .uleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Offset { + register: Register(expected_reg), + factored_offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_restore_extended() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 7; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_restore_extended.0) + .uleb(expected_reg.into()) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Restore { + register: Register(expected_reg), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_undefined() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 7; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_undefined.0) + .uleb(expected_reg.into()) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Undefined { + register: Register(expected_reg), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_same_value() { + let 
expected_rest = [1, 2, 3, 4]; + let expected_reg = 7; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_same_value.0) + .uleb(expected_reg.into()) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::SameValue { + register: Register(expected_reg), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_register() { + let expected_rest = [1, 2, 3, 4]; + let expected_dest_reg = 7; + let expected_src_reg = 8; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_register.0) + .uleb(expected_dest_reg.into()) + .uleb(expected_src_reg.into()) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Register { + dest_register: Register(expected_dest_reg), + src_register: Register(expected_src_reg), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_remember_state() { + let expected_rest = [1, 2, 3, 4]; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_remember_state.0) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::RememberState) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_restore_state() { + let expected_rest = [1, 2, 3, 4]; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_restore_state.0) + .append_bytes(&expected_rest); + let contents = 
section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::RestoreState) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_def_cfa() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 2; + let expected_offset = 0; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_def_cfa.0) + .uleb(expected_reg.into()) + .uleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::DefCfa { + register: Register(expected_reg), + offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_def_cfa_register() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 2; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_def_cfa_register.0) + .uleb(expected_reg.into()) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::DefCfaRegister { + register: Register(expected_reg), + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_def_cfa_offset() { + let expected_rest = [1, 2, 3, 4]; + let expected_offset = 23; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_def_cfa_offset.0) + .uleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + 
Ok(CallFrameInstruction::DefCfaOffset { + offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_def_cfa_expression() { + let expected_rest = [1, 2, 3, 4]; + let expected_expr = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]; + + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_def_cfa_expression.0) + .D8(&length) + .mark(&start) + .append_bytes(&expected_expr) + .mark(&end) + .append_bytes(&expected_rest); + + length.set_const((&end - &start) as u64); + let expected_expression = UnwindExpression { + offset: (&start - §ion.start()) as usize, + length: (&end - &start) as usize, + }; + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::DefCfaExpression { + expression: expected_expression, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_expression() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 99; + let expected_expr = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]; + + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_expression.0) + .uleb(expected_reg.into()) + .D8(&length) + .mark(&start) + .append_bytes(&expected_expr) + .mark(&end) + .append_bytes(&expected_rest); + + length.set_const((&end - &start) as u64); + let expected_expression = UnwindExpression { + offset: (&start - §ion.start()) as usize, + length: (&end - &start) as usize, + }; + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::Expression { + register: 
Register(expected_reg), + expression: expected_expression, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_offset_extended_sf() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 7; + let expected_offset = -33; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_offset_extended_sf.0) + .uleb(expected_reg.into()) + .sleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::OffsetExtendedSf { + register: Register(expected_reg), + factored_offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_def_cfa_sf() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 2; + let expected_offset = -9999; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_def_cfa_sf.0) + .uleb(expected_reg.into()) + .sleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::DefCfaSf { + register: Register(expected_reg), + factored_offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_def_cfa_offset_sf() { + let expected_rest = [1, 2, 3, 4]; + let expected_offset = -123; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_def_cfa_offset_sf.0) + .sleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + 
Ok(CallFrameInstruction::DefCfaOffsetSf { + factored_offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_val_offset() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 50; + let expected_offset = 23; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_val_offset.0) + .uleb(expected_reg.into()) + .uleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::ValOffset { + register: Register(expected_reg), + factored_offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_val_offset_sf() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 50; + let expected_offset = -23; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_val_offset_sf.0) + .uleb(expected_reg.into()) + .sleb(expected_offset) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::ValOffsetSf { + register: Register(expected_reg), + factored_offset: expected_offset, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_val_expression() { + let expected_rest = [1, 2, 3, 4]; + let expected_reg = 50; + let expected_expr = [2, 2, 1, 1, 5, 5]; + + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_val_expression.0) + .uleb(expected_reg.into()) + .D8(&length) + .mark(&start) + .append_bytes(&expected_expr) + .mark(&end) + 
.append_bytes(&expected_rest); + + length.set_const((&end - &start) as u64); + let expected_expression = UnwindExpression { + offset: (&start - §ion.start()) as usize, + length: (&end - &start) as usize, + }; + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + + assert_eq!( + parse_cfi_instruction(input, 8), + Ok(CallFrameInstruction::ValExpression { + register: Register(expected_reg), + expression: expected_expression, + }) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_negate_ra_state() { + let expected_rest = [1, 2, 3, 4]; + let section = Section::with_endian(Endian::Little) + .D8(constants::DW_CFA_AARCH64_negate_ra_state.0) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + let parameters = &PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 8, + section: &EndianSlice::default(), + }; + assert_eq!( + CallFrameInstruction::parse(input, None, parameters, Vendor::AArch64), + Ok(CallFrameInstruction::NegateRaState) + ); + assert_eq!(*input, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_cfi_instruction_unknown_instruction() { + let expected_rest = [1, 2, 3, 4]; + let unknown_instr = constants::DwCfa(0b0011_1111); + let section = Section::with_endian(Endian::Little) + .D8(unknown_instr.0) + .append_bytes(&expected_rest); + let contents = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&contents, LittleEndian); + assert_eq!( + parse_cfi_instruction(input, 8), + Err(Error::UnknownCallFrameInstruction(unknown_instr)) + ); + } + + #[test] + fn test_call_frame_instruction_iter_ok() { + let expected_reg = 50; + let expected_expr = [2, 2, 1, 1, 5, 5]; + let expected_delta = 230; + + let length = Label::new(); + let start = 
Label::new(); + let end = Label::new(); + + let section = Section::with_endian(Endian::Big) + .D8(constants::DW_CFA_val_expression.0) + .uleb(expected_reg.into()) + .D8(&length) + .mark(&start) + .append_bytes(&expected_expr) + .mark(&end) + .D8(constants::DW_CFA_advance_loc1.0) + .D8(expected_delta); + + length.set_const((&end - &start) as u64); + let expected_expression = UnwindExpression { + offset: (&start - §ion.start()) as usize, + length: (&end - &start) as usize, + }; + let contents = section.get_contents().unwrap(); + let input = EndianSlice::new(&contents, BigEndian); + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 8, + section: &input, + }; + let mut iter = CallFrameInstructionIter { + input, + address_encoding: None, + parameters, + vendor: Vendor::Default, + }; + + assert_eq!( + iter.next(), + Ok(Some(CallFrameInstruction::ValExpression { + register: Register(expected_reg), + expression: expected_expression, + })) + ); + + assert_eq!( + iter.next(), + Ok(Some(CallFrameInstruction::AdvanceLoc { + delta: u32::from(expected_delta), + })) + ); + + assert_eq!(iter.next(), Ok(None)); + } + + #[test] + fn test_call_frame_instruction_iter_err() { + // DW_CFA_advance_loc1 without an operand. 
+ let section = Section::with_endian(Endian::Big).D8(constants::DW_CFA_advance_loc1.0); + + let contents = section.get_contents().unwrap(); + let input = EndianSlice::new(&contents, BigEndian); + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 8, + section: &EndianSlice::default(), + }; + let mut iter = CallFrameInstructionIter { + input, + address_encoding: None, + parameters, + vendor: Vendor::Default, + }; + + assert_eq!( + iter.next().map_eof(&contents), + Err(Error::UnexpectedEof(ReaderOffsetId(1))) + ); + assert_eq!(iter.next(), Ok(None)); + } + + fn assert_eval<'a, I>( + mut initial_ctx: UnwindContext, + expected_ctx: UnwindContext, + cie: CommonInformationEntry>, + fde: Option>>, + instructions: I, + ) where + I: AsRef<[(Result, CallFrameInstruction)]>, + { + { + let section = &DebugFrame::from(EndianSlice::default()); + let bases = &BaseAddresses::default(); + let mut table = match fde { + Some(fde) => UnwindTable::new_for_fde(section, bases, &mut initial_ctx, &fde), + None => UnwindTable::new_for_cie(section, bases, &mut initial_ctx, &cie), + }; + for (expected_result, instruction) in instructions.as_ref() { + assert_eq!(*expected_result, table.evaluate(instruction.clone())); + } + } + + assert_eq!(expected_ctx, initial_ctx); + } + + fn make_test_cie<'a>() -> CommonInformationEntry> { + CommonInformationEntry { + offset: 0, + format: Format::Dwarf64, + length: 0, + return_address_register: Register(0), + version: 4, + address_size: mem::size_of::() as u8, + initial_instructions: EndianSlice::new(&[], LittleEndian), + augmentation: None, + segment_size: 0, + data_alignment_factor: 2, + code_alignment_factor: 3, + } + } + + #[test] + fn test_eval_set_loc() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected.row_mut().end_address = 42; + let instructions = [(Ok(true), CallFrameInstruction::SetLoc { address: 42 })]; + 
assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_set_loc_backwards() { + let cie = make_test_cie(); + let mut ctx = UnwindContext::new(); + ctx.row_mut().start_address = 999; + let expected = ctx.clone(); + let instructions = [( + Err(Error::InvalidAddressRange), + CallFrameInstruction::SetLoc { address: 42 }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_advance_loc() { + let cie = make_test_cie(); + let mut ctx = UnwindContext::new(); + ctx.row_mut().start_address = 3; + let mut expected = ctx.clone(); + expected.row_mut().end_address = 3 + 2 * cie.code_alignment_factor; + let instructions = [(Ok(true), CallFrameInstruction::AdvanceLoc { delta: 2 })]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_advance_loc_overflow() { + let cie = make_test_cie(); + let mut ctx = UnwindContext::new(); + ctx.row_mut().start_address = u64::MAX; + let mut expected = ctx.clone(); + expected.row_mut().end_address = 42 * cie.code_alignment_factor - 1; + let instructions = [(Ok(true), CallFrameInstruction::AdvanceLoc { delta: 42 })]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_def_cfa() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected.set_cfa(CfaRule::RegisterAndOffset { + register: Register(42), + offset: 36, + }); + let instructions = [( + Ok(false), + CallFrameInstruction::DefCfa { + register: Register(42), + offset: 36, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_def_cfa_sf() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected.set_cfa(CfaRule::RegisterAndOffset { + register: Register(42), + offset: 36 * cie.data_alignment_factor as i64, + }); + let instructions = [( + Ok(false), + CallFrameInstruction::DefCfaSf { + register: Register(42), + 
factored_offset: 36, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_def_cfa_register() { + let cie = make_test_cie(); + let mut ctx = UnwindContext::new(); + ctx.set_cfa(CfaRule::RegisterAndOffset { + register: Register(3), + offset: 8, + }); + let mut expected = ctx.clone(); + expected.set_cfa(CfaRule::RegisterAndOffset { + register: Register(42), + offset: 8, + }); + let instructions = [( + Ok(false), + CallFrameInstruction::DefCfaRegister { + register: Register(42), + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_def_cfa_register_invalid_context() { + let cie = make_test_cie(); + let mut ctx = UnwindContext::new(); + ctx.set_cfa(CfaRule::Expression(UnwindExpression { + offset: 0, + length: 0, + })); + let expected = ctx.clone(); + let instructions = [( + Err(Error::CfiInstructionInInvalidContext), + CallFrameInstruction::DefCfaRegister { + register: Register(42), + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_def_cfa_offset() { + let cie = make_test_cie(); + let mut ctx = UnwindContext::new(); + ctx.set_cfa(CfaRule::RegisterAndOffset { + register: Register(3), + offset: 8, + }); + let mut expected = ctx.clone(); + expected.set_cfa(CfaRule::RegisterAndOffset { + register: Register(3), + offset: 42, + }); + let instructions = [(Ok(false), CallFrameInstruction::DefCfaOffset { offset: 42 })]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_def_cfa_offset_invalid_context() { + let cie = make_test_cie(); + let mut ctx = UnwindContext::new(); + ctx.set_cfa(CfaRule::Expression(UnwindExpression { + offset: 10, + length: 11, + })); + let expected = ctx.clone(); + let instructions = [( + Err(Error::CfiInstructionInInvalidContext), + CallFrameInstruction::DefCfaOffset { offset: 1993 }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn 
test_eval_def_cfa_expression() { + let expr = UnwindExpression { + offset: 10, + length: 11, + }; + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected.set_cfa(CfaRule::Expression(expr)); + let instructions = [( + Ok(false), + CallFrameInstruction::DefCfaExpression { expression: expr }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_undefined() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule(Register(5), RegisterRule::Undefined) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::Undefined { + register: Register(5), + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_same_value() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule(Register(0), RegisterRule::SameValue) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::SameValue { + register: Register(0), + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_offset() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule( + Register(2), + RegisterRule::Offset(3 * cie.data_alignment_factor), + ) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::Offset { + register: Register(2), + factored_offset: 3, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_offset_extended_sf() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule( + Register(4), + RegisterRule::Offset(-3 * cie.data_alignment_factor), + ) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::OffsetExtendedSf { + register: 
Register(4), + factored_offset: -3, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_val_offset() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule( + Register(5), + RegisterRule::ValOffset(7 * cie.data_alignment_factor), + ) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::ValOffset { + register: Register(5), + factored_offset: 7, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_val_offset_sf() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule( + Register(5), + RegisterRule::ValOffset(-7 * cie.data_alignment_factor), + ) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::ValOffsetSf { + register: Register(5), + factored_offset: -7, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_expression() { + let expr = UnwindExpression { + offset: 10, + length: 11, + }; + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule(Register(9), RegisterRule::Expression(expr)) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::Expression { + register: Register(9), + expression: expr, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_val_expression() { + let expr = UnwindExpression { + offset: 10, + length: 11, + }; + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule(Register(9), RegisterRule::ValExpression(expr)) + .unwrap(); + let instructions = [( + Ok(false), + CallFrameInstruction::ValExpression { + register: Register(9), + expression: expr, + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + 
#[test] + fn test_eval_restore() { + let cie = make_test_cie(); + let fde = FrameDescriptionEntry { + offset: 0, + format: Format::Dwarf64, + length: 0, + address_range: 0, + augmentation: None, + initial_address: 0, + initial_segment: 0, + cie: cie.clone(), + instructions: EndianSlice::new(&[], LittleEndian), + }; + + let mut ctx = UnwindContext::new(); + ctx.set_register_rule(Register(0), RegisterRule::Offset(1)) + .unwrap(); + ctx.save_initial_rules().unwrap(); + let expected = ctx.clone(); + ctx.set_register_rule(Register(0), RegisterRule::Offset(2)) + .unwrap(); + + let instructions = [( + Ok(false), + CallFrameInstruction::Restore { + register: Register(0), + }, + )]; + assert_eval(ctx, expected, cie, Some(fde), instructions); + } + + #[test] + fn test_eval_restore_havent_saved_initial_context() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let expected = ctx.clone(); + let instructions = [( + Err(Error::CfiInstructionInInvalidContext), + CallFrameInstruction::Restore { + register: Register(0), + }, + )]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_remember_state() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected.push_row().unwrap(); + let instructions = [(Ok(false), CallFrameInstruction::RememberState)]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_restore_state() { + let cie = make_test_cie(); + + let mut ctx = UnwindContext::new(); + ctx.set_start_address(1); + ctx.set_register_rule(Register(0), RegisterRule::SameValue) + .unwrap(); + let mut expected = ctx.clone(); + ctx.push_row().unwrap(); + ctx.set_start_address(2); + ctx.set_register_rule(Register(0), RegisterRule::Offset(16)) + .unwrap(); + + // Restore state should preserve current location. + expected.set_start_address(2); + + let instructions = [ + // First one pops just fine. 
+ (Ok(false), CallFrameInstruction::RestoreState), + // Second pop would try to pop out of bounds. + ( + Err(Error::PopWithEmptyStack), + CallFrameInstruction::RestoreState, + ), + ]; + + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_negate_ra_state() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule(crate::AArch64::RA_SIGN_STATE, RegisterRule::Constant(1)) + .unwrap(); + let instructions = [(Ok(false), CallFrameInstruction::NegateRaState)]; + assert_eval(ctx, expected, cie, None, instructions); + + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule(crate::AArch64::RA_SIGN_STATE, RegisterRule::Constant(0)) + .unwrap(); + let instructions = [ + (Ok(false), CallFrameInstruction::NegateRaState), + (Ok(false), CallFrameInstruction::NegateRaState), + ]; + assert_eval(ctx, expected, cie, None, instructions); + + // NegateRaState can't be used with other instructions. 
+ let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let mut expected = ctx.clone(); + expected + .set_register_rule( + crate::AArch64::RA_SIGN_STATE, + RegisterRule::Offset(cie.data_alignment_factor as i64), + ) + .unwrap(); + let instructions = [ + ( + Ok(false), + CallFrameInstruction::Offset { + register: crate::AArch64::RA_SIGN_STATE, + factored_offset: 1, + }, + ), + ( + Err(Error::CfiInstructionInInvalidContext), + CallFrameInstruction::NegateRaState, + ), + ]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_eval_nop() { + let cie = make_test_cie(); + let ctx = UnwindContext::new(); + let expected = ctx.clone(); + let instructions = [(Ok(false), CallFrameInstruction::Nop)]; + assert_eval(ctx, expected, cie, None, instructions); + } + + #[test] + fn test_unwind_table_cie_no_rule() { + let initial_instructions = Section::with_endian(Endian::Little) + // The CFA is -12 from register 4. + .D8(constants::DW_CFA_def_cfa_sf.0) + .uleb(4) + .sleb(-12) + .append_repeated(constants::DW_CFA_nop.0, 4); + let initial_instructions = initial_instructions.get_contents().unwrap(); + + let cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 8, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 1, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&initial_instructions, LittleEndian), + }; + + let instructions = Section::with_endian(Endian::Little) + // A bunch of nop padding. 
+ .append_repeated(constants::DW_CFA_nop.0, 8); + let instructions = instructions.get_contents().unwrap(); + + let fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0, + address_range: 100, + augmentation: None, + instructions: EndianSlice::new(&instructions, LittleEndian), + }; + + let section = &DebugFrame::from(EndianSlice::default()); + let bases = &BaseAddresses::default(); + let mut ctx = Box::new(UnwindContext::new()); + + let mut table = fde + .rows(section, bases, &mut ctx) + .expect("Should run initial program OK"); + assert!(table.ctx.is_initialized); + let expected_initial_rule = (Register(0), RegisterRule::Undefined); + assert_eq!(table.ctx.initial_rule, Some(expected_initial_rule)); + + { + let row = table.next_row().expect("Should evaluate first row OK"); + let expected = UnwindTableRow { + start_address: 0, + end_address: 100, + saved_args_size: 0, + cfa: CfaRule::RegisterAndOffset { + register: Register(4), + offset: -12, + }, + registers: [].iter().collect(), + }; + assert_eq!(Some(&expected), row); + } + + // All done! + assert_eq!(Ok(None), table.next_row()); + assert_eq!(Ok(None), table.next_row()); + } + + #[test] + fn test_unwind_table_cie_single_rule() { + let initial_instructions = Section::with_endian(Endian::Little) + // The CFA is -12 from register 4. + .D8(constants::DW_CFA_def_cfa_sf.0) + .uleb(4) + .sleb(-12) + // Register 3 is 4 from the CFA. 
+ .D8(constants::DW_CFA_offset.0 | 3) + .uleb(4) + .append_repeated(constants::DW_CFA_nop.0, 4); + let initial_instructions = initial_instructions.get_contents().unwrap(); + + let cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 8, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 1, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&initial_instructions, LittleEndian), + }; + + let instructions = Section::with_endian(Endian::Little) + // A bunch of nop padding. + .append_repeated(constants::DW_CFA_nop.0, 8); + let instructions = instructions.get_contents().unwrap(); + + let fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0, + address_range: 100, + augmentation: None, + instructions: EndianSlice::new(&instructions, LittleEndian), + }; + + let section = &DebugFrame::from(EndianSlice::default()); + let bases = &BaseAddresses::default(); + let mut ctx = Box::new(UnwindContext::new()); + + let mut table = fde + .rows(section, bases, &mut ctx) + .expect("Should run initial program OK"); + assert!(table.ctx.is_initialized); + let expected_initial_rule = (Register(3), RegisterRule::Offset(4)); + assert_eq!(table.ctx.initial_rule, Some(expected_initial_rule)); + + { + let row = table.next_row().expect("Should evaluate first row OK"); + let expected = UnwindTableRow { + start_address: 0, + end_address: 100, + saved_args_size: 0, + cfa: CfaRule::RegisterAndOffset { + register: Register(4), + offset: -12, + }, + registers: [(Register(3), RegisterRule::Offset(4))].iter().collect(), + }; + assert_eq!(Some(&expected), row); + } + + // All done! 
+ assert_eq!(Ok(None), table.next_row()); + assert_eq!(Ok(None), table.next_row()); + } + + #[test] + fn test_unwind_table_cie_invalid_rule() { + let initial_instructions1 = Section::with_endian(Endian::Little) + // Test that stack length is reset. + .D8(constants::DW_CFA_remember_state.0) + // Test that stack value is reset (different register from that used later). + .D8(constants::DW_CFA_offset.0 | 4) + .uleb(8) + // Invalid due to missing operands. + .D8(constants::DW_CFA_offset.0); + let initial_instructions1 = initial_instructions1.get_contents().unwrap(); + + let cie1 = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 8, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 1, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&initial_instructions1, LittleEndian), + }; + + let initial_instructions2 = Section::with_endian(Endian::Little) + // Register 3 is 4 from the CFA. 
+ .D8(constants::DW_CFA_offset.0 | 3) + .uleb(4) + .append_repeated(constants::DW_CFA_nop.0, 4); + let initial_instructions2 = initial_instructions2.get_contents().unwrap(); + + let cie2 = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 8, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 1, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&initial_instructions2, LittleEndian), + }; + + let fde1 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie1.clone(), + initial_segment: 0, + initial_address: 0, + address_range: 100, + augmentation: None, + instructions: EndianSlice::new(&[], LittleEndian), + }; + + let fde2 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie2.clone(), + initial_segment: 0, + initial_address: 0, + address_range: 100, + augmentation: None, + instructions: EndianSlice::new(&[], LittleEndian), + }; + + let section = &DebugFrame::from(EndianSlice::default()); + let bases = &BaseAddresses::default(); + let mut ctx = Box::new(UnwindContext::new()); + + let table = fde1 + .rows(section, bases, &mut ctx) + .map_eof(&initial_instructions1); + assert_eq!(table.err(), Some(Error::UnexpectedEof(ReaderOffsetId(4)))); + assert!(!ctx.is_initialized); + assert_eq!(ctx.stack.len(), 2); + assert_eq!(ctx.initial_rule, None); + + let _table = fde2 + .rows(section, bases, &mut ctx) + .expect("Should run initial program OK"); + assert!(ctx.is_initialized); + assert_eq!(ctx.stack.len(), 1); + let expected_initial_rule = (Register(3), RegisterRule::Offset(4)); + assert_eq!(ctx.initial_rule, Some(expected_initial_rule)); + } + + #[test] + fn test_unwind_table_next_row() { + #[allow(clippy::identity_op)] + let initial_instructions = Section::with_endian(Endian::Little) + // The CFA is -12 from register 4. 
+ .D8(constants::DW_CFA_def_cfa_sf.0) + .uleb(4) + .sleb(-12) + // Register 0 is 8 from the CFA. + .D8(constants::DW_CFA_offset.0 | 0) + .uleb(8) + // Register 3 is 4 from the CFA. + .D8(constants::DW_CFA_offset.0 | 3) + .uleb(4) + .append_repeated(constants::DW_CFA_nop.0, 4); + let initial_instructions = initial_instructions.get_contents().unwrap(); + + let cie = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 8, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 1, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&initial_instructions, LittleEndian), + }; + + let instructions = Section::with_endian(Endian::Little) + // Initial instructions form a row, advance the address by 1. + .D8(constants::DW_CFA_advance_loc1.0) + .D8(1) + // Register 0 is -16 from the CFA. + .D8(constants::DW_CFA_offset_extended_sf.0) + .uleb(0) + .sleb(-16) + // Finish this row, advance the address by 32. + .D8(constants::DW_CFA_advance_loc1.0) + .D8(32) + // Register 3 is -4 from the CFA. + .D8(constants::DW_CFA_offset_extended_sf.0) + .uleb(3) + .sleb(-4) + // Finish this row, advance the address by 64. + .D8(constants::DW_CFA_advance_loc1.0) + .D8(64) + // Register 5 is 4 from the CFA. + .D8(constants::DW_CFA_offset.0 | 5) + .uleb(4) + // A bunch of nop padding. 
+ .append_repeated(constants::DW_CFA_nop.0, 8); + let instructions = instructions.get_contents().unwrap(); + + let fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0, + address_range: 100, + augmentation: None, + instructions: EndianSlice::new(&instructions, LittleEndian), + }; + + let section = &DebugFrame::from(EndianSlice::default()); + let bases = &BaseAddresses::default(); + let mut ctx = Box::new(UnwindContext::new()); + + let mut table = fde + .rows(section, bases, &mut ctx) + .expect("Should run initial program OK"); + assert!(table.ctx.is_initialized); + assert!(table.ctx.initial_rule.is_none()); + let expected_initial_rules: RegisterRuleMap<_> = [ + (Register(0), RegisterRule::Offset(8)), + (Register(3), RegisterRule::Offset(4)), + ] + .iter() + .collect(); + assert_eq!(table.ctx.stack[0].registers, expected_initial_rules); + + { + let row = table.next_row().expect("Should evaluate first row OK"); + let expected = UnwindTableRow { + start_address: 0, + end_address: 1, + saved_args_size: 0, + cfa: CfaRule::RegisterAndOffset { + register: Register(4), + offset: -12, + }, + registers: [ + (Register(0), RegisterRule::Offset(8)), + (Register(3), RegisterRule::Offset(4)), + ] + .iter() + .collect(), + }; + assert_eq!(Some(&expected), row); + } + + { + let row = table.next_row().expect("Should evaluate second row OK"); + let expected = UnwindTableRow { + start_address: 1, + end_address: 33, + saved_args_size: 0, + cfa: CfaRule::RegisterAndOffset { + register: Register(4), + offset: -12, + }, + registers: [ + (Register(0), RegisterRule::Offset(-16)), + (Register(3), RegisterRule::Offset(4)), + ] + .iter() + .collect(), + }; + assert_eq!(Some(&expected), row); + } + + { + let row = table.next_row().expect("Should evaluate third row OK"); + let expected = UnwindTableRow { + start_address: 33, + end_address: 97, + saved_args_size: 0, + cfa: CfaRule::RegisterAndOffset { + 
register: Register(4), + offset: -12, + }, + registers: [ + (Register(0), RegisterRule::Offset(-16)), + (Register(3), RegisterRule::Offset(-4)), + ] + .iter() + .collect(), + }; + assert_eq!(Some(&expected), row); + } + + { + let row = table.next_row().expect("Should evaluate fourth row OK"); + let expected = UnwindTableRow { + start_address: 97, + end_address: 100, + saved_args_size: 0, + cfa: CfaRule::RegisterAndOffset { + register: Register(4), + offset: -12, + }, + registers: [ + (Register(0), RegisterRule::Offset(-16)), + (Register(3), RegisterRule::Offset(-4)), + (Register(5), RegisterRule::Offset(4)), + ] + .iter() + .collect(), + }; + assert_eq!(Some(&expected), row); + } + + // All done! + assert_eq!(Ok(None), table.next_row()); + assert_eq!(Ok(None), table.next_row()); + } + + #[test] + fn test_unwind_info_for_address_ok() { + let instrs1 = Section::with_endian(Endian::Big) + // The CFA is -12 from register 4. + .D8(constants::DW_CFA_def_cfa_sf.0) + .uleb(4) + .sleb(-12); + let instrs1 = instrs1.get_contents().unwrap(); + + let instrs2: Vec<_> = (0..8).map(|_| constants::DW_CFA_nop.0).collect(); + + let instrs3 = Section::with_endian(Endian::Big) + // Initial instructions form a row, advance the address by 100. + .D8(constants::DW_CFA_advance_loc1.0) + .D8(100) + // Register 0 is -16 from the CFA. 
+ .D8(constants::DW_CFA_offset_extended_sf.0) + .uleb(0) + .sleb(-16); + let instrs3 = instrs3.get_contents().unwrap(); + + let instrs4: Vec<_> = (0..16).map(|_| constants::DW_CFA_nop.0).collect(); + + let mut cie1 = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 8, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 1, + return_address_register: Register(3), + initial_instructions: EndianSlice::new(&instrs1, BigEndian), + }; + + let mut cie2 = CommonInformationEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + version: 4, + augmentation: None, + address_size: 4, + segment_size: 0, + code_alignment_factor: 1, + data_alignment_factor: 1, + return_address_register: Register(1), + initial_instructions: EndianSlice::new(&instrs2, BigEndian), + }; + + let cie1_location = Label::new(); + let cie2_location = Label::new(); + + // Write the CIEs first so that their length gets set before we clone + // them into the FDEs and our equality assertions down the line end up + // with all the CIEs always having he correct length. 
+ let kind = debug_frame_be(); + let section = Section::with_endian(kind.endian()) + .mark(&cie1_location) + .cie(kind, None, &mut cie1) + .mark(&cie2_location) + .cie(kind, None, &mut cie2); + + let mut fde1 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie1.clone(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 200, + augmentation: None, + instructions: EndianSlice::new(&instrs3, BigEndian), + }; + + let mut fde2 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie2.clone(), + initial_segment: 0, + initial_address: 0xfeed_face, + address_range: 9000, + augmentation: None, + instructions: EndianSlice::new(&instrs4, BigEndian), + }; + + let section = + section + .fde(kind, &cie1_location, &mut fde1) + .fde(kind, &cie2_location, &mut fde2); + section.start().set_const(0); + + let contents = section.get_contents().unwrap(); + let debug_frame = kind.section(&contents); + + // Get the second row of the unwind table in `instrs3`. 
+ let bases = Default::default(); + let mut ctx = Box::new(UnwindContext::new()); + let result = debug_frame.unwind_info_for_address( + &bases, + &mut ctx, + 0xfeed_beef + 150, + DebugFrame::cie_from_offset, + ); + assert!(result.is_ok()); + let unwind_info = result.unwrap(); + + assert_eq!( + *unwind_info, + UnwindTableRow { + start_address: fde1.initial_address() + 100, + end_address: fde1.initial_address() + fde1.len(), + saved_args_size: 0, + cfa: CfaRule::RegisterAndOffset { + register: Register(4), + offset: -12, + }, + registers: [(Register(0), RegisterRule::Offset(-16))].iter().collect(), + } + ); + } + + #[test] + fn test_unwind_info_for_address_not_found() { + let debug_frame = DebugFrame::new(&[], NativeEndian); + let bases = Default::default(); + let mut ctx = Box::new(UnwindContext::new()); + let result = debug_frame.unwind_info_for_address( + &bases, + &mut ctx, + 0xbadb_ad99, + DebugFrame::cie_from_offset, + ); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), Error::NoUnwindInfoForAddress); + } + + #[test] + fn test_eh_frame_hdr_unknown_version() { + let bases = BaseAddresses::default(); + let buf = &[42]; + let result = EhFrameHdr::new(buf, NativeEndian).parse(&bases, 8); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), Error::UnknownVersion(42)); + } + + #[test] + fn test_eh_frame_hdr_omit_ehptr() { + let section = Section::with_endian(Endian::Little) + .L8(1) + .L8(0xff) + .L8(0x03) + .L8(0x0b) + .L32(2) + .L32(10) + .L32(1) + .L32(20) + .L32(2) + .L32(0); + let section = section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let result = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), Error::CannotParseOmitPointerEncoding); + } + + #[test] + fn test_eh_frame_hdr_omit_count() { + let section = Section::with_endian(Endian::Little) + .L8(1) + .L8(0x0b) + .L8(0xff) + .L8(0x0b) + .L32(0x12345); + let section =
section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let result = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.eh_frame_ptr(), Pointer::Direct(0x12345)); + assert!(result.table().is_none()); + } + + #[test] + fn test_eh_frame_hdr_omit_table() { + let section = Section::with_endian(Endian::Little) + .L8(1) + .L8(0x0b) + .L8(0x03) + .L8(0xff) + .L32(0x12345) + .L32(2); + let section = section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let result = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.eh_frame_ptr(), Pointer::Direct(0x12345)); + assert!(result.table().is_none()); + } + + #[test] + fn test_eh_frame_hdr_varlen_table() { + let section = Section::with_endian(Endian::Little) + .L8(1) + .L8(0x0b) + .L8(0x03) + .L8(0x01) + .L32(0x12345) + .L32(2); + let section = section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let result = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.eh_frame_ptr(), Pointer::Direct(0x12345)); + let table = result.table(); + assert!(table.is_some()); + let table = table.unwrap(); + assert_eq!( + table.lookup(0, &bases), + Err(Error::VariableLengthSearchTable) + ); + } + + #[test] + fn test_eh_frame_hdr_indirect_length() { + let section = Section::with_endian(Endian::Little) + .L8(1) + .L8(0x0b) + .L8(0x83) + .L8(0x0b) + .L32(0x12345) + .L32(2); + let section = section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let result = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), Error::UnsupportedPointerEncoding); + } + + #[test] + fn test_eh_frame_hdr_indirect_ptrs() { + let section = Section::with_endian(Endian::Little) + .L8(1) +
.L8(0x8b) + .L8(0x03) + .L8(0x8b) + .L32(0x12345) + .L32(2) + .L32(10) + .L32(1) + .L32(20) + .L32(2); + let section = section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let result = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.eh_frame_ptr(), Pointer::Indirect(0x12345)); + let table = result.table(); + assert!(table.is_some()); + let table = table.unwrap(); + assert_eq!( + table.lookup(0, &bases), + Err(Error::UnsupportedPointerEncoding) + ); + } + + #[test] + fn test_eh_frame_hdr_good() { + let section = Section::with_endian(Endian::Little) + .L8(1) + .L8(0x0b) + .L8(0x03) + .L8(0x0b) + .L32(0x12345) + .L32(2) + .L32(10) + .L32(1) + .L32(20) + .L32(2); + let section = section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let result = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.eh_frame_ptr(), Pointer::Direct(0x12345)); + let table = result.table(); + assert!(table.is_some()); + let table = table.unwrap(); + assert_eq!(table.lookup(0, &bases), Ok(Pointer::Direct(1))); + assert_eq!(table.lookup(9, &bases), Ok(Pointer::Direct(1))); + assert_eq!(table.lookup(10, &bases), Ok(Pointer::Direct(1))); + assert_eq!(table.lookup(11, &bases), Ok(Pointer::Direct(1))); + assert_eq!(table.lookup(19, &bases), Ok(Pointer::Direct(1))); + assert_eq!(table.lookup(20, &bases), Ok(Pointer::Direct(2))); + assert_eq!(table.lookup(21, &bases), Ok(Pointer::Direct(2))); + assert_eq!(table.lookup(100_000, &bases), Ok(Pointer::Direct(2))); + } + + #[test] + fn test_eh_frame_fde_for_address_good() { + // First, setup eh_frame + // Write the CIE first so that its length gets set before we clone it + // into the FDE.
+ let mut cie = make_test_cie(); + cie.format = Format::Dwarf32; + cie.version = 1; + + let start_of_cie = Label::new(); + let end_of_cie = Label::new(); + + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .append_repeated(0, 16) + .mark(&start_of_cie) + .cie(kind, None, &mut cie) + .mark(&end_of_cie); + + let mut fde1 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 9, + address_range: 4, + augmentation: None, + instructions: EndianSlice::new(&[], LittleEndian), + }; + let mut fde2 = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 20, + address_range: 8, + augmentation: None, + instructions: EndianSlice::new(&[], LittleEndian), + }; + + let start_of_fde1 = Label::new(); + let start_of_fde2 = Label::new(); + + let section = section + // +4 for the FDE length before the CIE offset. 
+ .mark(&start_of_fde1) + .fde(kind, (&start_of_fde1 - &start_of_cie + 4) as u64, &mut fde1) + .mark(&start_of_fde2) + .fde(kind, (&start_of_fde2 - &start_of_cie + 4) as u64, &mut fde2); + + section.start().set_const(0); + let section = section.get_contents().unwrap(); + let eh_frame = kind.section(&section); + + // Setup eh_frame_hdr + let section = Section::with_endian(kind.endian()) + .L8(1) + .L8(0x0b) + .L8(0x03) + .L8(0x0b) + .L32(0x12345) + .L32(2) + .L32(10) + .L32(0x12345 + start_of_fde1.value().unwrap() as u32) + .L32(20) + .L32(0x12345 + start_of_fde2.value().unwrap() as u32); + + let section = section.get_contents().unwrap(); + let bases = BaseAddresses::default(); + let eh_frame_hdr = EhFrameHdr::new(&section, LittleEndian).parse(&bases, 8); + assert!(eh_frame_hdr.is_ok()); + let eh_frame_hdr = eh_frame_hdr.unwrap(); + + let table = eh_frame_hdr.table(); + assert!(table.is_some()); + let table = table.unwrap(); + + let bases = Default::default(); + let mut iter = table.iter(&bases); + assert_eq!( + iter.next(), + Ok(Some(( + Pointer::Direct(10), + Pointer::Direct(0x12345 + start_of_fde1.value().unwrap()) + ))) + ); + assert_eq!( + iter.next(), + Ok(Some(( + Pointer::Direct(20), + Pointer::Direct(0x12345 + start_of_fde2.value().unwrap()) + ))) + ); + assert_eq!(iter.next(), Ok(None)); + + assert_eq!( + table.iter(&bases).nth(0), + Ok(Some(( + Pointer::Direct(10), + Pointer::Direct(0x12345 + start_of_fde1.value().unwrap()) + ))) + ); + + assert_eq!( + table.iter(&bases).nth(1), + Ok(Some(( + Pointer::Direct(20), + Pointer::Direct(0x12345 + start_of_fde2.value().unwrap()) + ))) + ); + assert_eq!(table.iter(&bases).nth(2), Ok(None)); + + let f = |_: &_, _: &_, o: EhFrameOffset| { + assert_eq!(o, EhFrameOffset(start_of_cie.value().unwrap() as usize)); + Ok(cie.clone()) + }; + assert_eq!( + table.fde_for_address(&eh_frame, &bases, 9, f), + Ok(fde1.clone()) + ); + assert_eq!( + table.fde_for_address(&eh_frame, &bases, 10, f), + Ok(fde1.clone()) + ); +
assert_eq!(table.fde_for_address(&eh_frame, &bases, 11, f), Ok(fde1)); + assert_eq!( + table.fde_for_address(&eh_frame, &bases, 19, f), + Err(Error::NoUnwindInfoForAddress) + ); + assert_eq!( + table.fde_for_address(&eh_frame, &bases, 20, f), + Ok(fde2.clone()) + ); + assert_eq!(table.fde_for_address(&eh_frame, &bases, 21, f), Ok(fde2)); + assert_eq!( + table.fde_for_address(&eh_frame, &bases, 100_000, f), + Err(Error::NoUnwindInfoForAddress) + ); + } + + #[test] + fn test_eh_frame_stops_at_zero_length() { + let section = Section::with_endian(Endian::Little).L32(0); + let section = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&section, LittleEndian); + let bases = Default::default(); + + assert_eq!( + parse_cfi_entry(&bases, &EhFrame::new(&section, LittleEndian), rest), + Ok(None) + ); + + assert_eq!( + EhFrame::new(&section, LittleEndian).cie_from_offset(&bases, EhFrameOffset(0)), + Err(Error::NoEntryAtGivenOffset) + ); + } + + fn resolve_cie_offset(buf: &[u8], cie_offset: usize) -> Result<usize> { + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf64, + cie: make_test_cie(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 39, + augmentation: None, + instructions: EndianSlice::new(&[], LittleEndian), + }; + + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .append_bytes(buf) + .fde(kind, cie_offset as u64, &mut fde) + .append_bytes(buf); + + let section = section.get_contents().unwrap(); + let eh_frame = kind.section(&section); + let input = &mut EndianSlice::new(&section[buf.len()..], LittleEndian); + + let bases = Default::default(); + match parse_cfi_entry(&bases, &eh_frame, input) { + Ok(Some(CieOrFde::Fde(partial))) => Ok(partial.cie_offset.0), + Err(e) => Err(e), + otherwise => panic!("Unexpected result: {:#?}", otherwise), + } + } + + #[test] + fn test_eh_frame_resolve_cie_offset_ok() { + let buf = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + let cie_offset = 2; + // + 4 for size
of length field + assert_eq!( + resolve_cie_offset(&buf, buf.len() + 4 - cie_offset), + Ok(cie_offset) + ); + } + + #[test] + fn test_eh_frame_resolve_cie_offset_out_of_bounds() { + let buf = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + assert_eq!( + resolve_cie_offset(&buf, buf.len() + 4 + 2), + Err(Error::OffsetOutOfBounds) + ); + } + + #[test] + fn test_eh_frame_resolve_cie_offset_underflow() { + let buf = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + assert_eq!( + resolve_cie_offset(&buf, ::core::usize::MAX), + Err(Error::OffsetOutOfBounds) + ); + } + + #[test] + fn test_eh_frame_fde_ok() { + let mut cie = make_test_cie(); + cie.format = Format::Dwarf32; + cie.version = 1; + + let start_of_cie = Label::new(); + let end_of_cie = Label::new(); + + // Write the CIE first so that its length gets set before we clone it + // into the FDE. + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .append_repeated(0, 16) + .mark(&start_of_cie) + .cie(kind, None, &mut cie) + .mark(&end_of_cie); + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 999, + augmentation: None, + instructions: EndianSlice::new(&[], LittleEndian), + }; + + let section = section + // +4 for the FDE length before the CIE offset. 
+ .fde(kind, (&end_of_cie - &start_of_cie + 4) as u64, &mut fde); + + section.start().set_const(0); + let section = section.get_contents().unwrap(); + let eh_frame = kind.section(&section); + let section = EndianSlice::new(&section, LittleEndian); + + let mut offset = None; + let result = parse_fde( + eh_frame, + &mut section.range_from(end_of_cie.value().unwrap() as usize..), + |_, _, o| { + offset = Some(o); + assert_eq!(o, EhFrameOffset(start_of_cie.value().unwrap() as usize)); + Ok(cie.clone()) + }, + ); + match result { + Ok(actual) => assert_eq!(actual, fde), + otherwise => panic!("Unexpected result {:?}", otherwise), + } + assert!(offset.is_some()); + } + + #[test] + fn test_eh_frame_fde_out_of_bounds() { + let mut cie = make_test_cie(); + cie.version = 1; + + let end_of_cie = Label::new(); + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf64, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_beef, + address_range: 999, + augmentation: None, + instructions: EndianSlice::new(&[], LittleEndian), + }; + + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .cie(kind, None, &mut cie) + .mark(&end_of_cie) + .fde(kind, 99_999_999_999_999, &mut fde); + + section.start().set_const(0); + let section = section.get_contents().unwrap(); + let eh_frame = kind.section(&section); + let section = EndianSlice::new(&section, LittleEndian); + + let result = parse_fde( + eh_frame, + &mut section.range_from(end_of_cie.value().unwrap() as usize..), + UnwindSection::cie_from_offset, + ); + assert_eq!(result, Err(Error::OffsetOutOfBounds)); + } + + #[test] + fn test_augmentation_parse_not_z_augmentation() { + let augmentation = &mut EndianSlice::new(b"wtf", NativeEndian); + let bases = Default::default(); + let address_size = 8; + let section = EhFrame::new(&[], NativeEndian); + let input = &mut EndianSlice::new(&[], NativeEndian); + assert_eq!( + Augmentation::parse(augmentation, &bases, address_size, &section,
input), + Err(Error::UnknownAugmentation) + ); + } + + #[test] + fn test_augmentation_parse_just_signal_trampoline() { + let aug_str = &mut EndianSlice::new(b"S", LittleEndian); + let bases = Default::default(); + let address_size = 8; + let section = EhFrame::new(&[], LittleEndian); + let input = &mut EndianSlice::new(&[], LittleEndian); + + let augmentation = Augmentation { + is_signal_trampoline: true, + ..Default::default() + }; + + assert_eq!( + Augmentation::parse(aug_str, &bases, address_size, &section, input), + Ok(augmentation) + ); + } + + #[test] + fn test_augmentation_parse_unknown_part_of_z_augmentation() { + // The 'Z' character is not defined by the z-style augmentation. + let bases = Default::default(); + let address_size = 8; + let section = Section::with_endian(Endian::Little) + .uleb(4) + .append_repeated(4, 4) + .get_contents() + .unwrap(); + let section = EhFrame::new(&section, LittleEndian); + let input = &mut section.section().clone(); + let augmentation = &mut EndianSlice::new(b"zZ", LittleEndian); + assert_eq!( + Augmentation::parse(augmentation, &bases, address_size, &section, input), + Err(Error::UnknownAugmentation) + ); + } + + #[test] + #[allow(non_snake_case)] + fn test_augmentation_parse_L() { + let bases = Default::default(); + let address_size = 8; + let rest = [9, 8, 7, 6, 5, 4, 3, 2, 1]; + + let section = Section::with_endian(Endian::Little) + .uleb(1) + .D8(constants::DW_EH_PE_uleb128.0) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = EhFrame::new(&section, LittleEndian); + let input = &mut section.section().clone(); + let aug_str = &mut EndianSlice::new(b"zL", LittleEndian); + + let augmentation = Augmentation { + lsda: Some(constants::DW_EH_PE_uleb128), + ..Default::default() + }; + + assert_eq!( + Augmentation::parse(aug_str, &bases, address_size, &section, input), + Ok(augmentation) + ); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + #[allow(non_snake_case)] + fn test_augmentation_parse_P()
{ + let bases = Default::default(); + let address_size = 8; + let rest = [9, 8, 7, 6, 5, 4, 3, 2, 1]; + + let section = Section::with_endian(Endian::Little) + .uleb(9) + .D8(constants::DW_EH_PE_udata8.0) + .L64(0xf00d_f00d) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = EhFrame::new(&section, LittleEndian); + let input = &mut section.section().clone(); + let aug_str = &mut EndianSlice::new(b"zP", LittleEndian); + + let augmentation = Augmentation { + personality: Some((constants::DW_EH_PE_udata8, Pointer::Direct(0xf00d_f00d))), + ..Default::default() + }; + + assert_eq!( + Augmentation::parse(aug_str, &bases, address_size, &section, input), + Ok(augmentation) + ); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + #[allow(non_snake_case)] + fn test_augmentation_parse_R() { + let bases = Default::default(); + let address_size = 8; + let rest = [9, 8, 7, 6, 5, 4, 3, 2, 1]; + + let section = Section::with_endian(Endian::Little) + .uleb(1) + .D8(constants::DW_EH_PE_udata4.0) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = EhFrame::new(&section, LittleEndian); + let input = &mut section.section().clone(); + let aug_str = &mut EndianSlice::new(b"zR", LittleEndian); + + let augmentation = Augmentation { + fde_address_encoding: Some(constants::DW_EH_PE_udata4), + ..Default::default() + }; + + assert_eq!( + Augmentation::parse(aug_str, &bases, address_size, &section, input), + Ok(augmentation) + ); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + #[allow(non_snake_case)] + fn test_augmentation_parse_S() { + let bases = Default::default(); + let address_size = 8; + let rest = [9, 8, 7, 6, 5, 4, 3, 2, 1]; + + let section = Section::with_endian(Endian::Little) + .uleb(0) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = EhFrame::new(&section, LittleEndian); + let input = &mut section.section().clone(); + let aug_str = &mut EndianSlice::new(b"zS", LittleEndian); + + let
augmentation = Augmentation { + is_signal_trampoline: true, + ..Default::default() + }; + + assert_eq!( + Augmentation::parse(aug_str, &bases, address_size, &section, input), + Ok(augmentation) + ); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + fn test_augmentation_parse_all() { + let bases = Default::default(); + let address_size = 8; + let rest = [9, 8, 7, 6, 5, 4, 3, 2, 1]; + + let section = Section::with_endian(Endian::Little) + .uleb(1 + 9 + 1) + // L + .D8(constants::DW_EH_PE_uleb128.0) + // P + .D8(constants::DW_EH_PE_udata8.0) + .L64(0x1bad_f00d) + // R + .D8(constants::DW_EH_PE_uleb128.0) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = EhFrame::new(&section, LittleEndian); + let input = &mut section.section().clone(); + let aug_str = &mut EndianSlice::new(b"zLPRS", LittleEndian); + + let augmentation = Augmentation { + lsda: Some(constants::DW_EH_PE_uleb128), + personality: Some((constants::DW_EH_PE_udata8, Pointer::Direct(0x1bad_f00d))), + fde_address_encoding: Some(constants::DW_EH_PE_uleb128), + is_signal_trampoline: true, + }; + + assert_eq!( + Augmentation::parse(aug_str, &bases, address_size, &section, input), + Ok(augmentation) + ); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + fn test_eh_frame_fde_no_augmentation() { + let instrs = [1, 2, 3, 4]; + let cie_offset = 1; + + let mut cie = make_test_cie(); + cie.format = Format::Dwarf32; + cie.version = 1; + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_face, + address_range: 9000, + augmentation: None, + instructions: EndianSlice::new(&instrs, LittleEndian), + }; + + let rest = [1, 2, 3, 4]; + + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .fde(kind, cie_offset, &mut fde) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = kind.section(&section); + let input = &mut
section.section().clone(); + + let result = parse_fde(section, input, |_, _, _| Ok(cie.clone())); + assert_eq!(result, Ok(fde)); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + fn test_eh_frame_fde_empty_augmentation() { + let instrs = [1, 2, 3, 4]; + let cie_offset = 1; + + let mut cie = make_test_cie(); + cie.format = Format::Dwarf32; + cie.version = 1; + cie.augmentation = Some(Augmentation::default()); + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_face, + address_range: 9000, + augmentation: Some(AugmentationData::default()), + instructions: EndianSlice::new(&instrs, LittleEndian), + }; + + let rest = [1, 2, 3, 4]; + + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .fde(kind, cie_offset, &mut fde) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = kind.section(&section); + let input = &mut section.section().clone(); + + let result = parse_fde(section, input, |_, _, _| Ok(cie.clone())); + assert_eq!(result, Ok(fde)); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + fn test_eh_frame_fde_lsda_augmentation() { + let instrs = [1, 2, 3, 4]; + let cie_offset = 1; + + let mut cie = make_test_cie(); + cie.format = Format::Dwarf32; + cie.version = 1; + cie.augmentation = Some(Augmentation::default()); + cie.augmentation.as_mut().unwrap().lsda = Some(constants::DW_EH_PE_absptr); + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_face, + address_range: 9000, + augmentation: Some(AugmentationData { + lsda: Some(Pointer::Direct(0x1122_3344)), + }), + instructions: EndianSlice::new(&instrs, LittleEndian), + }; + + let rest = [1, 2, 3, 4]; + + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .fde(kind, cie_offset,
&mut fde) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = kind.section(&section); + let input = &mut section.section().clone(); + + let result = parse_fde(section, input, |_, _, _| Ok(cie.clone())); + assert_eq!(result, Ok(fde)); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + fn test_eh_frame_fde_lsda_function_relative() { + let instrs = [1, 2, 3, 4]; + let cie_offset = 1; + + let mut cie = make_test_cie(); + cie.format = Format::Dwarf32; + cie.version = 1; + cie.augmentation = Some(Augmentation::default()); + cie.augmentation.as_mut().unwrap().lsda = + Some(constants::DW_EH_PE_funcrel | constants::DW_EH_PE_absptr); + + let mut fde = FrameDescriptionEntry { + offset: 0, + length: 0, + format: Format::Dwarf32, + cie: cie.clone(), + initial_segment: 0, + initial_address: 0xfeed_face, + address_range: 9000, + augmentation: Some(AugmentationData { + lsda: Some(Pointer::Direct(0xbeef)), + }), + instructions: EndianSlice::new(&instrs, LittleEndian), + }; + + let rest = [1, 2, 3, 4]; + + let kind = eh_frame_le(); + let section = Section::with_endian(kind.endian()) + .append_repeated(10, 10) + .fde(kind, cie_offset, &mut fde) + .append_bytes(&rest) + .get_contents() + .unwrap(); + let section = kind.section(&section); + let input = &mut section.section().range_from(10..); + + // Adjust the FDE's augmentation to be relative to the function.
+ fde.augmentation.as_mut().unwrap().lsda = Some(Pointer::Direct(0xfeed_face + 0xbeef)); + + let result = parse_fde(section, input, |_, _, _| Ok(cie.clone())); + assert_eq!(result, Ok(fde)); + assert_eq!(*input, EndianSlice::new(&rest, LittleEndian)); + } + + #[test] + fn test_eh_frame_cie_personality_function_relative_bad_context() { + let instrs = [1, 2, 3, 4]; + + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + let aug_len = Label::new(); + let aug_start = Label::new(); + let aug_end = Label::new(); + + let section = Section::with_endian(Endian::Little) + // Length + .L32(&length) + .mark(&start) + // CIE ID + .L32(0) + // Version + .D8(1) + // Augmentation + .append_bytes(b"zP\0") + // Code alignment factor + .uleb(1) + // Data alignment factor + .sleb(1) + // Return address register + .uleb(1) + // Augmentation data length. This is a uleb, be we rely on the value + // being less than 2^7 and therefore a valid uleb (can't use Label + // with uleb). + .D8(&aug_len) + .mark(&aug_start) + // Augmentation data. Personality encoding and then encoded pointer. + .D8(constants::DW_EH_PE_funcrel.0 | constants::DW_EH_PE_uleb128.0) + .uleb(1) + .mark(&aug_end) + // Initial instructions + .append_bytes(&instrs) + .mark(&end); + + length.set_const((&end - &start) as u64); + aug_len.set_const((&aug_end - &aug_start) as u64); + + let section = section.get_contents().unwrap(); + let section = EhFrame::new(&section, LittleEndian); + + let bases = BaseAddresses::default(); + let mut iter = section.entries(&bases); + assert_eq!(iter.next(), Err(Error::FuncRelativePointerInBadContext)); + } + + #[test] + fn register_rule_map_eq() { + // Different order, but still equal.
+ let map1: RegisterRuleMap = [ + (Register(0), RegisterRule::SameValue), + (Register(3), RegisterRule::Offset(1)), + ] + .iter() + .collect(); + let map2: RegisterRuleMap = [ + (Register(3), RegisterRule::Offset(1)), + (Register(0), RegisterRule::SameValue), + ] + .iter() + .collect(); + assert_eq!(map1, map2); + assert_eq!(map2, map1); + + // Not equal. + let map3: RegisterRuleMap = [ + (Register(0), RegisterRule::SameValue), + (Register(2), RegisterRule::Offset(1)), + ] + .iter() + .collect(); + let map4: RegisterRuleMap = [ + (Register(3), RegisterRule::Offset(1)), + (Register(0), RegisterRule::SameValue), + ] + .iter() + .collect(); + assert!(map3 != map4); + assert!(map4 != map3); + + // One has undefined explicitly set, other implicitly has undefined. + let mut map5 = RegisterRuleMap::::default(); + map5.set(Register(0), RegisterRule::SameValue).unwrap(); + map5.set(Register(0), RegisterRule::Undefined).unwrap(); + let map6 = RegisterRuleMap::::default(); + assert_eq!(map5, map6); + assert_eq!(map6, map5); + } + + #[test] + fn iter_register_rules() { + let row = UnwindTableRow:: { + registers: [ + (Register(0), RegisterRule::SameValue), + (Register(1), RegisterRule::Offset(1)), + (Register(2), RegisterRule::ValOffset(2)), + ] + .iter() + .collect(), + ..Default::default() + }; + + let mut found0 = false; + let mut found1 = false; + let mut found2 = false; + + for &(register, ref rule) in row.registers() { + match register.0 { + 0 => { + assert!(!found0); + found0 = true; + assert_eq!(*rule, RegisterRule::SameValue); + } + 1 => { + assert!(!found1); + found1 = true; + assert_eq!(*rule, RegisterRule::Offset(1)); + } + 2 => { + assert!(!found2); + found2 = true; + assert_eq!(*rule, RegisterRule::ValOffset(2)); + } + x => panic!("Unexpected register rule: ({}, {:?})", x, rule), + } + } + + assert!(found0); + assert!(found1); + assert!(found2); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn size_of_unwind_ctx() { + use core::mem; + let size = 
mem::size_of::>(); + let max_size = 30968; + if size > max_size { + assert_eq!(size, max_size); + } + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn size_of_register_rule_map() { + use core::mem; + let size = mem::size_of::>(); + let max_size = 6152; + if size > max_size { + assert_eq!(size, max_size); + } + } + + #[test] + fn test_parse_pointer_encoding_ok() { + use crate::endianity::NativeEndian; + let expected = constants::DW_EH_PE_uleb128 | constants::DW_EH_PE_pcrel; + let input = [expected.0, 1, 2, 3, 4]; + let input = &mut EndianSlice::new(&input, NativeEndian); + assert_eq!(parse_pointer_encoding(input), Ok(expected)); + assert_eq!(*input, EndianSlice::new(&[1, 2, 3, 4], NativeEndian)); + } + + #[test] + fn test_parse_pointer_encoding_bad_encoding() { + use crate::endianity::NativeEndian; + let expected = + constants::DwEhPe((constants::DW_EH_PE_sdata8.0 + 1) | constants::DW_EH_PE_pcrel.0); + let input = [expected.0, 1, 2, 3, 4]; + let input = &mut EndianSlice::new(&input, NativeEndian); + assert_eq!( + Err(Error::UnknownPointerEncoding(expected)), + parse_pointer_encoding(input) + ); + } + + #[test] + fn test_parse_encoded_pointer_absptr() { + let encoding = constants::DW_EH_PE_absptr; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .L32(0xf00d_f00d) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0xf00d_f00d)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_pcrel() { + let encoding = constants::DW_EH_PE_pcrel; + let expected_rest = [1, 2, 3, 4]; + + let input = 
Section::with_endian(Endian::Little) + .append_repeated(0, 0x10) + .L32(0x1) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input.range_from(0x10..); + + let parameters = PointerEncodingParameters { + bases: &BaseAddresses::default().set_eh_frame(0x100).eh_frame, + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x111)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_pcrel_undefined() { + let encoding = constants::DW_EH_PE_pcrel; + + let input = Section::with_endian(Endian::Little).L32(0x1); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Err(Error::PcRelativePointerButSectionBaseIsUndefined) + ); + } + + #[test] + fn test_parse_encoded_pointer_textrel() { + let encoding = constants::DW_EH_PE_textrel; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .L32(0x1) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &BaseAddresses::default().set_text(0x10).eh_frame, + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x11)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_textrel_undefined() { + let encoding = 
constants::DW_EH_PE_textrel; + + let input = Section::with_endian(Endian::Little).L32(0x1); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Err(Error::TextRelativePointerButTextBaseIsUndefined) + ); + } + + #[test] + fn test_parse_encoded_pointer_datarel() { + let encoding = constants::DW_EH_PE_datarel; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .L32(0x1) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &BaseAddresses::default().set_got(0x10).eh_frame, + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x11)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_datarel_undefined() { + let encoding = constants::DW_EH_PE_datarel; + + let input = Section::with_endian(Endian::Little).L32(0x1); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Err(Error::DataRelativePointerButDataBaseIsUndefined) + ); + } + + #[test] + fn test_parse_encoded_pointer_funcrel() { + let encoding = constants::DW_EH_PE_funcrel; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .L32(0x1) + 
.append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: Some(0x10), + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x11)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_funcrel_undefined() { + let encoding = constants::DW_EH_PE_funcrel; + + let input = Section::with_endian(Endian::Little).L32(0x1); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Err(Error::FuncRelativePointerInBadContext) + ); + } + + #[test] + fn test_parse_encoded_pointer_uleb128() { + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_uleb128; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .uleb(0x12_3456) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x12_3456)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_udata2() { + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_udata2; + let expected_rest = [1, 2, 3, 4]; + + let input = 
Section::with_endian(Endian::Little) + .L16(0x1234) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x1234)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_udata4() { + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_udata4; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .L32(0x1234_5678) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x1234_5678)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_udata8() { + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_udata8; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .L64(0x1234_5678_1234_5678) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x1234_5678_1234_5678)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, 
LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_sleb128() { + let encoding = constants::DW_EH_PE_textrel | constants::DW_EH_PE_sleb128; + let expected_rest = [1, 2, 3, 4]; + + let input = Section::with_endian(Endian::Little) + .sleb(-0x1111) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &BaseAddresses::default().set_text(0x1111_1111).eh_frame, + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(0x1111_0000)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_sdata2() { + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_sdata2; + let expected_rest = [1, 2, 3, 4]; + let expected = 0x111_i16; + + let input = Section::with_endian(Endian::Little) + .L16(expected as u16) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(expected as u64)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_sdata4() { + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_sdata4; + let expected_rest = [1, 2, 3, 4]; + let expected = 0x111_1111_i32; + + let input = Section::with_endian(Endian::Little) + .L32(expected as u32) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = 
PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(expected as u64)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_sdata8() { + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_sdata8; + let expected_rest = [1, 2, 3, 4]; + let expected = -0x11_1111_1222_2222_i64; + + let input = Section::with_endian(Endian::Little) + .L64(expected as u64) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Direct(expected as u64)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_encoded_pointer_omit() { + let encoding = constants::DW_EH_PE_omit; + + let input = Section::with_endian(Endian::Little).L32(0x1); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Err(Error::CannotParseOmitPointerEncoding) + ); + assert_eq!(rest, input); + } + + #[test] + fn test_parse_encoded_pointer_bad_encoding() { + let encoding = constants::DwEhPe(constants::DW_EH_PE_sdata8.0 + 1); + + let input = Section::with_endian(Endian::Little).L32(0x1); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = 
input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Err(Error::UnknownPointerEncoding(encoding)) + ); + } + + #[test] + fn test_parse_encoded_pointer_aligned() { + // FIXME: support this encoding! + + let encoding = constants::DW_EH_PE_aligned; + + let input = Section::with_endian(Endian::Little).L32(0x1); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Err(Error::UnsupportedPointerEncoding) + ); + } + + #[test] + fn test_parse_encoded_pointer_indirect() { + let expected_rest = [1, 2, 3, 4]; + let encoding = constants::DW_EH_PE_indirect; + + let input = Section::with_endian(Endian::Little) + .L32(0x1234_5678) + .append_bytes(&expected_rest); + let input = input.get_contents().unwrap(); + let input = EndianSlice::new(&input, LittleEndian); + let mut rest = input; + + let parameters = PointerEncodingParameters { + bases: &SectionBaseAddresses::default(), + func_base: None, + address_size: 4, + section: &input, + }; + assert_eq!( + parse_encoded_pointer(encoding, ¶meters, &mut rest), + Ok(Pointer::Indirect(0x1234_5678)) + ); + assert_eq!(rest, EndianSlice::new(&expected_rest, LittleEndian)); + } + + #[test] + fn test_unwind_context_reuse() { + fn unwind_one(ctx: &mut UnwindContext, data: &[u8]) { + let debug_frame = DebugFrame::new(data, NativeEndian); + let bases = Default::default(); + let result = debug_frame.unwind_info_for_address( + &bases, + ctx, + 0xbadb_ad99, + DebugFrame::cie_from_offset, + ); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), Error::NoUnwindInfoForAddress); + 
} + + // Use the same context for two different data lifetimes. + let mut ctx: UnwindContext = UnwindContext::new(); + { + let data1 = vec![]; + unwind_one(&mut ctx, &data1); + } + { + let data2 = vec![]; + unwind_one(&mut ctx, &data2); + } + } +} diff --git a/third_party/rust/gimli/src/read/dwarf.rs b/third_party/rust/gimli/src/read/dwarf.rs new file mode 100644 index 000000000000..c4a65aee3978 --- /dev/null +++ b/third_party/rust/gimli/src/read/dwarf.rs @@ -0,0 +1,1681 @@ +use alloc::string::String; +use alloc::sync::Arc; + +use crate::common::{ + DebugAddrBase, DebugAddrIndex, DebugInfoOffset, DebugLineStrOffset, DebugLocListsBase, + DebugLocListsIndex, DebugRngListsBase, DebugRngListsIndex, DebugStrOffset, DebugStrOffsetsBase, + DebugStrOffsetsIndex, DebugTypeSignature, DebugTypesOffset, DwarfFileType, DwoId, Encoding, + LocationListsOffset, RangeListsOffset, RawRangeListsOffset, SectionId, UnitSectionOffset, +}; +use crate::constants; +use crate::read::{ + Abbreviations, AbbreviationsCache, AbbreviationsCacheStrategy, AttributeValue, DebugAbbrev, + DebugAddr, DebugAranges, DebugCuIndex, DebugInfo, DebugInfoUnitHeadersIter, DebugLine, + DebugLineStr, DebugLoc, DebugLocLists, DebugRanges, DebugRngLists, DebugStr, DebugStrOffsets, + DebugTuIndex, DebugTypes, DebugTypesUnitHeadersIter, DebuggingInformationEntry, EntriesCursor, + EntriesRaw, EntriesTree, Error, IncompleteLineProgram, IndexSectionId, LocListIter, + LocationLists, Range, RangeLists, RawLocListIter, RawRngListIter, Reader, ReaderOffset, + ReaderOffsetId, Result, RngListIter, Section, UnitHeader, UnitIndex, UnitIndexSectionIterator, + UnitOffset, UnitType, +}; + +/// All of the commonly used DWARF sections. +/// +/// This is useful for storing sections when `T` does not implement `Reader`. +/// It can be used to create a `Dwarf` that references the data in `self`. +/// If `T` does implement `Reader`, then use `Dwarf` directly. 
+/// +/// ## Example Usage +/// +/// It can be useful to load DWARF sections into owned data structures, +/// such as `Vec`. However, we do not implement the `Reader` trait +/// for `Vec`, because it would be very inefficient, but this trait +/// is required for all of the methods that parse the DWARF data. +/// So we first load the DWARF sections into `Vec`s, and then use +/// `borrow` to create `Reader`s that reference the data. +/// +/// ```rust,no_run +/// # fn example() -> Result<(), gimli::Error> { +/// # let loader = |name| -> Result<_, gimli::Error> { unimplemented!() }; +/// // Read the DWARF sections into `Vec`s with whatever object loader you're using. +/// let dwarf_sections: gimli::DwarfSections> = gimli::DwarfSections::load(loader)?; +/// // Create references to the DWARF sections. +/// let dwarf: gimli::Dwarf<_> = dwarf_sections.borrow(|section| { +/// gimli::EndianSlice::new(§ion, gimli::LittleEndian) +/// }); +/// # unreachable!() +/// # } +/// ``` +#[derive(Debug)] +pub struct DwarfSections { + /// The `.debug_abbrev` section. + pub debug_abbrev: DebugAbbrev, + /// The `.debug_addr` section. + pub debug_addr: DebugAddr, + /// The `.debug_aranges` section. + pub debug_aranges: DebugAranges, + /// The `.debug_info` section. + pub debug_info: DebugInfo, + /// The `.debug_line` section. + pub debug_line: DebugLine, + /// The `.debug_line_str` section. + pub debug_line_str: DebugLineStr, + /// The `.debug_str` section. + pub debug_str: DebugStr, + /// The `.debug_str_offsets` section. + pub debug_str_offsets: DebugStrOffsets, + /// The `.debug_types` section. + pub debug_types: DebugTypes, + /// The `.debug_loc` section. + pub debug_loc: DebugLoc, + /// The `.debug_loclists` section. + pub debug_loclists: DebugLocLists, + /// The `.debug_ranges` section. + pub debug_ranges: DebugRanges, + /// The `.debug_rnglists` section. 
+ pub debug_rnglists: DebugRngLists, +} + +impl DwarfSections { + /// Try to load the DWARF sections using the given loader function. + /// + /// `section` loads a DWARF section from the object file. + /// It should return an empty section if the section does not exist. + pub fn load(mut section: F) -> core::result::Result + where + F: FnMut(SectionId) -> core::result::Result, + { + Ok(DwarfSections { + // Section types are inferred. + debug_abbrev: Section::load(&mut section)?, + debug_addr: Section::load(&mut section)?, + debug_aranges: Section::load(&mut section)?, + debug_info: Section::load(&mut section)?, + debug_line: Section::load(&mut section)?, + debug_line_str: Section::load(&mut section)?, + debug_str: Section::load(&mut section)?, + debug_str_offsets: Section::load(&mut section)?, + debug_types: Section::load(&mut section)?, + debug_loc: Section::load(&mut section)?, + debug_loclists: Section::load(&mut section)?, + debug_ranges: Section::load(&mut section)?, + debug_rnglists: Section::load(&mut section)?, + }) + } + + /// Create a `Dwarf` structure that references the data in `self`. 
+ pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> Dwarf + where + F: FnMut(&'a T) -> R, + { + Dwarf::from_sections(DwarfSections { + debug_abbrev: self.debug_abbrev.borrow(&mut borrow), + debug_addr: self.debug_addr.borrow(&mut borrow), + debug_aranges: self.debug_aranges.borrow(&mut borrow), + debug_info: self.debug_info.borrow(&mut borrow), + debug_line: self.debug_line.borrow(&mut borrow), + debug_line_str: self.debug_line_str.borrow(&mut borrow), + debug_str: self.debug_str.borrow(&mut borrow), + debug_str_offsets: self.debug_str_offsets.borrow(&mut borrow), + debug_types: self.debug_types.borrow(&mut borrow), + debug_loc: self.debug_loc.borrow(&mut borrow), + debug_loclists: self.debug_loclists.borrow(&mut borrow), + debug_ranges: self.debug_ranges.borrow(&mut borrow), + debug_rnglists: self.debug_rnglists.borrow(&mut borrow), + }) + } + + /// Create a `Dwarf` structure that references the data in `self` and `sup`. + /// + /// This is like `borrow`, but also includes the supplementary object file. + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// ## Example Usage + /// + /// ```rust,no_run + /// # fn example() -> Result<(), gimli::Error> { + /// # let loader = |name| -> Result<_, gimli::Error> { unimplemented!() }; + /// # let sup_loader = |name| -> Result<_, gimli::Error> { unimplemented!() }; + /// // Read the DWARF sections into `Vec`s with whatever object loader you're using. + /// let dwarf_sections: gimli::DwarfSections> = gimli::DwarfSections::load(loader)?; + /// let dwarf_sup_sections: gimli::DwarfSections> = gimli::DwarfSections::load(sup_loader)?; + /// // Create references to the DWARF sections. 
+ /// let dwarf = dwarf_sections.borrow_with_sup(&dwarf_sup_sections, |section| { + /// gimli::EndianSlice::new(§ion, gimli::LittleEndian) + /// }); + /// # unreachable!() + /// # } + /// ``` + pub fn borrow_with_sup<'a, F, R>(&'a self, sup: &'a Self, mut borrow: F) -> Dwarf + where + F: FnMut(&'a T) -> R, + { + let mut dwarf = self.borrow(&mut borrow); + dwarf.set_sup(sup.borrow(&mut borrow)); + dwarf + } +} + +/// All of the commonly used DWARF sections, and other common information. +#[derive(Debug, Default)] +pub struct Dwarf { + /// The `.debug_abbrev` section. + pub debug_abbrev: DebugAbbrev, + + /// The `.debug_addr` section. + pub debug_addr: DebugAddr, + + /// The `.debug_aranges` section. + pub debug_aranges: DebugAranges, + + /// The `.debug_info` section. + pub debug_info: DebugInfo, + + /// The `.debug_line` section. + pub debug_line: DebugLine, + + /// The `.debug_line_str` section. + pub debug_line_str: DebugLineStr, + + /// The `.debug_str` section. + pub debug_str: DebugStr, + + /// The `.debug_str_offsets` section. + pub debug_str_offsets: DebugStrOffsets, + + /// The `.debug_types` section. + pub debug_types: DebugTypes, + + /// The location lists in the `.debug_loc` and `.debug_loclists` sections. + pub locations: LocationLists, + + /// The range lists in the `.debug_ranges` and `.debug_rnglists` sections. + pub ranges: RangeLists, + + /// The type of this file. + pub file_type: DwarfFileType, + + /// The DWARF sections for a supplementary object file. + pub sup: Option>>, + + /// A cache of previously parsed abbreviations for units in this file. + pub abbreviations_cache: AbbreviationsCache, +} + +impl Dwarf { + /// Try to load the DWARF sections using the given loader function. + /// + /// `section` loads a DWARF section from the object file. + /// It should return an empty section if the section does not exist. + /// + /// After loading, the user should set the `file_type` field and + /// call `load_sup` if required. 
+ pub fn load(section: F) -> core::result::Result + where + F: FnMut(SectionId) -> core::result::Result, + { + let sections = DwarfSections::load(section)?; + Ok(Self::from_sections(sections)) + } + + /// Load the DWARF sections from the supplementary object file. + /// + /// `section` operates the same as for `load`. + /// + /// Sets `self.sup`, replacing any previous value. + pub fn load_sup(&mut self, section: F) -> core::result::Result<(), E> + where + F: FnMut(SectionId) -> core::result::Result, + { + self.set_sup(Self::load(section)?); + Ok(()) + } + + /// Create a `Dwarf` structure from the given sections. + /// + /// The caller should set the `file_type` and `sup` fields if required. + fn from_sections(sections: DwarfSections) -> Self { + Dwarf { + debug_abbrev: sections.debug_abbrev, + debug_addr: sections.debug_addr, + debug_aranges: sections.debug_aranges, + debug_info: sections.debug_info, + debug_line: sections.debug_line, + debug_line_str: sections.debug_line_str, + debug_str: sections.debug_str, + debug_str_offsets: sections.debug_str_offsets, + debug_types: sections.debug_types, + locations: LocationLists::new(sections.debug_loc, sections.debug_loclists), + ranges: RangeLists::new(sections.debug_ranges, sections.debug_rnglists), + file_type: DwarfFileType::Main, + sup: None, + abbreviations_cache: AbbreviationsCache::new(), + } + } + + /// Create a `Dwarf` structure that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// ## Example Usage + /// + /// It can be useful to load DWARF sections into owned data structures, + /// such as `Vec`. However, we do not implement the `Reader` trait + /// for `Vec`, because it would be very inefficient, but this trait + /// is required for all of the methods that parse the DWARF data. + /// So we first load the DWARF sections into `Vec`s, and then use + /// `borrow` to create `Reader`s that reference the data. 
+ /// + /// ```rust,no_run + /// # fn example() -> Result<(), gimli::Error> { + /// # let loader = |name| -> Result<_, gimli::Error> { unimplemented!() }; + /// # let sup_loader = |name| -> Result<_, gimli::Error> { unimplemented!() }; + /// // Read the DWARF sections into `Vec`s with whatever object loader you're using. + /// let mut owned_dwarf: gimli::Dwarf> = gimli::Dwarf::load(loader)?; + /// owned_dwarf.load_sup(sup_loader)?; + /// // Create references to the DWARF sections. + /// let dwarf = owned_dwarf.borrow(|section| { + /// gimli::EndianSlice::new(§ion, gimli::LittleEndian) + /// }); + /// # unreachable!() + /// # } + /// ``` + #[deprecated(note = "use `DwarfSections::borrow` instead")] + pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> Dwarf + where + F: FnMut(&'a T) -> R, + { + Dwarf { + debug_abbrev: self.debug_abbrev.borrow(&mut borrow), + debug_addr: self.debug_addr.borrow(&mut borrow), + debug_aranges: self.debug_aranges.borrow(&mut borrow), + debug_info: self.debug_info.borrow(&mut borrow), + debug_line: self.debug_line.borrow(&mut borrow), + debug_line_str: self.debug_line_str.borrow(&mut borrow), + debug_str: self.debug_str.borrow(&mut borrow), + debug_str_offsets: self.debug_str_offsets.borrow(&mut borrow), + debug_types: self.debug_types.borrow(&mut borrow), + locations: self.locations.borrow(&mut borrow), + ranges: self.ranges.borrow(&mut borrow), + file_type: self.file_type, + sup: self.sup().map(|sup| Arc::new(sup.borrow(borrow))), + abbreviations_cache: AbbreviationsCache::new(), + } + } + + /// Store the DWARF sections for the supplementary object file. + pub fn set_sup(&mut self, sup: Dwarf) { + self.sup = Some(Arc::new(sup)); + } + + /// Return a reference to the DWARF sections for the supplementary object file. + pub fn sup(&self) -> Option<&Dwarf> { + self.sup.as_ref().map(Arc::as_ref) + } +} + +impl Dwarf { + /// Parse abbreviations and store them in the cache. 
+ /// + /// This will iterate over the units in `self.debug_info` to determine the + /// abbreviations offsets. + /// + /// Errors during parsing abbreviations are also stored in the cache. + /// Errors during iterating over the units are ignored. + pub fn populate_abbreviations_cache(&mut self, strategy: AbbreviationsCacheStrategy) { + self.abbreviations_cache + .populate(strategy, &self.debug_abbrev, self.debug_info.units()); + } + + /// Iterate the unit headers in the `.debug_info` section. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + #[inline] + pub fn units(&self) -> DebugInfoUnitHeadersIter { + self.debug_info.units() + } + + /// Construct a new `Unit` from the given unit header. + #[inline] + pub fn unit(&self, header: UnitHeader) -> Result> { + Unit::new(self, header) + } + + /// Iterate the type-unit headers in the `.debug_types` section. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + #[inline] + pub fn type_units(&self) -> DebugTypesUnitHeadersIter { + self.debug_types.units() + } + + /// Parse the abbreviations for a compilation unit. + #[inline] + pub fn abbreviations(&self, unit: &UnitHeader) -> Result> { + self.abbreviations_cache + .get(&self.debug_abbrev, unit.debug_abbrev_offset()) + } + + /// Return the string offset at the given index. + #[inline] + pub fn string_offset( + &self, + unit: &Unit, + index: DebugStrOffsetsIndex, + ) -> Result> { + self.debug_str_offsets + .get_str_offset(unit.header.format(), unit.str_offsets_base, index) + } + + /// Return the string at the given offset in `.debug_str`. + #[inline] + pub fn string(&self, offset: DebugStrOffset) -> Result { + self.debug_str.get_str(offset) + } + + /// Return the string at the given offset in `.debug_line_str`. 
+ #[inline] + pub fn line_string(&self, offset: DebugLineStrOffset) -> Result { + self.debug_line_str.get_str(offset) + } + + /// Return the string at the given offset in the `.debug_str` + /// in the supplementary object file. + #[inline] + pub fn sup_string(&self, offset: DebugStrOffset) -> Result { + if let Some(sup) = self.sup() { + sup.debug_str.get_str(offset) + } else { + Err(Error::ExpectedStringAttributeValue) + } + } + + /// Return an attribute value as a string slice. + /// + /// If the attribute value is one of: + /// + /// - an inline `DW_FORM_string` string + /// - a `DW_FORM_strp` reference to an offset into the `.debug_str` section + /// - a `DW_FORM_strp_sup` reference to an offset into a supplementary + /// object file + /// - a `DW_FORM_line_strp` reference to an offset into the `.debug_line_str` + /// section + /// - a `DW_FORM_strx` index into the `.debug_str_offsets` entries for the unit + /// + /// then return the attribute's string value. Returns an error if the attribute + /// value does not have a string form, or if a string form has an invalid value. + pub fn attr_string(&self, unit: &Unit, attr: AttributeValue) -> Result { + match attr { + AttributeValue::String(string) => Ok(string), + AttributeValue::DebugStrRef(offset) => self.string(offset), + AttributeValue::DebugStrRefSup(offset) => self.sup_string(offset), + AttributeValue::DebugLineStrRef(offset) => self.line_string(offset), + AttributeValue::DebugStrOffsetsIndex(index) => { + let offset = self.string_offset(unit, index)?; + self.string(offset) + } + _ => Err(Error::ExpectedStringAttributeValue), + } + } + + /// Return the address at the given index. + pub fn address(&self, unit: &Unit, index: DebugAddrIndex) -> Result { + self.debug_addr + .get_address(unit.encoding().address_size, unit.addr_base, index) + } + + /// Try to return an attribute value as an address. 
+ /// + /// If the attribute value is one of: + /// + /// - a `DW_FORM_addr` + /// - a `DW_FORM_addrx` index into the `.debug_addr` entries for the unit + /// + /// then return the address. + /// Returns `None` for other forms. + pub fn attr_address(&self, unit: &Unit, attr: AttributeValue) -> Result> { + match attr { + AttributeValue::Addr(addr) => Ok(Some(addr)), + AttributeValue::DebugAddrIndex(index) => self.address(unit, index).map(Some), + _ => Ok(None), + } + } + + /// Return the range list offset for the given raw offset. + /// + /// This handles adding `DW_AT_GNU_ranges_base` if required. + pub fn ranges_offset_from_raw( + &self, + unit: &Unit, + offset: RawRangeListsOffset, + ) -> RangeListsOffset { + if self.file_type == DwarfFileType::Dwo && unit.header.version() < 5 { + RangeListsOffset(offset.0.wrapping_add(unit.rnglists_base.0)) + } else { + RangeListsOffset(offset.0) + } + } + + /// Return the range list offset at the given index. + pub fn ranges_offset( + &self, + unit: &Unit, + index: DebugRngListsIndex, + ) -> Result> { + self.ranges + .get_offset(unit.encoding(), unit.rnglists_base, index) + } + + /// Iterate over the `RangeListEntry`s starting at the given offset. + pub fn ranges( + &self, + unit: &Unit, + offset: RangeListsOffset, + ) -> Result> { + self.ranges.ranges( + offset, + unit.encoding(), + unit.low_pc, + &self.debug_addr, + unit.addr_base, + ) + } + + /// Iterate over the `RawRngListEntry`ies starting at the given offset. + pub fn raw_ranges( + &self, + unit: &Unit, + offset: RangeListsOffset, + ) -> Result> { + self.ranges.raw_ranges(offset, unit.encoding()) + } + + /// Try to return an attribute value as a range list offset. + /// + /// If the attribute value is one of: + /// + /// - a `DW_FORM_sec_offset` reference to the `.debug_ranges` or `.debug_rnglists` sections + /// - a `DW_FORM_rnglistx` index into the `.debug_rnglists` entries for the unit + /// + /// then return the range list offset of the range list. 
+ /// Returns `None` for other forms. + pub fn attr_ranges_offset( + &self, + unit: &Unit, + attr: AttributeValue, + ) -> Result>> { + match attr { + AttributeValue::RangeListsRef(offset) => { + Ok(Some(self.ranges_offset_from_raw(unit, offset))) + } + AttributeValue::DebugRngListsIndex(index) => self.ranges_offset(unit, index).map(Some), + _ => Ok(None), + } + } + + /// Try to return an attribute value as a range list entry iterator. + /// + /// If the attribute value is one of: + /// + /// - a `DW_FORM_sec_offset` reference to the `.debug_ranges` or `.debug_rnglists` sections + /// - a `DW_FORM_rnglistx` index into the `.debug_rnglists` entries for the unit + /// + /// then return an iterator over the entries in the range list. + /// Returns `None` for other forms. + pub fn attr_ranges( + &self, + unit: &Unit, + attr: AttributeValue, + ) -> Result>> { + match self.attr_ranges_offset(unit, attr)? { + Some(offset) => Ok(Some(self.ranges(unit, offset)?)), + None => Ok(None), + } + } + + /// Return an iterator for the address ranges of a `DebuggingInformationEntry`. + /// + /// This uses `DW_AT_low_pc`, `DW_AT_high_pc` and `DW_AT_ranges`. + pub fn die_ranges( + &self, + unit: &Unit, + entry: &DebuggingInformationEntry<'_, '_, R>, + ) -> Result> { + let mut low_pc = None; + let mut high_pc = None; + let mut size = None; + let mut attrs = entry.attrs(); + while let Some(attr) = attrs.next()? { + match attr.name() { + constants::DW_AT_low_pc => { + low_pc = Some( + self.attr_address(unit, attr.value())? + .ok_or(Error::UnsupportedAttributeForm)?, + ); + } + constants::DW_AT_high_pc => match attr.value() { + AttributeValue::Udata(val) => size = Some(val), + attr => { + high_pc = Some( + self.attr_address(unit, attr)? + .ok_or(Error::UnsupportedAttributeForm)?, + ); + } + }, + constants::DW_AT_ranges => { + if let Some(list) = self.attr_ranges(unit, attr.value())? 
{ + return Ok(RangeIter(RangeIterInner::List(list))); + } + } + _ => {} + } + } + let range = low_pc.and_then(|begin| { + let end = size.map(|size| begin + size).or(high_pc); + // TODO: perhaps return an error if `end` is `None` + end.map(|end| Range { begin, end }) + }); + Ok(RangeIter(RangeIterInner::Single(range))) + } + + /// Return an iterator for the address ranges of a `Unit`. + /// + /// This uses `DW_AT_low_pc`, `DW_AT_high_pc` and `DW_AT_ranges` of the + /// root `DebuggingInformationEntry`. + pub fn unit_ranges(&self, unit: &Unit) -> Result> { + let mut cursor = unit.header.entries(&unit.abbreviations); + cursor.next_dfs()?; + let root = cursor.current().ok_or(Error::MissingUnitDie)?; + self.die_ranges(unit, root) + } + + /// Return the location list offset at the given index. + pub fn locations_offset( + &self, + unit: &Unit, + index: DebugLocListsIndex, + ) -> Result> { + self.locations + .get_offset(unit.encoding(), unit.loclists_base, index) + } + + /// Iterate over the `LocationListEntry`s starting at the given offset. + pub fn locations( + &self, + unit: &Unit, + offset: LocationListsOffset, + ) -> Result> { + match self.file_type { + DwarfFileType::Main => self.locations.locations( + offset, + unit.encoding(), + unit.low_pc, + &self.debug_addr, + unit.addr_base, + ), + DwarfFileType::Dwo => self.locations.locations_dwo( + offset, + unit.encoding(), + unit.low_pc, + &self.debug_addr, + unit.addr_base, + ), + } + } + + /// Iterate over the raw `LocationListEntry`s starting at the given offset. + pub fn raw_locations( + &self, + unit: &Unit, + offset: LocationListsOffset, + ) -> Result> { + match self.file_type { + DwarfFileType::Main => self.locations.raw_locations(offset, unit.encoding()), + DwarfFileType::Dwo => self.locations.raw_locations_dwo(offset, unit.encoding()), + } + } + + /// Try to return an attribute value as a location list offset. 
+ /// + /// If the attribute value is one of: + /// + /// - a `DW_FORM_sec_offset` reference to the `.debug_loc` or `.debug_loclists` sections + /// - a `DW_FORM_loclistx` index into the `.debug_loclists` entries for the unit + /// + /// then return the location list offset of the location list. + /// Returns `None` for other forms. + pub fn attr_locations_offset( + &self, + unit: &Unit, + attr: AttributeValue, + ) -> Result>> { + match attr { + AttributeValue::LocationListsRef(offset) => Ok(Some(offset)), + AttributeValue::DebugLocListsIndex(index) => { + self.locations_offset(unit, index).map(Some) + } + _ => Ok(None), + } + } + + /// Try to return an attribute value as a location list entry iterator. + /// + /// If the attribute value is one of: + /// + /// - a `DW_FORM_sec_offset` reference to the `.debug_loc` or `.debug_loclists` sections + /// - a `DW_FORM_loclistx` index into the `.debug_loclists` entries for the unit + /// + /// then return an iterator over the entries in the location list. + /// Returns `None` for other forms. + pub fn attr_locations( + &self, + unit: &Unit, + attr: AttributeValue, + ) -> Result>> { + match self.attr_locations_offset(unit, attr)? { + Some(offset) => Ok(Some(self.locations(unit, offset)?)), + None => Ok(None), + } + } + + /// Call `Reader::lookup_offset_id` for each section, and return the first match. + /// + /// The first element of the tuple is `true` for supplementary sections. 
+ pub fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<(bool, SectionId, R::Offset)> { + None.or_else(|| self.debug_abbrev.lookup_offset_id(id)) + .or_else(|| self.debug_addr.lookup_offset_id(id)) + .or_else(|| self.debug_aranges.lookup_offset_id(id)) + .or_else(|| self.debug_info.lookup_offset_id(id)) + .or_else(|| self.debug_line.lookup_offset_id(id)) + .or_else(|| self.debug_line_str.lookup_offset_id(id)) + .or_else(|| self.debug_str.lookup_offset_id(id)) + .or_else(|| self.debug_str_offsets.lookup_offset_id(id)) + .or_else(|| self.debug_types.lookup_offset_id(id)) + .or_else(|| self.locations.lookup_offset_id(id)) + .or_else(|| self.ranges.lookup_offset_id(id)) + .map(|(id, offset)| (false, id, offset)) + .or_else(|| { + self.sup() + .and_then(|sup| sup.lookup_offset_id(id)) + .map(|(_, id, offset)| (true, id, offset)) + }) + } + + /// Returns a string representation of the given error. + /// + /// This uses information from the DWARF sections to provide more information in some cases. + pub fn format_error(&self, err: Error) -> String { + #[allow(clippy::single_match)] + match err { + Error::UnexpectedEof(id) => match self.lookup_offset_id(id) { + Some((sup, section, offset)) => { + return format!( + "{} at {}{}+0x{:x}", + err, + section.name(), + if sup { "(sup)" } else { "" }, + offset.into_u64(), + ); + } + None => {} + }, + _ => {} + } + err.description().into() + } +} + +impl Dwarf { + /// Assuming `self` was loaded from a .dwo, take the appropriate + /// sections from `parent` (which contains the skeleton unit for this + /// dwo) such as `.debug_addr` and merge them into this `Dwarf`. + pub fn make_dwo(&mut self, parent: &Dwarf) { + self.file_type = DwarfFileType::Dwo; + // These sections are always taken from the parent file and not the dwo. + self.debug_addr = parent.debug_addr.clone(); + // .debug_rnglists comes from the DWO, .debug_ranges comes from the + // parent file. 
+ self.ranges + .set_debug_ranges(parent.ranges.debug_ranges().clone()); + self.sup = parent.sup.clone(); + } +} + +/// The sections from a `.dwp` file. +/// +/// This is useful for storing sections when `T` does not implement `Reader`. +/// It can be used to create a `DwarfPackage` that references the data in `self`. +/// If `T` does implement `Reader`, then use `DwarfPackage` directly. +/// +/// ## Example Usage +/// +/// It can be useful to load DWARF sections into owned data structures, +/// such as `Vec`. However, we do not implement the `Reader` trait +/// for `Vec`, because it would be very inefficient, but this trait +/// is required for all of the methods that parse the DWARF data. +/// So we first load the DWARF sections into `Vec`s, and then use +/// `borrow` to create `Reader`s that reference the data. +/// +/// ```rust,no_run +/// # fn example() -> Result<(), gimli::Error> { +/// # let loader = |name| -> Result<_, gimli::Error> { unimplemented!() }; +/// // Read the DWARF sections into `Vec`s with whatever object loader you're using. +/// let dwp_sections: gimli::DwarfPackageSections> = gimli::DwarfPackageSections::load(loader)?; +/// // Create references to the DWARF sections. +/// let dwp: gimli::DwarfPackage<_> = dwp_sections.borrow( +/// |section| gimli::EndianSlice::new(§ion, gimli::LittleEndian), +/// gimli::EndianSlice::new(&[], gimli::LittleEndian), +/// )?; +/// # unreachable!() +/// # } +/// ``` +#[derive(Debug)] +pub struct DwarfPackageSections { + /// The `.debug_cu_index` section. + pub cu_index: DebugCuIndex, + /// The `.debug_tu_index` section. + pub tu_index: DebugTuIndex, + /// The `.debug_abbrev.dwo` section. + pub debug_abbrev: DebugAbbrev, + /// The `.debug_info.dwo` section. + pub debug_info: DebugInfo, + /// The `.debug_line.dwo` section. + pub debug_line: DebugLine, + /// The `.debug_str.dwo` section. + pub debug_str: DebugStr, + /// The `.debug_str_offsets.dwo` section. 
+ pub debug_str_offsets: DebugStrOffsets, + /// The `.debug_loc.dwo` section. + /// + /// Only present when using GNU split-dwarf extension to DWARF 4. + pub debug_loc: DebugLoc, + /// The `.debug_loclists.dwo` section. + pub debug_loclists: DebugLocLists, + /// The `.debug_rnglists.dwo` section. + pub debug_rnglists: DebugRngLists, + /// The `.debug_types.dwo` section. + /// + /// Only present when using GNU split-dwarf extension to DWARF 4. + pub debug_types: DebugTypes, +} + +impl DwarfPackageSections { + /// Try to load the `.dwp` sections using the given loader function. + /// + /// `section` loads a DWARF section from the object file. + /// It should return an empty section if the section does not exist. + pub fn load(mut section: F) -> core::result::Result + where + F: FnMut(SectionId) -> core::result::Result, + E: From, + { + Ok(DwarfPackageSections { + // Section types are inferred. + cu_index: Section::load(&mut section)?, + tu_index: Section::load(&mut section)?, + debug_abbrev: Section::load(&mut section)?, + debug_info: Section::load(&mut section)?, + debug_line: Section::load(&mut section)?, + debug_str: Section::load(&mut section)?, + debug_str_offsets: Section::load(&mut section)?, + debug_loc: Section::load(&mut section)?, + debug_loclists: Section::load(&mut section)?, + debug_rnglists: Section::load(&mut section)?, + debug_types: Section::load(&mut section)?, + }) + } + + /// Create a `DwarfPackage` structure that references the data in `self`. 
+ pub fn borrow<'a, F, R>(&'a self, mut borrow: F, empty: R) -> Result> + where + F: FnMut(&'a T) -> R, + R: Reader, + { + DwarfPackage::from_sections( + DwarfPackageSections { + cu_index: self.cu_index.borrow(&mut borrow), + tu_index: self.tu_index.borrow(&mut borrow), + debug_abbrev: self.debug_abbrev.borrow(&mut borrow), + debug_info: self.debug_info.borrow(&mut borrow), + debug_line: self.debug_line.borrow(&mut borrow), + debug_str: self.debug_str.borrow(&mut borrow), + debug_str_offsets: self.debug_str_offsets.borrow(&mut borrow), + debug_loc: self.debug_loc.borrow(&mut borrow), + debug_loclists: self.debug_loclists.borrow(&mut borrow), + debug_rnglists: self.debug_rnglists.borrow(&mut borrow), + debug_types: self.debug_types.borrow(&mut borrow), + }, + empty, + ) + } +} + +/// The sections from a `.dwp` file, with parsed indices. +#[derive(Debug)] +pub struct DwarfPackage { + /// The compilation unit index in the `.debug_cu_index` section. + pub cu_index: UnitIndex, + + /// The type unit index in the `.debug_tu_index` section. + pub tu_index: UnitIndex, + + /// The `.debug_abbrev.dwo` section. + pub debug_abbrev: DebugAbbrev, + + /// The `.debug_info.dwo` section. + pub debug_info: DebugInfo, + + /// The `.debug_line.dwo` section. + pub debug_line: DebugLine, + + /// The `.debug_str.dwo` section. + pub debug_str: DebugStr, + + /// The `.debug_str_offsets.dwo` section. + pub debug_str_offsets: DebugStrOffsets, + + /// The `.debug_loc.dwo` section. + /// + /// Only present when using GNU split-dwarf extension to DWARF 4. + pub debug_loc: DebugLoc, + + /// The `.debug_loclists.dwo` section. + pub debug_loclists: DebugLocLists, + + /// The `.debug_rnglists.dwo` section. + pub debug_rnglists: DebugRngLists, + + /// The `.debug_types.dwo` section. + /// + /// Only present when using GNU split-dwarf extension to DWARF 4. + pub debug_types: DebugTypes, + + /// An empty section. + /// + /// Used when creating `Dwarf`. 
+ pub empty: R, +} + +impl DwarfPackage { + /// Try to load the `.dwp` sections using the given loader function. + /// + /// `section` loads a DWARF section from the object file. + /// It should return an empty section if the section does not exist. + pub fn load(section: F, empty: R) -> core::result::Result + where + F: FnMut(SectionId) -> core::result::Result, + E: From, + { + let sections = DwarfPackageSections::load(section)?; + Ok(Self::from_sections(sections, empty)?) + } + + /// Create a `DwarfPackage` structure from the given sections. + fn from_sections(sections: DwarfPackageSections, empty: R) -> Result { + Ok(DwarfPackage { + cu_index: sections.cu_index.index()?, + tu_index: sections.tu_index.index()?, + debug_abbrev: sections.debug_abbrev, + debug_info: sections.debug_info, + debug_line: sections.debug_line, + debug_str: sections.debug_str, + debug_str_offsets: sections.debug_str_offsets, + debug_loc: sections.debug_loc, + debug_loclists: sections.debug_loclists, + debug_rnglists: sections.debug_rnglists, + debug_types: sections.debug_types, + empty, + }) + } + + /// Find the compilation unit with the given DWO identifier and return its section + /// contributions. + /// + /// ## Example Usage + /// + /// ```rust,no_run + /// # fn example( + /// # dwarf: &gimli::Dwarf, + /// # dwp: &gimli::DwarfPackage, + /// # dwo_id: gimli::DwoId, + /// # ) -> Result<(), gimli::Error> { + /// if let Some(dwo) = dwp.find_cu(dwo_id, dwarf)? { + /// let dwo_header = dwo.units().next()?.expect("DWO should have one unit"); + /// let dwo_unit = dwo.unit(dwo_header)?; + /// // Do something with `dwo_unit`. + /// } + /// # unreachable!() + /// # } + pub fn find_cu(&self, id: DwoId, parent: &Dwarf) -> Result>> { + let row = match self.cu_index.find(id.0) { + Some(row) => row, + None => return Ok(None), + }; + self.cu_sections(row, parent).map(Some) + } + + /// Find the type unit with the given type signature and return its section + /// contributions. 
+ pub fn find_tu( + &self, + signature: DebugTypeSignature, + parent: &Dwarf, + ) -> Result>> { + let row = match self.tu_index.find(signature.0) { + Some(row) => row, + None => return Ok(None), + }; + self.tu_sections(row, parent).map(Some) + } + + /// Return the section contributions of the compilation unit at the given index. + /// + /// The index must be in the range `1..cu_index.unit_count`. + /// + /// This function should only be needed by low level parsers. + pub fn cu_sections(&self, index: u32, parent: &Dwarf) -> Result> { + self.sections(self.cu_index.sections(index)?, parent) + } + + /// Return the section contributions of the compilation unit at the given index. + /// + /// The index must be in the range `1..tu_index.unit_count`. + /// + /// This function should only be needed by low level parsers. + pub fn tu_sections(&self, index: u32, parent: &Dwarf) -> Result> { + self.sections(self.tu_index.sections(index)?, parent) + } + + /// Return the section contributions of a unit. + /// + /// This function should only be needed by low level parsers. 
+ pub fn sections( + &self, + sections: UnitIndexSectionIterator<'_, R>, + parent: &Dwarf, + ) -> Result> { + let mut abbrev_offset = 0; + let mut abbrev_size = 0; + let mut info_offset = 0; + let mut info_size = 0; + let mut line_offset = 0; + let mut line_size = 0; + let mut loc_offset = 0; + let mut loc_size = 0; + let mut loclists_offset = 0; + let mut loclists_size = 0; + let mut str_offsets_offset = 0; + let mut str_offsets_size = 0; + let mut rnglists_offset = 0; + let mut rnglists_size = 0; + let mut types_offset = 0; + let mut types_size = 0; + for section in sections { + match section.section { + IndexSectionId::DebugAbbrev => { + abbrev_offset = section.offset; + abbrev_size = section.size; + } + IndexSectionId::DebugInfo => { + info_offset = section.offset; + info_size = section.size; + } + IndexSectionId::DebugLine => { + line_offset = section.offset; + line_size = section.size; + } + IndexSectionId::DebugLoc => { + loc_offset = section.offset; + loc_size = section.size; + } + IndexSectionId::DebugLocLists => { + loclists_offset = section.offset; + loclists_size = section.size; + } + IndexSectionId::DebugStrOffsets => { + str_offsets_offset = section.offset; + str_offsets_size = section.size; + } + IndexSectionId::DebugRngLists => { + rnglists_offset = section.offset; + rnglists_size = section.size; + } + IndexSectionId::DebugTypes => { + types_offset = section.offset; + types_size = section.size; + } + IndexSectionId::DebugMacro | IndexSectionId::DebugMacinfo => { + // These are valid but we can't parse these yet. 
+ } + } + } + + let debug_abbrev = self.debug_abbrev.dwp_range(abbrev_offset, abbrev_size)?; + let debug_info = self.debug_info.dwp_range(info_offset, info_size)?; + let debug_line = self.debug_line.dwp_range(line_offset, line_size)?; + let debug_loc = self.debug_loc.dwp_range(loc_offset, loc_size)?; + let debug_loclists = self + .debug_loclists + .dwp_range(loclists_offset, loclists_size)?; + let debug_str_offsets = self + .debug_str_offsets + .dwp_range(str_offsets_offset, str_offsets_size)?; + let debug_rnglists = self + .debug_rnglists + .dwp_range(rnglists_offset, rnglists_size)?; + let debug_types = self.debug_types.dwp_range(types_offset, types_size)?; + + let debug_str = self.debug_str.clone(); + + let debug_addr = parent.debug_addr.clone(); + let debug_ranges = parent.ranges.debug_ranges().clone(); + + let debug_aranges = self.empty.clone().into(); + let debug_line_str = self.empty.clone().into(); + + Ok(Dwarf { + debug_abbrev, + debug_addr, + debug_aranges, + debug_info, + debug_line, + debug_line_str, + debug_str, + debug_str_offsets, + debug_types, + locations: LocationLists::new(debug_loc, debug_loclists), + ranges: RangeLists::new(debug_ranges, debug_rnglists), + file_type: DwarfFileType::Dwo, + sup: parent.sup.clone(), + abbreviations_cache: AbbreviationsCache::new(), + }) + } +} + +/// All of the commonly used information for a unit in the `.debug_info` or `.debug_types` +/// sections. +#[derive(Debug)] +pub struct Unit::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// The header of the unit. + pub header: UnitHeader, + + /// The parsed abbreviations for the unit. + pub abbreviations: Arc, + + /// The `DW_AT_name` attribute of the unit. + pub name: Option, + + /// The `DW_AT_comp_dir` attribute of the unit. + pub comp_dir: Option, + + /// The `DW_AT_low_pc` attribute of the unit. Defaults to 0. + pub low_pc: u64, + + /// The `DW_AT_str_offsets_base` attribute of the unit. Defaults to 0. 
+ pub str_offsets_base: DebugStrOffsetsBase, + + /// The `DW_AT_addr_base` attribute of the unit. Defaults to 0. + pub addr_base: DebugAddrBase, + + /// The `DW_AT_loclists_base` attribute of the unit. Defaults to 0. + pub loclists_base: DebugLocListsBase, + + /// The `DW_AT_rnglists_base` attribute of the unit. Defaults to 0. + pub rnglists_base: DebugRngListsBase, + + /// The line number program of the unit. + pub line_program: Option>, + + /// The DWO ID of a skeleton unit or split compilation unit. + pub dwo_id: Option, +} + +impl Unit { + /// Construct a new `Unit` from the given unit header. + #[inline] + pub fn new(dwarf: &Dwarf, header: UnitHeader) -> Result { + let abbreviations = dwarf.abbreviations(&header)?; + Self::new_with_abbreviations(dwarf, header, abbreviations) + } + + /// Construct a new `Unit` from the given unit header and abbreviations. + /// + /// The abbreviations for this call can be obtained using `dwarf.abbreviations(&header)`. + /// The caller may implement caching to reuse the `Abbreviations` across units with the + /// same `header.debug_abbrev_offset()` value. + #[inline] + pub fn new_with_abbreviations( + dwarf: &Dwarf, + header: UnitHeader, + abbreviations: Arc, + ) -> Result { + let mut unit = Unit { + abbreviations, + name: None, + comp_dir: None, + low_pc: 0, + str_offsets_base: DebugStrOffsetsBase::default_for_encoding_and_file( + header.encoding(), + dwarf.file_type, + ), + // NB: Because the .debug_addr section never lives in a .dwo, we can assume its base is always 0 or provided. 
+ addr_base: DebugAddrBase(R::Offset::from_u8(0)), + loclists_base: DebugLocListsBase::default_for_encoding_and_file( + header.encoding(), + dwarf.file_type, + ), + rnglists_base: DebugRngListsBase::default_for_encoding_and_file( + header.encoding(), + dwarf.file_type, + ), + line_program: None, + dwo_id: match header.type_() { + UnitType::Skeleton(dwo_id) | UnitType::SplitCompilation(dwo_id) => Some(dwo_id), + _ => None, + }, + header, + }; + let mut name = None; + let mut comp_dir = None; + let mut line_program_offset = None; + let mut low_pc_attr = None; + + { + let mut cursor = unit.header.entries(&unit.abbreviations); + cursor.next_dfs()?; + let root = cursor.current().ok_or(Error::MissingUnitDie)?; + let mut attrs = root.attrs(); + while let Some(attr) = attrs.next()? { + match attr.name() { + constants::DW_AT_name => { + name = Some(attr.value()); + } + constants::DW_AT_comp_dir => { + comp_dir = Some(attr.value()); + } + constants::DW_AT_low_pc => { + low_pc_attr = Some(attr.value()); + } + constants::DW_AT_stmt_list => { + if let AttributeValue::DebugLineRef(offset) = attr.value() { + line_program_offset = Some(offset); + } + } + constants::DW_AT_str_offsets_base => { + if let AttributeValue::DebugStrOffsetsBase(base) = attr.value() { + unit.str_offsets_base = base; + } + } + constants::DW_AT_addr_base | constants::DW_AT_GNU_addr_base => { + if let AttributeValue::DebugAddrBase(base) = attr.value() { + unit.addr_base = base; + } + } + constants::DW_AT_loclists_base => { + if let AttributeValue::DebugLocListsBase(base) = attr.value() { + unit.loclists_base = base; + } + } + constants::DW_AT_rnglists_base | constants::DW_AT_GNU_ranges_base => { + if let AttributeValue::DebugRngListsBase(base) = attr.value() { + unit.rnglists_base = base; + } + } + constants::DW_AT_GNU_dwo_id => { + if unit.dwo_id.is_none() { + if let AttributeValue::DwoId(dwo_id) = attr.value() { + unit.dwo_id = Some(dwo_id); + } + } + } + _ => {} + } + } + } + + unit.name = match name { + 
Some(val) => dwarf.attr_string(&unit, val).ok(), + None => None, + }; + unit.comp_dir = match comp_dir { + Some(val) => dwarf.attr_string(&unit, val).ok(), + None => None, + }; + unit.line_program = match line_program_offset { + Some(offset) => Some(dwarf.debug_line.program( + offset, + unit.header.address_size(), + unit.comp_dir.clone(), + unit.name.clone(), + )?), + None => None, + }; + if let Some(low_pc_attr) = low_pc_attr { + if let Some(addr) = dwarf.attr_address(&unit, low_pc_attr)? { + unit.low_pc = addr; + } + } + Ok(unit) + } + + /// Return a reference to this unit and its associated `Dwarf`. + pub fn unit_ref<'a>(&'a self, dwarf: &'a Dwarf) -> UnitRef<'a, R> { + UnitRef::new(dwarf, self) + } + + /// Return the encoding parameters for this unit. + #[inline] + pub fn encoding(&self) -> Encoding { + self.header.encoding() + } + + /// Read the `DebuggingInformationEntry` at the given offset. + pub fn entry( + &self, + offset: UnitOffset, + ) -> Result> { + self.header.entry(&self.abbreviations, offset) + } + + /// Navigate this unit's `DebuggingInformationEntry`s. + #[inline] + pub fn entries(&self) -> EntriesCursor<'_, '_, R> { + self.header.entries(&self.abbreviations) + } + + /// Navigate this unit's `DebuggingInformationEntry`s + /// starting at the given offset. + #[inline] + pub fn entries_at_offset( + &self, + offset: UnitOffset, + ) -> Result> { + self.header.entries_at_offset(&self.abbreviations, offset) + } + + /// Navigate this unit's `DebuggingInformationEntry`s as a tree + /// starting at the given offset. + #[inline] + pub fn entries_tree( + &self, + offset: Option>, + ) -> Result> { + self.header.entries_tree(&self.abbreviations, offset) + } + + /// Read the raw data that defines the Debugging Information Entries. + #[inline] + pub fn entries_raw( + &self, + offset: Option>, + ) -> Result> { + self.header.entries_raw(&self.abbreviations, offset) + } + + /// Copy attributes that are subject to relocation from another unit. 
This is intended + /// to be used to copy attributes from a skeleton compilation unit to the corresponding + /// split compilation unit. + pub fn copy_relocated_attributes(&mut self, other: &Unit) { + self.low_pc = other.low_pc; + self.addr_base = other.addr_base; + if self.header.version() < 5 { + self.rnglists_base = other.rnglists_base; + } + } + + /// Find the dwo name (if any) for this unit, automatically handling the differences + /// between the standardized DWARF 5 split DWARF format and the pre-DWARF 5 GNU + /// extension. + /// + /// The returned value is relative to this unit's `comp_dir`. + pub fn dwo_name(&self) -> Result>> { + let mut entries = self.entries(); + entries.next_entry()?; + let entry = entries.current().ok_or(Error::MissingUnitDie)?; + if self.header.version() < 5 { + entry.attr_value(constants::DW_AT_GNU_dwo_name) + } else { + entry.attr_value(constants::DW_AT_dwo_name) + } + } +} + +/// A reference to a `Unit` and its associated `Dwarf`. +/// +/// These often need to be passed around together, so this struct makes that easier. +/// +/// It implements `Deref` to `Unit`, so you can use it as if it were a `Unit`. +/// It also implements methods that correspond to methods on `Dwarf` that take a `Unit`. +#[derive(Debug)] +pub struct UnitRef<'a, R: Reader> { + /// The `Dwarf` that contains the unit. + pub dwarf: &'a Dwarf, + + /// The `Unit` being referenced. + pub unit: &'a Unit, +} + +impl<'a, R: Reader> Clone for UnitRef<'a, R> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, R: Reader> Copy for UnitRef<'a, R> {} + +impl<'a, R: Reader> core::ops::Deref for UnitRef<'a, R> { + type Target = Unit; + + fn deref(&self) -> &Self::Target { + self.unit + } +} + +impl<'a, R: Reader> UnitRef<'a, R> { + /// Construct a new `UnitRef` from a `Dwarf` and a `Unit`. + pub fn new(dwarf: &'a Dwarf, unit: &'a Unit) -> Self { + UnitRef { dwarf, unit } + } + + /// Return the string offset at the given index. 
+ #[inline] + pub fn string_offset( + &self, + index: DebugStrOffsetsIndex, + ) -> Result> { + self.dwarf.string_offset(self.unit, index) + } + + /// Return the string at the given offset in `.debug_str`. + #[inline] + pub fn string(&self, offset: DebugStrOffset) -> Result { + self.dwarf.string(offset) + } + + /// Return the string at the given offset in `.debug_line_str`. + #[inline] + pub fn line_string(&self, offset: DebugLineStrOffset) -> Result { + self.dwarf.line_string(offset) + } + + /// Return the string at the given offset in the `.debug_str` + /// in the supplementary object file. + #[inline] + pub fn sup_string(&self, offset: DebugStrOffset) -> Result { + self.dwarf.sup_string(offset) + } + + /// Return an attribute value as a string slice. + /// + /// See [`Dwarf::attr_string`] for more information. + pub fn attr_string(&self, attr: AttributeValue) -> Result { + self.dwarf.attr_string(self.unit, attr) + } + + /// Return the address at the given index. + pub fn address(&self, index: DebugAddrIndex) -> Result { + self.dwarf.address(self.unit, index) + } + + /// Try to return an attribute value as an address. + /// + /// See [`Dwarf::attr_address`] for more information. + pub fn attr_address(&self, attr: AttributeValue) -> Result> { + self.dwarf.attr_address(self.unit, attr) + } + + /// Return the range list offset for the given raw offset. + /// + /// This handles adding `DW_AT_GNU_ranges_base` if required. + pub fn ranges_offset_from_raw( + &self, + offset: RawRangeListsOffset, + ) -> RangeListsOffset { + self.dwarf.ranges_offset_from_raw(self.unit, offset) + } + + /// Return the range list offset at the given index. + pub fn ranges_offset( + &self, + index: DebugRngListsIndex, + ) -> Result> { + self.dwarf.ranges_offset(self.unit, index) + } + + /// Iterate over the `RangeListEntry`s starting at the given offset. 
+ pub fn ranges(&self, offset: RangeListsOffset) -> Result> { + self.dwarf.ranges(self.unit, offset) + } + + /// Iterate over the `RawRngListEntry`ies starting at the given offset. + pub fn raw_ranges(&self, offset: RangeListsOffset) -> Result> { + self.dwarf.raw_ranges(self.unit, offset) + } + + /// Try to return an attribute value as a range list offset. + /// + /// See [`Dwarf::attr_ranges_offset`] for more information. + pub fn attr_ranges_offset( + &self, + attr: AttributeValue, + ) -> Result>> { + self.dwarf.attr_ranges_offset(self.unit, attr) + } + + /// Try to return an attribute value as a range list entry iterator. + /// + /// See [`Dwarf::attr_ranges`] for more information. + pub fn attr_ranges(&self, attr: AttributeValue) -> Result>> { + self.dwarf.attr_ranges(self.unit, attr) + } + + /// Return an iterator for the address ranges of a `DebuggingInformationEntry`. + /// + /// This uses `DW_AT_low_pc`, `DW_AT_high_pc` and `DW_AT_ranges`. + pub fn die_ranges(&self, entry: &DebuggingInformationEntry<'_, '_, R>) -> Result> { + self.dwarf.die_ranges(self.unit, entry) + } + + /// Return an iterator for the address ranges of the `Unit`. + /// + /// This uses `DW_AT_low_pc`, `DW_AT_high_pc` and `DW_AT_ranges` of the + /// root `DebuggingInformationEntry`. + pub fn unit_ranges(&self) -> Result> { + self.dwarf.unit_ranges(self.unit) + } + + /// Return the location list offset at the given index. + pub fn locations_offset( + &self, + index: DebugLocListsIndex, + ) -> Result> { + self.dwarf.locations_offset(self.unit, index) + } + + /// Iterate over the `LocationListEntry`s starting at the given offset. + pub fn locations(&self, offset: LocationListsOffset) -> Result> { + self.dwarf.locations(self.unit, offset) + } + + /// Iterate over the raw `LocationListEntry`s starting at the given offset. 
+ pub fn raw_locations( + &self, + offset: LocationListsOffset, + ) -> Result> { + self.dwarf.raw_locations(self.unit, offset) + } + + /// Try to return an attribute value as a location list offset. + /// + /// See [`Dwarf::attr_locations_offset`] for more information. + pub fn attr_locations_offset( + &self, + attr: AttributeValue, + ) -> Result>> { + self.dwarf.attr_locations_offset(self.unit, attr) + } + + /// Try to return an attribute value as a location list entry iterator. + /// + /// See [`Dwarf::attr_locations`] for more information. + pub fn attr_locations(&self, attr: AttributeValue) -> Result>> { + self.dwarf.attr_locations(self.unit, attr) + } +} + +impl UnitSectionOffset { + /// Convert an offset to be relative to the start of the given unit, + /// instead of relative to the start of the section. + /// + /// Returns `None` if the offset is not within the unit entries. + pub fn to_unit_offset(&self, unit: &Unit) -> Option> + where + R: Reader, + { + let (offset, unit_offset) = match (self, unit.header.offset()) { + ( + UnitSectionOffset::DebugInfoOffset(offset), + UnitSectionOffset::DebugInfoOffset(unit_offset), + ) => (offset.0, unit_offset.0), + ( + UnitSectionOffset::DebugTypesOffset(offset), + UnitSectionOffset::DebugTypesOffset(unit_offset), + ) => (offset.0, unit_offset.0), + _ => return None, + }; + let offset = match offset.checked_sub(unit_offset) { + Some(offset) => UnitOffset(offset), + None => return None, + }; + if !unit.header.is_valid_offset(offset) { + return None; + } + Some(offset) + } +} + +impl UnitOffset { + /// Convert an offset to be relative to the start of the .debug_info section, + /// instead of relative to the start of the given compilation unit. + /// + /// Does not check that the offset is valid. 
+ pub fn to_unit_section_offset(&self, unit: &Unit) -> UnitSectionOffset + where + R: Reader, + { + match unit.header.offset() { + UnitSectionOffset::DebugInfoOffset(unit_offset) => { + DebugInfoOffset(unit_offset.0 + self.0).into() + } + UnitSectionOffset::DebugTypesOffset(unit_offset) => { + DebugTypesOffset(unit_offset.0 + self.0).into() + } + } + } +} + +/// An iterator for the address ranges of a `DebuggingInformationEntry`. +/// +/// Returned by `Dwarf::die_ranges` and `Dwarf::unit_ranges`. +#[derive(Debug)] +pub struct RangeIter(RangeIterInner); + +#[derive(Debug)] +enum RangeIterInner { + Single(Option), + List(RngListIter), +} + +impl Default for RangeIter { + fn default() -> Self { + RangeIter(RangeIterInner::Single(None)) + } +} + +impl RangeIter { + /// Advance the iterator to the next range. + pub fn next(&mut self) -> Result> { + match self.0 { + RangeIterInner::Single(ref mut range) => Ok(range.take()), + RangeIterInner::List(ref mut list) => list.next(), + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for RangeIter { + type Item = Range; + type Error = Error; + + #[inline] + fn next(&mut self) -> ::core::result::Result, Self::Error> { + RangeIter::next(self) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::read::EndianSlice; + use crate::{Endianity, LittleEndian}; + + /// Ensure that `Dwarf` is covariant wrt R. + #[test] + fn test_dwarf_variance() { + /// This only needs to compile. + fn _f<'a: 'b, 'b, E: Endianity>(x: Dwarf>) -> Dwarf> { + x + } + } + + /// Ensure that `Unit` is covariant wrt R. + #[test] + fn test_dwarf_unit_variance() { + /// This only needs to compile. 
+ fn _f<'a: 'b, 'b, E: Endianity>(x: Unit>) -> Unit> { + x + } + } + + #[test] + fn test_send() { + fn assert_is_send() {} + assert_is_send::>>(); + assert_is_send::>>(); + } + + #[test] + fn test_format_error() { + let dwarf_sections = DwarfSections::load(|_| -> Result<_> { Ok(vec![1, 2]) }).unwrap(); + let sup_sections = DwarfSections::load(|_| -> Result<_> { Ok(vec![1, 2]) }).unwrap(); + let dwarf = dwarf_sections.borrow_with_sup(&sup_sections, |section| { + EndianSlice::new(section, LittleEndian) + }); + + match dwarf.debug_str.get_str(DebugStrOffset(1)) { + Ok(r) => panic!("Unexpected str {:?}", r), + Err(e) => { + assert_eq!( + dwarf.format_error(e), + "Hit the end of input before it was expected at .debug_str+0x1" + ); + } + } + match dwarf.sup().unwrap().debug_str.get_str(DebugStrOffset(1)) { + Ok(r) => panic!("Unexpected str {:?}", r), + Err(e) => { + assert_eq!( + dwarf.format_error(e), + "Hit the end of input before it was expected at .debug_str(sup)+0x1" + ); + } + } + assert_eq!(dwarf.format_error(Error::Io), Error::Io.description()); + } +} diff --git a/third_party/rust/gimli/src/read/endian_reader.rs b/third_party/rust/gimli/src/read/endian_reader.rs new file mode 100644 index 000000000000..c35267ef0dea --- /dev/null +++ b/third_party/rust/gimli/src/read/endian_reader.rs @@ -0,0 +1,639 @@ +//! Defining custom `Reader`s quickly. + +use alloc::borrow::Cow; +use alloc::rc::Rc; +use alloc::string::String; +use alloc::sync::Arc; +use core::fmt::Debug; +use core::ops::{Deref, Index, Range, RangeFrom, RangeTo}; +use core::slice; +use core::str; +use stable_deref_trait::CloneStableDeref; + +use crate::endianity::Endianity; +use crate::read::{Error, Reader, ReaderOffsetId, Result}; + +/// A reference counted, non-thread-safe slice of bytes and associated +/// endianity. 
+/// +/// ``` +/// # #[cfg(feature = "std")] { +/// use std::rc::Rc; +/// +/// let buf = Rc::from(&[1, 2, 3, 4][..]); +/// let reader = gimli::EndianRcSlice::new(buf, gimli::NativeEndian); +/// # let _ = reader; +/// # } +/// ``` +pub type EndianRcSlice = EndianReader>; + +/// An atomically reference counted, thread-safe slice of bytes and associated +/// endianity. +/// +/// ``` +/// # #[cfg(feature = "std")] { +/// use std::sync::Arc; +/// +/// let buf = Arc::from(&[1, 2, 3, 4][..]); +/// let reader = gimli::EndianArcSlice::new(buf, gimli::NativeEndian); +/// # let _ = reader; +/// # } +/// ``` +pub type EndianArcSlice = EndianReader>; + +/// An easy way to define a custom `Reader` implementation with a reference to a +/// generic buffer of bytes and an associated endianity. +/// +/// Note that the whole original buffer is kept alive in memory even if there is +/// only one reader that references only a handful of bytes from that original +/// buffer. That is, `EndianReader` will not do any copying, moving, or +/// compacting in order to free up unused regions of the original buffer. If you +/// require this kind of behavior, it is up to you to implement `Reader` +/// directly by-hand. +/// +/// # Example +/// +/// Say you have an `mmap`ed file that you want to serve as a `gimli::Reader`. +/// You can wrap that `mmap`ed file up in a `MmapFile` type and use +/// `EndianReader>` or `EndianReader>` as readers as +/// long as `MmapFile` dereferences to the underlying `[u8]` data. +/// +/// ``` +/// use std::io; +/// use std::ops::Deref; +/// use std::path::Path; +/// use std::slice; +/// use std::sync::Arc; +/// +/// /// A type that represents an `mmap`ed file. +/// #[derive(Debug)] +/// pub struct MmapFile { +/// ptr: *const u8, +/// len: usize, +/// } +/// +/// impl MmapFile { +/// pub fn new(path: &Path) -> io::Result { +/// // Call `mmap` and check for errors and all that... 
+/// # unimplemented!() +/// } +/// } +/// +/// impl Drop for MmapFile { +/// fn drop(&mut self) { +/// // Call `munmap` to clean up after ourselves... +/// # unimplemented!() +/// } +/// } +/// +/// // And `MmapFile` can deref to a slice of the `mmap`ed region of memory. +/// impl Deref for MmapFile { +/// type Target = [u8]; +/// fn deref(&self) -> &[u8] { +/// unsafe { +/// slice::from_raw_parts(self.ptr, self.len) +/// } +/// } +/// } +/// +/// /// A type that represents a shared `mmap`ed file. +/// #[derive(Debug, Clone)] +/// pub struct ArcMmapFile(Arc); +/// +/// // And `ArcMmapFile` can deref to a slice of the `mmap`ed region of memory. +/// impl Deref for ArcMmapFile { +/// type Target = [u8]; +/// fn deref(&self) -> &[u8] { +/// &self.0 +/// } +/// } +/// +/// // These are both valid for any `Rc` or `Arc`. +/// unsafe impl gimli::StableDeref for ArcMmapFile {} +/// unsafe impl gimli::CloneStableDeref for ArcMmapFile {} +/// +/// /// A `gimli::Reader` that is backed by an `mmap`ed file! +/// pub type MmapFileReader = gimli::EndianReader; +/// # fn test(_: &MmapFileReader) { } +/// ``` +#[derive(Debug, Clone, Copy, Hash)] +pub struct EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ + range: SubRange, + endian: Endian, +} + +impl PartialEq> for EndianReader +where + Endian: Endianity, + T1: CloneStableDeref + Debug, + T2: CloneStableDeref + Debug, +{ + fn eq(&self, rhs: &EndianReader) -> bool { + self.bytes() == rhs.bytes() + } +} + +impl Eq for EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ +} + +// This is separated out from `EndianReader` so that we can avoid running afoul +// of borrowck. We need to `read_slice(&mut self, ...) -> &[u8]` and then call +// `self.endian.read_whatever` on the result. The problem is that the returned +// slice keeps the `&mut self` borrow active, so we wouldn't be able to access +// `self.endian`. 
Splitting the sub-range out from the endian lets us work +// around this, making it so that only the `self.range` borrow is held active, +// not all of `self`. +// +// This also serves to encapsulate the unsafe code concerning `CloneStableDeref`. +// The `bytes` member is held so that the bytes live long enough, and the +// `CloneStableDeref` ensures these bytes never move. The `ptr` and `len` +// members point inside `bytes`, and are updated during read operations. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct SubRange +where + T: CloneStableDeref + Debug, +{ + bytes: T, + ptr: *const u8, + len: usize, +} + +unsafe impl Send for SubRange where T: CloneStableDeref + Debug + Send {} + +unsafe impl Sync for SubRange where T: CloneStableDeref + Debug + Sync {} + +impl SubRange +where + T: CloneStableDeref + Debug, +{ + #[inline] + fn new(bytes: T) -> Self { + let ptr = bytes.as_ptr(); + let len = bytes.len(); + SubRange { bytes, ptr, len } + } + + #[inline] + fn bytes(&self) -> &[u8] { + // Safe because `T` implements `CloneStableDeref`, `bytes` can't be modified, + // and all operations that modify `ptr` and `len` ensure they stay in range. + unsafe { slice::from_raw_parts(self.ptr, self.len) } + } + + #[inline] + fn len(&self) -> usize { + self.len + } + + #[inline] + fn truncate(&mut self, len: usize) { + assert!(len <= self.len); + self.len = len; + } + + #[inline] + fn skip(&mut self, len: usize) { + assert!(len <= self.len); + self.ptr = unsafe { self.ptr.add(len) }; + self.len -= len; + } + + #[inline] + fn read_slice(&mut self, len: usize) -> Option<&[u8]> { + if self.len() < len { + None + } else { + // Same as for `bytes()`. + let bytes = unsafe { slice::from_raw_parts(self.ptr, len) }; + self.skip(len); + Some(bytes) + } + } +} + +impl EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ + /// Construct a new `EndianReader` with the given bytes. 
+ #[inline] + pub fn new(bytes: T, endian: Endian) -> EndianReader { + EndianReader { + range: SubRange::new(bytes), + endian, + } + } + + /// Return a reference to the raw bytes underlying this reader. + #[inline] + pub fn bytes(&self) -> &[u8] { + self.range.bytes() + } +} + +/// # Range Methods +/// +/// Unfortunately, `std::ops::Index` *must* return a reference, so we can't +/// implement `Index>` to return a new `EndianReader` the way we +/// would like to. Instead, we abandon fancy indexing operators and have these +/// plain old methods. +impl EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ + /// Take the given `start..end` range of the underlying buffer and return a + /// new `EndianReader`. + /// + /// ``` + /// # #[cfg(feature = "std")] { + /// use gimli::{EndianReader, LittleEndian}; + /// use std::sync::Arc; + /// + /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); + /// let reader = EndianReader::new(buf.clone(), LittleEndian); + /// assert_eq!(reader.range(1..3), + /// EndianReader::new(&buf[1..3], LittleEndian)); + /// # } + /// ``` + /// + /// # Panics + /// + /// Panics if the range is out of bounds. + pub fn range(&self, idx: Range) -> EndianReader { + let mut r = self.clone(); + r.range.skip(idx.start); + r.range.truncate(idx.len()); + r + } + + /// Take the given `start..` range of the underlying buffer and return a new + /// `EndianReader`. + /// + /// ``` + /// # #[cfg(feature = "std")] { + /// use gimli::{EndianReader, LittleEndian}; + /// use std::sync::Arc; + /// + /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); + /// let reader = EndianReader::new(buf.clone(), LittleEndian); + /// assert_eq!(reader.range_from(2..), + /// EndianReader::new(&buf[2..], LittleEndian)); + /// # } + /// ``` + /// + /// # Panics + /// + /// Panics if the range is out of bounds. 
+ pub fn range_from(&self, idx: RangeFrom) -> EndianReader { + let mut r = self.clone(); + r.range.skip(idx.start); + r + } + + /// Take the given `..end` range of the underlying buffer and return a new + /// `EndianReader`. + /// + /// ``` + /// # #[cfg(feature = "std")] { + /// use gimli::{EndianReader, LittleEndian}; + /// use std::sync::Arc; + /// + /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); + /// let reader = EndianReader::new(buf.clone(), LittleEndian); + /// assert_eq!(reader.range_to(..3), + /// EndianReader::new(&buf[..3], LittleEndian)); + /// # } + /// ``` + /// + /// # Panics + /// + /// Panics if the range is out of bounds. + pub fn range_to(&self, idx: RangeTo) -> EndianReader { + let mut r = self.clone(); + r.range.truncate(idx.end); + r + } +} + +impl Index for EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ + type Output = u8; + fn index(&self, idx: usize) -> &Self::Output { + &self.bytes()[idx] + } +} + +impl Index> for EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ + type Output = [u8]; + fn index(&self, idx: RangeFrom) -> &Self::Output { + &self.bytes()[idx] + } +} + +impl Deref for EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ + type Target = [u8]; + fn deref(&self) -> &Self::Target { + self.bytes() + } +} + +impl Reader for EndianReader +where + Endian: Endianity, + T: CloneStableDeref + Debug, +{ + type Endian = Endian; + type Offset = usize; + + #[inline] + fn endian(&self) -> Endian { + self.endian + } + + #[inline] + fn len(&self) -> usize { + self.range.len() + } + + #[inline] + fn empty(&mut self) { + self.range.truncate(0); + } + + #[inline] + fn truncate(&mut self, len: usize) -> Result<()> { + if self.len() < len { + Err(Error::UnexpectedEof(self.offset_id())) + } else { + self.range.truncate(len); + Ok(()) + } + } + + #[inline] + fn offset_from(&self, base: &EndianReader) -> usize { + let base_ptr = base.bytes().as_ptr() as usize; 
+ let ptr = self.bytes().as_ptr() as usize; + debug_assert!(base_ptr <= ptr); + debug_assert!(ptr + self.bytes().len() <= base_ptr + base.bytes().len()); + ptr - base_ptr + } + + #[inline] + fn offset_id(&self) -> ReaderOffsetId { + ReaderOffsetId(self.bytes().as_ptr() as u64) + } + + #[inline] + fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option { + let id = id.0; + let self_id = self.bytes().as_ptr() as u64; + let self_len = self.bytes().len() as u64; + if id >= self_id && id <= self_id + self_len { + Some((id - self_id) as usize) + } else { + None + } + } + + #[inline] + fn find(&self, byte: u8) -> Result { + self.bytes() + .iter() + .position(|x| *x == byte) + .ok_or_else(|| Error::UnexpectedEof(self.offset_id())) + } + + #[inline] + fn skip(&mut self, len: usize) -> Result<()> { + if self.len() < len { + Err(Error::UnexpectedEof(self.offset_id())) + } else { + self.range.skip(len); + Ok(()) + } + } + + #[inline] + fn split(&mut self, len: usize) -> Result { + if self.len() < len { + Err(Error::UnexpectedEof(self.offset_id())) + } else { + let mut r = self.clone(); + r.range.truncate(len); + self.range.skip(len); + Ok(r) + } + } + + #[inline] + fn to_slice(&self) -> Result> { + Ok(self.bytes().into()) + } + + #[inline] + fn to_string(&self) -> Result> { + match str::from_utf8(self.bytes()) { + Ok(s) => Ok(s.into()), + _ => Err(Error::BadUtf8), + } + } + + #[inline] + fn to_string_lossy(&self) -> Result> { + Ok(String::from_utf8_lossy(self.bytes())) + } + + #[inline] + fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> { + match self.range.read_slice(buf.len()) { + Some(slice) => { + buf.copy_from_slice(slice); + Ok(()) + } + None => Err(Error::UnexpectedEof(self.offset_id())), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::endianity::NativeEndian; + use crate::read::Reader; + + fn native_reader + Debug>( + bytes: T, + ) -> EndianReader { + EndianReader::new(bytes, NativeEndian) + } + + const BUF: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 
9, 0]; + + #[test] + fn test_reader_split() { + let mut reader = native_reader(BUF); + let left = reader.split(3).unwrap(); + assert_eq!(left, native_reader(&BUF[..3])); + assert_eq!(reader, native_reader(&BUF[3..])); + } + + #[test] + fn test_reader_split_out_of_bounds() { + let mut reader = native_reader(BUF); + assert!(reader.split(30).is_err()); + } + + #[test] + fn bytes_and_len_and_range_and_eq() { + let reader = native_reader(BUF); + assert_eq!(reader.len(), BUF.len()); + assert_eq!(reader.bytes(), BUF); + assert_eq!(reader, native_reader(BUF)); + + let range = reader.range(2..8); + let buf_range = &BUF[2..8]; + assert_eq!(range.len(), buf_range.len()); + assert_eq!(range.bytes(), buf_range); + assert_ne!(range, native_reader(BUF)); + assert_eq!(range, native_reader(buf_range)); + + let range_from = range.range_from(1..); + let buf_range_from = &buf_range[1..]; + assert_eq!(range_from.len(), buf_range_from.len()); + assert_eq!(range_from.bytes(), buf_range_from); + assert_ne!(range_from, native_reader(BUF)); + assert_eq!(range_from, native_reader(buf_range_from)); + + let range_to = range_from.range_to(..4); + let buf_range_to = &buf_range_from[..4]; + assert_eq!(range_to.len(), buf_range_to.len()); + assert_eq!(range_to.bytes(), buf_range_to); + assert_ne!(range_to, native_reader(BUF)); + assert_eq!(range_to, native_reader(buf_range_to)); + } + + #[test] + fn find() { + let mut reader = native_reader(BUF); + reader.skip(2).unwrap(); + assert_eq!( + reader.find(5), + Ok(BUF[2..].iter().position(|x| *x == 5).unwrap()) + ); + } + + #[test] + fn indexing() { + let mut reader = native_reader(BUF); + reader.skip(2).unwrap(); + assert_eq!(reader[0], BUF[2]); + } + + #[test] + #[should_panic] + fn indexing_out_of_bounds() { + let mut reader = native_reader(BUF); + reader.skip(2).unwrap(); + let _ = reader[900]; + } + + #[test] + fn endian() { + let reader = native_reader(BUF); + assert_eq!(reader.endian(), NativeEndian); + } + + #[test] + fn empty() { + let mut 
reader = native_reader(BUF); + assert!(!reader.is_empty()); + reader.empty(); + assert!(reader.is_empty()); + assert!(reader.bytes().is_empty()); + } + + #[test] + fn truncate() { + let reader = native_reader(BUF); + let mut reader = reader.range(2..8); + reader.truncate(2).unwrap(); + assert_eq!(reader.bytes(), &BUF[2..4]); + } + + #[test] + fn offset_from() { + let reader = native_reader(BUF); + let sub = reader.range(2..8); + assert_eq!(sub.offset_from(&reader), 2); + } + + #[test] + fn skip() { + let mut reader = native_reader(BUF); + reader.skip(2).unwrap(); + assert_eq!(reader.bytes(), &BUF[2..]); + } + + #[test] + fn to_slice() { + assert_eq!( + native_reader(BUF).range(2..5).to_slice(), + Ok(Cow::from(&BUF[2..5])) + ); + } + + #[test] + fn to_string_ok() { + let buf = b"hello, world!"; + let reader = native_reader(&buf[..]); + let reader = reader.range_from(7..); + assert_eq!(reader.to_string(), Ok(Cow::from("world!"))); + } + + // The rocket emoji (🚀 = [0xf0, 0x9f, 0x9a, 0x80]) but rotated left by one + // to make it invalid UTF-8. + const BAD_UTF8: &[u8] = &[0x9f, 0x9a, 0x80, 0xf0]; + + #[test] + fn to_string_err() { + let reader = native_reader(BAD_UTF8); + assert!(reader.to_string().is_err()); + } + + #[test] + fn to_string_lossy() { + let reader = native_reader(BAD_UTF8); + assert_eq!(reader.to_string_lossy(), Ok(Cow::from("����"))); + } + + #[test] + fn read_u8_array() { + let mut reader = native_reader(BAD_UTF8); + reader.skip(1).unwrap(); + let arr: [u8; 2] = reader.read_u8_array().unwrap(); + assert_eq!(arr, &BAD_UTF8[1..3]); + assert_eq!(reader.bytes(), &BAD_UTF8[3..]); + } +} diff --git a/third_party/rust/gimli/src/read/endian_slice.rs b/third_party/rust/gimli/src/read/endian_slice.rs new file mode 100644 index 000000000000..03351429ef25 --- /dev/null +++ b/third_party/rust/gimli/src/read/endian_slice.rs @@ -0,0 +1,360 @@ +//! Working with byte slices that have an associated endianity. 
+ +#[cfg(feature = "read")] +use alloc::borrow::Cow; +#[cfg(feature = "read")] +use alloc::string::String; +use core::fmt; +use core::ops::{Deref, Range, RangeFrom, RangeTo}; +use core::str; + +use crate::endianity::Endianity; +use crate::read::{Error, Reader, ReaderOffsetId, Result}; + +/// A `&[u8]` slice with endianity metadata. +/// +/// This implements the `Reader` trait, which is used for all reading of DWARF sections. +#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)] +pub struct EndianSlice<'input, Endian> +where + Endian: Endianity, +{ + slice: &'input [u8], + endian: Endian, +} + +impl<'input, Endian> EndianSlice<'input, Endian> +where + Endian: Endianity, +{ + /// Construct a new `EndianSlice` with the given slice and endianity. + #[inline] + pub fn new(slice: &'input [u8], endian: Endian) -> EndianSlice<'input, Endian> { + EndianSlice { slice, endian } + } + + /// Return a reference to the raw slice. + #[inline] + #[doc(hidden)] + #[deprecated(note = "Method renamed to EndianSlice::slice; use that instead.")] + pub fn buf(&self) -> &'input [u8] { + self.slice + } + + /// Return a reference to the raw slice. + #[inline] + pub fn slice(&self) -> &'input [u8] { + self.slice + } + + /// Split the slice in two at the given index, resulting in the tuple where + /// the first item has range [0, idx), and the second has range [idx, + /// len). Panics if the index is out of bounds. + #[inline] + pub fn split_at( + &self, + idx: usize, + ) -> (EndianSlice<'input, Endian>, EndianSlice<'input, Endian>) { + (self.range_to(..idx), self.range_from(idx..)) + } + + /// Find the first occurrence of a byte in the slice, and return its index. + #[inline] + pub fn find(&self, byte: u8) -> Option { + self.slice.iter().position(|ch| *ch == byte) + } + + /// Return the offset of the start of the slice relative to the start + /// of the given slice. 
+ #[inline] + pub fn offset_from(&self, base: EndianSlice<'input, Endian>) -> usize { + let base_ptr = base.slice.as_ptr() as usize; + let ptr = self.slice.as_ptr() as usize; + debug_assert!(base_ptr <= ptr); + debug_assert!(ptr + self.slice.len() <= base_ptr + base.slice.len()); + ptr - base_ptr + } + + /// Converts the slice to a string using `str::from_utf8`. + /// + /// Returns an error if the slice contains invalid characters. + #[inline] + pub fn to_string(&self) -> Result<&'input str> { + str::from_utf8(self.slice).map_err(|_| Error::BadUtf8) + } + + /// Converts the slice to a string, including invalid characters, + /// using `String::from_utf8_lossy`. + #[cfg(feature = "read")] + #[inline] + pub fn to_string_lossy(&self) -> Cow<'input, str> { + String::from_utf8_lossy(self.slice) + } + + #[inline] + fn read_slice(&mut self, len: usize) -> Result<&'input [u8]> { + if self.slice.len() < len { + Err(Error::UnexpectedEof(self.offset_id())) + } else { + let val = &self.slice[..len]; + self.slice = &self.slice[len..]; + Ok(val) + } + } +} + +/// # Range Methods +/// +/// Unfortunately, `std::ops::Index` *must* return a reference, so we can't +/// implement `Index>` to return a new `EndianSlice` the way we would +/// like to. Instead, we abandon fancy indexing operators and have these plain +/// old methods. +impl<'input, Endian> EndianSlice<'input, Endian> +where + Endian: Endianity, +{ + /// Take the given `start..end` range of the underlying slice and return a + /// new `EndianSlice`. 
+ /// + /// ``` + /// use gimli::{EndianSlice, LittleEndian}; + /// + /// let slice = &[0x01, 0x02, 0x03, 0x04]; + /// let endian_slice = EndianSlice::new(slice, LittleEndian); + /// assert_eq!(endian_slice.range(1..3), + /// EndianSlice::new(&slice[1..3], LittleEndian)); + /// ``` + pub fn range(&self, idx: Range) -> EndianSlice<'input, Endian> { + EndianSlice { + slice: &self.slice[idx], + endian: self.endian, + } + } + + /// Take the given `start..` range of the underlying slice and return a new + /// `EndianSlice`. + /// + /// ``` + /// use gimli::{EndianSlice, LittleEndian}; + /// + /// let slice = &[0x01, 0x02, 0x03, 0x04]; + /// let endian_slice = EndianSlice::new(slice, LittleEndian); + /// assert_eq!(endian_slice.range_from(2..), + /// EndianSlice::new(&slice[2..], LittleEndian)); + /// ``` + pub fn range_from(&self, idx: RangeFrom) -> EndianSlice<'input, Endian> { + EndianSlice { + slice: &self.slice[idx], + endian: self.endian, + } + } + + /// Take the given `..end` range of the underlying slice and return a new + /// `EndianSlice`. 
+ /// + /// ``` + /// use gimli::{EndianSlice, LittleEndian}; + /// + /// let slice = &[0x01, 0x02, 0x03, 0x04]; + /// let endian_slice = EndianSlice::new(slice, LittleEndian); + /// assert_eq!(endian_slice.range_to(..3), + /// EndianSlice::new(&slice[..3], LittleEndian)); + /// ``` + pub fn range_to(&self, idx: RangeTo) -> EndianSlice<'input, Endian> { + EndianSlice { + slice: &self.slice[idx], + endian: self.endian, + } + } +} + +impl<'input, Endian> Deref for EndianSlice<'input, Endian> +where + Endian: Endianity, +{ + type Target = [u8]; + fn deref(&self) -> &Self::Target { + self.slice + } +} + +impl<'input, Endian: Endianity> fmt::Debug for EndianSlice<'input, Endian> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> core::result::Result<(), fmt::Error> { + fmt.debug_tuple("EndianSlice") + .field(&self.endian) + .field(&DebugBytes(self.slice)) + .finish() + } +} + +struct DebugBytes<'input>(&'input [u8]); + +impl<'input> core::fmt::Debug for DebugBytes<'input> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> core::result::Result<(), fmt::Error> { + let mut list = fmt.debug_list(); + list.entries(self.0.iter().take(8).copied().map(DebugByte)); + if self.0.len() > 8 { + list.entry(&DebugLen(self.0.len())); + } + list.finish() + } +} + +struct DebugByte(u8); + +impl fmt::Debug for DebugByte { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "0x{:02x}", self.0) + } +} + +struct DebugLen(usize); + +impl fmt::Debug for DebugLen { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "...; {}", self.0) + } +} + +impl<'input, Endian> Reader for EndianSlice<'input, Endian> +where + Endian: Endianity, +{ + type Endian = Endian; + type Offset = usize; + + #[inline] + fn endian(&self) -> Endian { + self.endian + } + + #[inline] + fn len(&self) -> usize { + self.slice.len() + } + + #[inline] + fn is_empty(&self) -> bool { + self.slice.is_empty() + } + + #[inline] + fn empty(&mut self) { + self.slice = &[]; + } + + 
#[inline] + fn truncate(&mut self, len: usize) -> Result<()> { + if self.slice.len() < len { + Err(Error::UnexpectedEof(self.offset_id())) + } else { + self.slice = &self.slice[..len]; + Ok(()) + } + } + + #[inline] + fn offset_from(&self, base: &Self) -> usize { + self.offset_from(*base) + } + + #[inline] + fn offset_id(&self) -> ReaderOffsetId { + ReaderOffsetId(self.slice.as_ptr() as u64) + } + + #[inline] + fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option { + let id = id.0; + let self_id = self.slice.as_ptr() as u64; + let self_len = self.slice.len() as u64; + if id >= self_id && id <= self_id + self_len { + Some((id - self_id) as usize) + } else { + None + } + } + + #[inline] + fn find(&self, byte: u8) -> Result { + self.find(byte) + .ok_or_else(|| Error::UnexpectedEof(self.offset_id())) + } + + #[inline] + fn skip(&mut self, len: usize) -> Result<()> { + if self.slice.len() < len { + Err(Error::UnexpectedEof(self.offset_id())) + } else { + self.slice = &self.slice[len..]; + Ok(()) + } + } + + #[inline] + fn split(&mut self, len: usize) -> Result { + let slice = self.read_slice(len)?; + Ok(EndianSlice::new(slice, self.endian)) + } + + #[cfg(not(feature = "read"))] + fn cannot_implement() -> super::reader::seal_if_no_alloc::Sealed { + super::reader::seal_if_no_alloc::Sealed + } + + #[cfg(feature = "read")] + #[inline] + fn to_slice(&self) -> Result> { + Ok(self.slice.into()) + } + + #[cfg(feature = "read")] + #[inline] + fn to_string(&self) -> Result> { + match str::from_utf8(self.slice) { + Ok(s) => Ok(s.into()), + _ => Err(Error::BadUtf8), + } + } + + #[cfg(feature = "read")] + #[inline] + fn to_string_lossy(&self) -> Result> { + Ok(String::from_utf8_lossy(self.slice)) + } + + #[inline] + fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> { + let slice = self.read_slice(buf.len())?; + buf.copy_from_slice(slice); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::endianity::NativeEndian; + + #[test] + fn 
test_endian_slice_split_at() { + let endian = NativeEndian; + let slice = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; + let eb = EndianSlice::new(slice, endian); + assert_eq!( + eb.split_at(3), + ( + EndianSlice::new(&slice[..3], endian), + EndianSlice::new(&slice[3..], endian) + ) + ); + } + + #[test] + #[should_panic] + fn test_endian_slice_split_at_out_of_bounds() { + let slice = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; + let eb = EndianSlice::new(slice, NativeEndian); + eb.split_at(30); + } +} diff --git a/third_party/rust/gimli/src/read/index.rs b/third_party/rust/gimli/src/read/index.rs new file mode 100644 index 000000000000..7629a27819e3 --- /dev/null +++ b/third_party/rust/gimli/src/read/index.rs @@ -0,0 +1,627 @@ +use core::slice; + +use crate::common::SectionId; +use crate::constants; +use crate::endianity::Endianity; +use crate::read::{EndianSlice, Error, Reader, ReaderOffset, Result, Section}; + +/// The data in the `.debug_cu_index` section of a `.dwp` file. +/// +/// This section contains the compilation unit index. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugCuIndex { + section: R, +} + +impl<'input, Endian> DebugCuIndex> +where + Endian: Endianity, +{ + /// Construct a new `DebugCuIndex` instance from the data in the `.debug_cu_index` + /// section. + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl DebugCuIndex { + /// Create a `DebugCuIndex` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfPackageSections::borrow`. 
+ pub(crate) fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugCuIndex + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugCuIndex { + fn id() -> SectionId { + SectionId::DebugCuIndex + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugCuIndex { + fn from(section: R) -> Self { + DebugCuIndex { section } + } +} + +impl DebugCuIndex { + /// Parse the index header. + pub fn index(self) -> Result> { + UnitIndex::parse(self.section) + } +} + +/// The data in the `.debug_tu_index` section of a `.dwp` file. +/// +/// This section contains the type unit index. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugTuIndex { + section: R, +} + +impl<'input, Endian> DebugTuIndex> +where + Endian: Endianity, +{ + /// Construct a new `DebugTuIndex` instance from the data in the `.debug_tu_index` + /// section. + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl DebugTuIndex { + /// Create a `DebugTuIndex` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfPackageSections::borrow`. + pub(crate) fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugTuIndex + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugTuIndex { + fn id() -> SectionId { + SectionId::DebugTuIndex + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugTuIndex { + fn from(section: R) -> Self { + DebugTuIndex { section } + } +} + +impl DebugTuIndex { + /// Parse the index header. + pub fn index(self) -> Result> { + UnitIndex::parse(self.section) + } +} + +const SECTION_COUNT_MAX: u8 = 8; + +/// The partially parsed index from a `DebugCuIndex` or `DebugTuIndex`. 
+#[derive(Debug, Clone)] +pub struct UnitIndex { + version: u16, + section_count: u32, + unit_count: u32, + slot_count: u32, + hash_ids: R, + hash_rows: R, + // Only `section_count` values are valid. + sections: [IndexSectionId; SECTION_COUNT_MAX as usize], + offsets: R, + sizes: R, +} + +impl UnitIndex { + fn parse(mut input: R) -> Result> { + if input.is_empty() { + return Ok(UnitIndex { + version: 0, + section_count: 0, + unit_count: 0, + slot_count: 0, + hash_ids: input.clone(), + hash_rows: input.clone(), + sections: [IndexSectionId::DebugAbbrev; SECTION_COUNT_MAX as usize], + offsets: input.clone(), + sizes: input.clone(), + }); + } + + // GNU split-dwarf extension to DWARF 4 uses a 32-bit version, + // but DWARF 5 uses a 16-bit version followed by 16-bit padding. + let mut original_input = input.clone(); + let version; + if input.read_u32()? == 2 { + version = 2 + } else { + version = original_input.read_u16()?; + if version != 5 { + return Err(Error::UnknownVersion(version.into())); + } + } + + let section_count = input.read_u32()?; + let unit_count = input.read_u32()?; + let slot_count = input.read_u32()?; + if slot_count != 0 && (slot_count & (slot_count - 1) != 0 || slot_count <= unit_count) { + return Err(Error::InvalidIndexSlotCount); + } + + let hash_ids = input.split(R::Offset::from_u64(u64::from(slot_count) * 8)?)?; + let hash_rows = input.split(R::Offset::from_u64(u64::from(slot_count) * 4)?)?; + + let mut sections = [IndexSectionId::DebugAbbrev; SECTION_COUNT_MAX as usize]; + if section_count > SECTION_COUNT_MAX.into() { + return Err(Error::InvalidIndexSectionCount); + } + for i in 0..section_count { + let section = input.read_u32()?; + sections[i as usize] = if version == 2 { + match constants::DwSectV2(section) { + constants::DW_SECT_V2_INFO => IndexSectionId::DebugInfo, + constants::DW_SECT_V2_TYPES => IndexSectionId::DebugTypes, + constants::DW_SECT_V2_ABBREV => IndexSectionId::DebugAbbrev, + constants::DW_SECT_V2_LINE => 
IndexSectionId::DebugLine, + constants::DW_SECT_V2_LOC => IndexSectionId::DebugLoc, + constants::DW_SECT_V2_STR_OFFSETS => IndexSectionId::DebugStrOffsets, + constants::DW_SECT_V2_MACINFO => IndexSectionId::DebugMacinfo, + constants::DW_SECT_V2_MACRO => IndexSectionId::DebugMacro, + section => return Err(Error::UnknownIndexSectionV2(section)), + } + } else { + match constants::DwSect(section) { + constants::DW_SECT_INFO => IndexSectionId::DebugInfo, + constants::DW_SECT_ABBREV => IndexSectionId::DebugAbbrev, + constants::DW_SECT_LINE => IndexSectionId::DebugLine, + constants::DW_SECT_LOCLISTS => IndexSectionId::DebugLocLists, + constants::DW_SECT_STR_OFFSETS => IndexSectionId::DebugStrOffsets, + constants::DW_SECT_MACRO => IndexSectionId::DebugMacro, + constants::DW_SECT_RNGLISTS => IndexSectionId::DebugRngLists, + section => return Err(Error::UnknownIndexSection(section)), + } + }; + } + + let offsets = input.split(R::Offset::from_u64( + u64::from(unit_count) * u64::from(section_count) * 4, + )?)?; + let sizes = input.split(R::Offset::from_u64( + u64::from(unit_count) * u64::from(section_count) * 4, + )?)?; + + Ok(UnitIndex { + version, + section_count, + unit_count, + slot_count, + hash_ids, + hash_rows, + sections, + offsets, + sizes, + }) + } + + /// Find `id` in the index hash table, and return the row index. + /// + /// `id` may be a compilation unit ID if this index is from `.debug_cu_index`, + /// or a type signature if this index is from `.debug_tu_index`. + pub fn find(&self, id: u64) -> Option { + if self.slot_count == 0 { + return None; + } + let mask = u64::from(self.slot_count - 1); + let mut hash1 = id & mask; + let hash2 = ((id >> 32) & mask) | 1; + for _ in 0..self.slot_count { + // The length of these arrays was validated in `UnitIndex::parse`. 
+ let mut hash_ids = self.hash_ids.clone(); + hash_ids.skip(R::Offset::from_u64(hash1 * 8).ok()?).ok()?; + let hash_id = hash_ids.read_u64().ok()?; + if hash_id == id { + let mut hash_rows = self.hash_rows.clone(); + hash_rows.skip(R::Offset::from_u64(hash1 * 4).ok()?).ok()?; + let hash_row = hash_rows.read_u32().ok()?; + return Some(hash_row); + } + if hash_id == 0 { + return None; + } + hash1 = (hash1 + hash2) & mask; + } + None + } + + /// Return the section offsets and sizes for the given row index. + pub fn sections(&self, mut row: u32) -> Result> { + if row == 0 { + return Err(Error::InvalidIndexRow); + } + row -= 1; + if row >= self.unit_count { + return Err(Error::InvalidIndexRow); + } + let mut offsets = self.offsets.clone(); + offsets.skip(R::Offset::from_u64( + u64::from(row) * u64::from(self.section_count) * 4, + )?)?; + let mut sizes = self.sizes.clone(); + sizes.skip(R::Offset::from_u64( + u64::from(row) * u64::from(self.section_count) * 4, + )?)?; + Ok(UnitIndexSectionIterator { + sections: self.sections[..self.section_count as usize].iter(), + offsets, + sizes, + }) + } + + /// Return the version. + /// + /// Defaults to 0 for empty sections. + pub fn version(&self) -> u16 { + self.version + } + + /// Return the number of sections. + pub fn section_count(&self) -> u32 { + self.section_count + } + + /// Return the number of units. + pub fn unit_count(&self) -> u32 { + self.unit_count + } + + /// Return the number of slots. + pub fn slot_count(&self) -> u32 { + self.slot_count + } +} + +/// An iterator over the section offsets and sizes for a row in a `UnitIndex`. 
+#[derive(Debug, Clone)] +pub struct UnitIndexSectionIterator<'index, R: Reader> { + sections: slice::Iter<'index, IndexSectionId>, + offsets: R, + sizes: R, +} + +impl<'index, R: Reader> Iterator for UnitIndexSectionIterator<'index, R> { + type Item = UnitIndexSection; + + fn next(&mut self) -> Option { + let section = *self.sections.next()?; + // The length of these arrays was validated in `UnitIndex::parse`. + let offset = self.offsets.read_u32().ok()?; + let size = self.sizes.read_u32().ok()?; + Some(UnitIndexSection { + section, + offset, + size, + }) + } +} + +/// Information about a unit's contribution to a section in a `.dwp` file. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct UnitIndexSection { + /// The section kind. + pub section: IndexSectionId, + /// The base offset of the unit's contribution to the section. + pub offset: u32, + /// The size of the unit's contribution to the section. + pub size: u32, +} + +/// Section kinds which are permitted in a `.dwp` index. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IndexSectionId { + /// The `.debug_abbrev.dwo` section. + DebugAbbrev, + /// The `.debug_info.dwo` section. + DebugInfo, + /// The `.debug_line.dwo` section. + DebugLine, + /// The `.debug_loc.dwo` section. + DebugLoc, + /// The `.debug_loclists.dwo` section. + DebugLocLists, + /// The `.debug_macinfo.dwo` section. + DebugMacinfo, + /// The `.debug_macro.dwo` section. + DebugMacro, + /// The `.debug_rnglists.dwo` section. + DebugRngLists, + /// The `.debug_str_offsets.dwo` section. + DebugStrOffsets, + /// The `.debug_types.dwo` section. + DebugTypes, +} + +impl IndexSectionId { + /// Returns the ELF section name for this kind, when found in a .dwo or .dwp file. 
+ pub fn dwo_name(self) -> &'static str { + let section_id = match self { + IndexSectionId::DebugAbbrev => SectionId::DebugAbbrev, + IndexSectionId::DebugInfo => SectionId::DebugInfo, + IndexSectionId::DebugLine => SectionId::DebugLine, + IndexSectionId::DebugLoc => SectionId::DebugLoc, + IndexSectionId::DebugLocLists => SectionId::DebugLocLists, + IndexSectionId::DebugMacro => SectionId::DebugMacro, + IndexSectionId::DebugMacinfo => SectionId::DebugMacinfo, + IndexSectionId::DebugRngLists => SectionId::DebugRngLists, + IndexSectionId::DebugStrOffsets => SectionId::DebugStrOffsets, + IndexSectionId::DebugTypes => SectionId::DebugTypes, + }; + section_id.dwo_name().unwrap() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::endianity::BigEndian; + use test_assembler::{Endian, Section}; + + #[test] + fn test_empty() { + let buf = EndianSlice::new(&[], BigEndian); + let index = UnitIndex::parse(buf).unwrap(); + assert_eq!(index.version(), 0); + assert_eq!(index.unit_count(), 0); + assert_eq!(index.slot_count(), 0); + assert!(index.find(0).is_none()); + } + + #[test] + fn test_zero_slots() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D32(2).D32(0).D32(0).D32(0); + let buf = section.get_contents().unwrap(); + let buf = EndianSlice::new(&buf, BigEndian); + let index = UnitIndex::parse(buf).unwrap(); + assert_eq!(index.version(), 2); + assert_eq!(index.unit_count(), 0); + assert_eq!(index.slot_count(), 0); + assert!(index.find(0).is_none()); + } + + #[test] + fn test_version_2() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D32(2).D32(0).D32(0).D32(1) + // Slots. + .D64(0).D32(0); + let buf = section.get_contents().unwrap(); + let buf = EndianSlice::new(&buf, BigEndian); + let index = UnitIndex::parse(buf).unwrap(); + assert_eq!(index.version, 2); + } + + #[test] + fn test_version_5() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. 
+ .D16(5).D16(0).D32(0).D32(0).D32(1) + // Slots. + .D64(0).D32(0); + let buf = section.get_contents().unwrap(); + let buf = EndianSlice::new(&buf, BigEndian); + let index = UnitIndex::parse(buf).unwrap(); + assert_eq!(index.version, 5); + } + + #[test] + fn test_version_5_invalid() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D32(5).D32(0).D32(0).D32(1) + // Slots. + .D64(0).D32(0); + let buf = section.get_contents().unwrap(); + let buf = EndianSlice::new(&buf, BigEndian); + assert!(UnitIndex::parse(buf).is_err()); + } + + #[test] + fn test_version_2_sections() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D32(2).D32(8).D32(1).D32(2) + // Slots. + .D64(0).D64(0).D32(0).D32(0) + // Sections. + .D32(constants::DW_SECT_V2_INFO.0) + .D32(constants::DW_SECT_V2_TYPES.0) + .D32(constants::DW_SECT_V2_ABBREV.0) + .D32(constants::DW_SECT_V2_LINE.0) + .D32(constants::DW_SECT_V2_LOC.0) + .D32(constants::DW_SECT_V2_STR_OFFSETS.0) + .D32(constants::DW_SECT_V2_MACINFO.0) + .D32(constants::DW_SECT_V2_MACRO.0) + // Offsets. + .D32(11).D32(12).D32(13).D32(14).D32(15).D32(16).D32(17).D32(18) + // Sizes. 
+ .D32(21).D32(22).D32(23).D32(24).D32(25).D32(26).D32(27).D32(28); + let buf = section.get_contents().unwrap(); + let buf = EndianSlice::new(&buf, BigEndian); + let index = UnitIndex::parse(buf).unwrap(); + assert_eq!(index.section_count, 8); + assert_eq!( + index.sections, + [ + IndexSectionId::DebugInfo, + IndexSectionId::DebugTypes, + IndexSectionId::DebugAbbrev, + IndexSectionId::DebugLine, + IndexSectionId::DebugLoc, + IndexSectionId::DebugStrOffsets, + IndexSectionId::DebugMacinfo, + IndexSectionId::DebugMacro, + ] + ); + #[rustfmt::skip] + let expect = [ + UnitIndexSection { section: IndexSectionId::DebugInfo, offset: 11, size: 21 }, + UnitIndexSection { section: IndexSectionId::DebugTypes, offset: 12, size: 22 }, + UnitIndexSection { section: IndexSectionId::DebugAbbrev, offset: 13, size: 23 }, + UnitIndexSection { section: IndexSectionId::DebugLine, offset: 14, size: 24 }, + UnitIndexSection { section: IndexSectionId::DebugLoc, offset: 15, size: 25 }, + UnitIndexSection { section: IndexSectionId::DebugStrOffsets, offset: 16, size: 26 }, + UnitIndexSection { section: IndexSectionId::DebugMacinfo, offset: 17, size: 27 }, + UnitIndexSection { section: IndexSectionId::DebugMacro, offset: 18, size: 28 }, + ]; + let mut sections = index.sections(1).unwrap(); + for section in &expect { + assert_eq!(*section, sections.next().unwrap()); + } + assert!(sections.next().is_none()); + } + + #[test] + fn test_version_5_sections() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D16(5).D16(0).D32(7).D32(1).D32(2) + // Slots. + .D64(0).D64(0).D32(0).D32(0) + // Sections. + .D32(constants::DW_SECT_INFO.0) + .D32(constants::DW_SECT_ABBREV.0) + .D32(constants::DW_SECT_LINE.0) + .D32(constants::DW_SECT_LOCLISTS.0) + .D32(constants::DW_SECT_STR_OFFSETS.0) + .D32(constants::DW_SECT_MACRO.0) + .D32(constants::DW_SECT_RNGLISTS.0) + // Offsets. + .D32(11).D32(12).D32(13).D32(14).D32(15).D32(16).D32(17) + // Sizes. 
+ .D32(21).D32(22).D32(23).D32(24).D32(25).D32(26).D32(27); + let buf = section.get_contents().unwrap(); + let buf = EndianSlice::new(&buf, BigEndian); + let index = UnitIndex::parse(buf).unwrap(); + assert_eq!(index.section_count, 7); + assert_eq!( + index.sections[..7], + [ + IndexSectionId::DebugInfo, + IndexSectionId::DebugAbbrev, + IndexSectionId::DebugLine, + IndexSectionId::DebugLocLists, + IndexSectionId::DebugStrOffsets, + IndexSectionId::DebugMacro, + IndexSectionId::DebugRngLists, + ] + ); + #[rustfmt::skip] + let expect = [ + UnitIndexSection { section: IndexSectionId::DebugInfo, offset: 11, size: 21 }, + UnitIndexSection { section: IndexSectionId::DebugAbbrev, offset: 12, size: 22 }, + UnitIndexSection { section: IndexSectionId::DebugLine, offset: 13, size: 23 }, + UnitIndexSection { section: IndexSectionId::DebugLocLists, offset: 14, size: 24 }, + UnitIndexSection { section: IndexSectionId::DebugStrOffsets, offset: 15, size: 25 }, + UnitIndexSection { section: IndexSectionId::DebugMacro, offset: 16, size: 26 }, + UnitIndexSection { section: IndexSectionId::DebugRngLists, offset: 17, size: 27 }, + ]; + let mut sections = index.sections(1).unwrap(); + for section in &expect { + assert_eq!(*section, sections.next().unwrap()); + } + assert!(sections.next().is_none()); + + assert!(index.sections(0).is_err()); + assert!(index.sections(2).is_err()); + } + + #[test] + fn test_hash() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D16(5).D16(0).D32(2).D32(3).D32(4) + // Slots. + .D64(0xffff_fff2_ffff_fff1) + .D64(0xffff_fff0_ffff_fff1) + .D64(0xffff_fff1_ffff_fff1) + .D64(0) + .D32(3).D32(1).D32(2).D32(0) + // Sections. + .D32(constants::DW_SECT_INFO.0) + .D32(constants::DW_SECT_ABBREV.0) + // Offsets. + .D32(0).D32(0).D32(0).D32(0).D32(0).D32(0) + // Sizes. 
+ .D32(0).D32(0).D32(0).D32(0).D32(0).D32(0); + let buf = section.get_contents().unwrap(); + let buf = EndianSlice::new(&buf, BigEndian); + let index = UnitIndex::parse(buf).unwrap(); + assert_eq!(index.version(), 5); + assert_eq!(index.slot_count(), 4); + assert_eq!(index.unit_count(), 3); + assert_eq!(index.section_count(), 2); + assert_eq!(index.find(0xffff_fff0_ffff_fff1), Some(1)); + assert_eq!(index.find(0xffff_fff1_ffff_fff1), Some(2)); + assert_eq!(index.find(0xffff_fff2_ffff_fff1), Some(3)); + assert_eq!(index.find(0xffff_fff3_ffff_fff1), None); + } + + #[test] + fn test_cu_index() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D16(5).D16(0).D32(0).D32(0).D32(1) + // Slots. + .D64(0).D32(0); + let buf = section.get_contents().unwrap(); + let cu_index = DebugCuIndex::new(&buf, BigEndian); + let index = cu_index.index().unwrap(); + assert_eq!(index.version, 5); + } + + #[test] + fn test_tu_index() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Big) + // Header. + .D16(5).D16(0).D32(0).D32(0).D32(1) + // Slots. 
+ .D64(0).D32(0); + let buf = section.get_contents().unwrap(); + let tu_index = DebugTuIndex::new(&buf, BigEndian); + let index = tu_index.index().unwrap(); + assert_eq!(index.version, 5); + } +} diff --git a/third_party/rust/gimli/src/read/line.rs b/third_party/rust/gimli/src/read/line.rs new file mode 100644 index 000000000000..e77f88d96434 --- /dev/null +++ b/third_party/rust/gimli/src/read/line.rs @@ -0,0 +1,3121 @@ +use alloc::vec::Vec; +use core::fmt; +use core::num::{NonZeroU64, Wrapping}; +use core::result; + +use crate::common::{ + DebugLineOffset, DebugLineStrOffset, DebugStrOffset, DebugStrOffsetsIndex, Encoding, Format, + LineEncoding, SectionId, +}; +use crate::constants; +use crate::endianity::Endianity; +use crate::read::{AttributeValue, EndianSlice, Error, Reader, ReaderOffset, Result, Section}; + +/// The `DebugLine` struct contains the source location to instruction mapping +/// found in the `.debug_line` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugLine { + debug_line_section: R, +} + +impl<'input, Endian> DebugLine> +where + Endian: Endianity, +{ + /// Construct a new `DebugLine` instance from the data in the `.debug_line` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_line` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugLine, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_line_section_somehow = || &buf; + /// let debug_line = DebugLine::new(read_debug_line_section_somehow(), LittleEndian); + /// ``` + pub fn new(debug_line_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_line_section, endian)) + } +} + +impl DebugLine { + /// Parse the line number program whose header is at the given `offset` in the + /// `.debug_line` section. 
+ /// + /// The `address_size` must match the compilation unit that the lines apply to. + /// The `comp_dir` should be from the `DW_AT_comp_dir` attribute of the compilation + /// unit. The `comp_name` should be from the `DW_AT_name` attribute of the + /// compilation unit. + /// + /// ```rust,no_run + /// use gimli::{DebugLine, DebugLineOffset, IncompleteLineProgram, EndianSlice, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_line_section_somehow = || &buf; + /// let debug_line = DebugLine::new(read_debug_line_section_somehow(), LittleEndian); + /// + /// // In a real example, we'd grab the offset via a compilation unit + /// // entry's `DW_AT_stmt_list` attribute, and the address size from that + /// // unit directly. + /// let offset = DebugLineOffset(0); + /// let address_size = 8; + /// + /// let program = debug_line.program(offset, address_size, None, None) + /// .expect("should have found a header at that offset, and parsed it OK"); + /// ``` + pub fn program( + &self, + offset: DebugLineOffset, + address_size: u8, + comp_dir: Option, + comp_name: Option, + ) -> Result> { + let input = &mut self.debug_line_section.clone(); + input.skip(offset.0)?; + let header = LineProgramHeader::parse(input, offset, address_size, comp_dir, comp_name)?; + let program = IncompleteLineProgram { header }; + Ok(program) + } +} + +impl DebugLine { + /// Create a `DebugLine` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugLine + where + F: FnMut(&'a T) -> R, + { + borrow(&self.debug_line_section).into() + } +} + +impl Section for DebugLine { + fn id() -> SectionId { + SectionId::DebugLine + } + + fn reader(&self) -> &R { + &self.debug_line_section + } +} + +impl From for DebugLine { + fn from(debug_line_section: R) -> Self { + DebugLine { debug_line_section } + } +} + +/// Deprecated. 
`LineNumberProgram` has been renamed to `LineProgram`. +#[deprecated(note = "LineNumberProgram has been renamed to LineProgram, use that instead.")] +pub type LineNumberProgram = dyn LineProgram; + +/// A `LineProgram` provides access to a `LineProgramHeader` and +/// a way to add files to the files table if necessary. Gimli consumers should +/// never need to use or see this trait. +pub trait LineProgram::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// Get a reference to the held `LineProgramHeader`. + fn header(&self) -> &LineProgramHeader; + /// Add a file to the file table if necessary. + fn add_file(&mut self, file: FileEntry); +} + +impl LineProgram for IncompleteLineProgram +where + R: Reader, + Offset: ReaderOffset, +{ + fn header(&self) -> &LineProgramHeader { + &self.header + } + fn add_file(&mut self, file: FileEntry) { + self.header.file_names.push(file); + } +} + +impl<'program, R, Offset> LineProgram for &'program CompleteLineProgram +where + R: Reader, + Offset: ReaderOffset, +{ + fn header(&self) -> &LineProgramHeader { + &self.header + } + fn add_file(&mut self, _: FileEntry) { + // Nop. Our file table is already complete. + } +} + +/// Deprecated. `StateMachine` has been renamed to `LineRows`. +#[deprecated(note = "StateMachine has been renamed to LineRows, use that instead.")] +pub type StateMachine = LineRows; + +/// Executes a `LineProgram` to iterate over the rows in the matrix of line number information. +/// +/// "The hypothetical machine used by a consumer of the line number information +/// to expand the byte-coded instruction stream into a matrix of line number +/// information." 
-- Section 6.2.1 +#[derive(Debug, Clone)] +pub struct LineRows::Offset> +where + Program: LineProgram, + R: Reader, + Offset: ReaderOffset, +{ + program: Program, + row: LineRow, + instructions: LineInstructions, +} + +type OneShotLineRows::Offset> = + LineRows, Offset>; + +type ResumedLineRows<'program, R, Offset = ::Offset> = + LineRows, Offset>; + +impl LineRows +where + Program: LineProgram, + R: Reader, + Offset: ReaderOffset, +{ + fn new(program: IncompleteLineProgram) -> OneShotLineRows { + let row = LineRow::new(program.header()); + let instructions = LineInstructions { + input: program.header().program_buf.clone(), + }; + LineRows { + program, + row, + instructions, + } + } + + fn resume<'program>( + program: &'program CompleteLineProgram, + sequence: &LineSequence, + ) -> ResumedLineRows<'program, R, Offset> { + let row = LineRow::new(program.header()); + let instructions = sequence.instructions.clone(); + LineRows { + program, + row, + instructions, + } + } + + /// Get a reference to the header for this state machine's line number + /// program. + #[inline] + pub fn header(&self) -> &LineProgramHeader { + self.program.header() + } + + /// Parse and execute the next instructions in the line number program until + /// another row in the line number matrix is computed. + /// + /// The freshly computed row is returned as `Ok(Some((header, row)))`. + /// If the matrix is complete, and there are no more new rows in the line + /// number matrix, then `Ok(None)` is returned. If there was an error parsing + /// an instruction, then `Err(e)` is returned. + /// + /// Unfortunately, the references mean that this cannot be a + /// `FallibleIterator`. + pub fn next_row(&mut self) -> Result, &LineRow)>> { + // Perform any reset that was required after copying the previous row. + self.row.reset(self.program.header()); + + loop { + // Split the borrow here, rather than calling `self.header()`. 
+ match self.instructions.next_instruction(self.program.header()) { + Err(err) => return Err(err), + Ok(None) => return Ok(None), + Ok(Some(instruction)) => { + if self.row.execute(instruction, &mut self.program) { + if self.row.tombstone { + // Perform any reset that was required for the tombstone row. + // Normally this is done when `next_row` is called again, but for + // tombstones we loop immediately. + self.row.reset(self.program.header()); + } else { + return Ok(Some((self.header(), &self.row))); + } + } + // Fall through, parse the next instruction, and see if that + // yields a row. + } + } + } + } +} + +/// Deprecated. `Opcode` has been renamed to `LineInstruction`. +#[deprecated(note = "Opcode has been renamed to LineInstruction, use that instead.")] +pub type Opcode = LineInstruction::Offset>; + +/// A parsed line number program instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LineInstruction::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// > ### 6.2.5.1 Special Opcodes + /// > + /// > Each ubyte special opcode has the following effect on the state machine: + /// > + /// > 1. Add a signed integer to the line register. + /// > + /// > 2. Modify the operation pointer by incrementing the address and + /// > op_index registers as described below. + /// > + /// > 3. Append a row to the matrix using the current values of the state + /// > machine registers. + /// > + /// > 4. Set the basic_block register to “false.” + /// > + /// > 5. Set the prologue_end register to “false.” + /// > + /// > 6. Set the epilogue_begin register to “false.” + /// > + /// > 7. Set the discriminator register to 0. + /// > + /// > All of the special opcodes do those same seven things; they differ from + /// > one another only in what values they add to the line, address and + /// > op_index registers. + Special(u8), + + /// "[`LineInstruction::Copy`] appends a row to the matrix using the current + /// values of the state machine registers. 
Then it sets the discriminator + /// register to 0, and sets the basic_block, prologue_end and epilogue_begin + /// registers to “false.”" + Copy, + + /// "The DW_LNS_advance_pc opcode takes a single unsigned LEB128 operand as + /// the operation advance and modifies the address and op_index registers + /// [the same as `LineInstruction::Special`]" + AdvancePc(u64), + + /// "The DW_LNS_advance_line opcode takes a single signed LEB128 operand and + /// adds that value to the line register of the state machine." + AdvanceLine(i64), + + /// "The DW_LNS_set_file opcode takes a single unsigned LEB128 operand and + /// stores it in the file register of the state machine." + SetFile(u64), + + /// "The DW_LNS_set_column opcode takes a single unsigned LEB128 operand and + /// stores it in the column register of the state machine." + SetColumn(u64), + + /// "The DW_LNS_negate_stmt opcode takes no operands. It sets the is_stmt + /// register of the state machine to the logical negation of its current + /// value." + NegateStatement, + + /// "The DW_LNS_set_basic_block opcode takes no operands. It sets the + /// basic_block register of the state machine to “true.”" + SetBasicBlock, + + /// > The DW_LNS_const_add_pc opcode takes no operands. It advances the + /// > address and op_index registers by the increments corresponding to + /// > special opcode 255. + /// > + /// > When the line number program needs to advance the address by a small + /// > amount, it can use a single special opcode, which occupies a single + /// > byte. When it needs to advance the address by up to twice the range of + /// > the last special opcode, it can use DW_LNS_const_add_pc followed by a + /// > special opcode, for a total of two bytes. Only if it needs to advance + /// > the address by more than twice that range will it need to use both + /// > DW_LNS_advance_pc and a special opcode, requiring three or more bytes. 
+ ConstAddPc, + + /// > The DW_LNS_fixed_advance_pc opcode takes a single uhalf (unencoded) + /// > operand and adds it to the address register of the state machine and + /// > sets the op_index register to 0. This is the only standard opcode whose + /// > operand is not a variable length number. It also does not multiply the + /// > operand by the minimum_instruction_length field of the header. + FixedAddPc(u16), + + /// "[`LineInstruction::SetPrologueEnd`] sets the prologue_end register to “true”." + SetPrologueEnd, + + /// "[`LineInstruction::SetEpilogueBegin`] sets the epilogue_begin register to + /// “true”." + SetEpilogueBegin, + + /// "The DW_LNS_set_isa opcode takes a single unsigned LEB128 operand and + /// stores that value in the isa register of the state machine." + SetIsa(u64), + + /// An unknown standard opcode with zero operands. + UnknownStandard0(constants::DwLns), + + /// An unknown standard opcode with one operand. + UnknownStandard1(constants::DwLns, u64), + + /// An unknown standard opcode with multiple operands. + UnknownStandardN(constants::DwLns, R), + + /// > [`LineInstruction::EndSequence`] sets the end_sequence register of the state + /// > machine to “true” and appends a row to the matrix using the current + /// > values of the state-machine registers. Then it resets the registers to + /// > the initial values specified above (see Section 6.2.2). Every line + /// > number program sequence must end with a DW_LNE_end_sequence instruction + /// > which creates a row whose address is that of the byte after the last + /// > target machine instruction of the sequence. + EndSequence, + + /// > The DW_LNE_set_address opcode takes a single relocatable address as an + /// > operand. The size of the operand is the size of an address on the target + /// > machine. It sets the address register to the value given by the + /// > relocatable address and sets the op_index register to 0. 
+ /// > + /// > All of the other line number program opcodes that affect the address + /// > register add a delta to it. This instruction stores a relocatable value + /// > into it instead. + SetAddress(u64), + + /// Defines a new source file in the line number program and appends it to + /// the line number program header's list of source files. + DefineFile(FileEntry), + + /// "The DW_LNE_set_discriminator opcode takes a single parameter, an + /// unsigned LEB128 integer. It sets the discriminator register to the new + /// value." + SetDiscriminator(u64), + + /// An unknown extended opcode and the slice of its unparsed operands. + UnknownExtended(constants::DwLne, R), +} + +impl LineInstruction +where + R: Reader, + Offset: ReaderOffset, +{ + fn parse<'header>( + header: &'header LineProgramHeader, + input: &mut R, + ) -> Result> + where + R: 'header, + { + let opcode = input.read_u8()?; + if opcode == 0 { + let length = input.read_uleb128().and_then(R::Offset::from_u64)?; + let mut instr_rest = input.split(length)?; + let opcode = instr_rest.read_u8()?; + + match constants::DwLne(opcode) { + constants::DW_LNE_end_sequence => Ok(LineInstruction::EndSequence), + + constants::DW_LNE_set_address => { + let address = instr_rest.read_address(header.address_size())?; + Ok(LineInstruction::SetAddress(address)) + } + + constants::DW_LNE_define_file => { + if header.version() <= 4 { + let path_name = instr_rest.read_null_terminated_slice()?; + let entry = FileEntry::parse(&mut instr_rest, path_name)?; + Ok(LineInstruction::DefineFile(entry)) + } else { + Ok(LineInstruction::UnknownExtended( + constants::DW_LNE_define_file, + instr_rest, + )) + } + } + + constants::DW_LNE_set_discriminator => { + let discriminator = instr_rest.read_uleb128()?; + Ok(LineInstruction::SetDiscriminator(discriminator)) + } + + otherwise => Ok(LineInstruction::UnknownExtended(otherwise, instr_rest)), + } + } else if opcode >= header.opcode_base { + Ok(LineInstruction::Special(opcode)) + } else { 
+ match constants::DwLns(opcode) { + constants::DW_LNS_copy => Ok(LineInstruction::Copy), + + constants::DW_LNS_advance_pc => { + let advance = input.read_uleb128()?; + Ok(LineInstruction::AdvancePc(advance)) + } + + constants::DW_LNS_advance_line => { + let increment = input.read_sleb128()?; + Ok(LineInstruction::AdvanceLine(increment)) + } + + constants::DW_LNS_set_file => { + let file = input.read_uleb128()?; + Ok(LineInstruction::SetFile(file)) + } + + constants::DW_LNS_set_column => { + let column = input.read_uleb128()?; + Ok(LineInstruction::SetColumn(column)) + } + + constants::DW_LNS_negate_stmt => Ok(LineInstruction::NegateStatement), + + constants::DW_LNS_set_basic_block => Ok(LineInstruction::SetBasicBlock), + + constants::DW_LNS_const_add_pc => Ok(LineInstruction::ConstAddPc), + + constants::DW_LNS_fixed_advance_pc => { + let advance = input.read_u16()?; + Ok(LineInstruction::FixedAddPc(advance)) + } + + constants::DW_LNS_set_prologue_end => Ok(LineInstruction::SetPrologueEnd), + + constants::DW_LNS_set_epilogue_begin => Ok(LineInstruction::SetEpilogueBegin), + + constants::DW_LNS_set_isa => { + let isa = input.read_uleb128()?; + Ok(LineInstruction::SetIsa(isa)) + } + + otherwise => { + let mut opcode_lengths = header.standard_opcode_lengths().clone(); + opcode_lengths.skip(R::Offset::from_u8(opcode - 1))?; + let num_args = opcode_lengths.read_u8()? 
as usize; + match num_args { + 0 => Ok(LineInstruction::UnknownStandard0(otherwise)), + 1 => { + let arg = input.read_uleb128()?; + Ok(LineInstruction::UnknownStandard1(otherwise, arg)) + } + _ => { + let mut args = input.clone(); + for _ in 0..num_args { + input.read_uleb128()?; + } + let len = input.offset_from(&args); + args.truncate(len)?; + Ok(LineInstruction::UnknownStandardN(otherwise, args)) + } + } + } + } + } + } +} + +impl fmt::Display for LineInstruction +where + R: Reader, + Offset: ReaderOffset, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> result::Result<(), fmt::Error> { + match *self { + LineInstruction::Special(opcode) => write!(f, "Special opcode {}", opcode), + LineInstruction::Copy => write!(f, "{}", constants::DW_LNS_copy), + LineInstruction::AdvancePc(advance) => { + write!(f, "{} by {}", constants::DW_LNS_advance_pc, advance) + } + LineInstruction::AdvanceLine(increment) => { + write!(f, "{} by {}", constants::DW_LNS_advance_line, increment) + } + LineInstruction::SetFile(file) => { + write!(f, "{} to {}", constants::DW_LNS_set_file, file) + } + LineInstruction::SetColumn(column) => { + write!(f, "{} to {}", constants::DW_LNS_set_column, column) + } + LineInstruction::NegateStatement => write!(f, "{}", constants::DW_LNS_negate_stmt), + LineInstruction::SetBasicBlock => write!(f, "{}", constants::DW_LNS_set_basic_block), + LineInstruction::ConstAddPc => write!(f, "{}", constants::DW_LNS_const_add_pc), + LineInstruction::FixedAddPc(advance) => { + write!(f, "{} by {}", constants::DW_LNS_fixed_advance_pc, advance) + } + LineInstruction::SetPrologueEnd => write!(f, "{}", constants::DW_LNS_set_prologue_end), + LineInstruction::SetEpilogueBegin => { + write!(f, "{}", constants::DW_LNS_set_epilogue_begin) + } + LineInstruction::SetIsa(isa) => write!(f, "{} to {}", constants::DW_LNS_set_isa, isa), + LineInstruction::UnknownStandard0(opcode) => write!(f, "Unknown {}", opcode), + LineInstruction::UnknownStandard1(opcode, arg) => { + write!(f, 
"Unknown {} with operand {}", opcode, arg) + } + LineInstruction::UnknownStandardN(opcode, ref args) => { + write!(f, "Unknown {} with operands {:?}", opcode, args) + } + LineInstruction::EndSequence => write!(f, "{}", constants::DW_LNE_end_sequence), + LineInstruction::SetAddress(address) => { + write!(f, "{} to {}", constants::DW_LNE_set_address, address) + } + LineInstruction::DefineFile(_) => write!(f, "{}", constants::DW_LNE_define_file), + LineInstruction::SetDiscriminator(discr) => { + write!(f, "{} to {}", constants::DW_LNE_set_discriminator, discr) + } + LineInstruction::UnknownExtended(opcode, _) => write!(f, "Unknown {}", opcode), + } + } +} + +/// Deprecated. `OpcodesIter` has been renamed to `LineInstructions`. +#[deprecated(note = "OpcodesIter has been renamed to LineInstructions, use that instead.")] +pub type OpcodesIter = LineInstructions; + +/// An iterator yielding parsed instructions. +/// +/// See +/// [`LineProgramHeader::instructions`](./struct.LineProgramHeader.html#method.instructions) +/// for more details. +#[derive(Clone, Debug)] +pub struct LineInstructions { + input: R, +} + +impl LineInstructions { + fn remove_trailing(&self, other: &LineInstructions) -> Result> { + let offset = other.input.offset_from(&self.input); + let mut input = self.input.clone(); + input.truncate(offset)?; + Ok(LineInstructions { input }) + } +} + +impl LineInstructions { + /// Advance the iterator and return the next instruction. + /// + /// Returns the newly parsed instruction as `Ok(Some(instruction))`. Returns + /// `Ok(None)` when iteration is complete and all instructions have already been + /// parsed and yielded. If an error occurs while parsing the next attribute, + /// then this error is returned as `Err(e)`, and all subsequent calls return + /// `Ok(None)`. + /// + /// Unfortunately, the `header` parameter means that this cannot be a + /// `FallibleIterator`. 
+ #[inline(always)] + pub fn next_instruction( + &mut self, + header: &LineProgramHeader, + ) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + + match LineInstruction::parse(header, &mut self.input) { + Ok(instruction) => Ok(Some(instruction)), + Err(e) => { + self.input.empty(); + Err(e) + } + } + } +} + +/// Deprecated. `LineNumberRow` has been renamed to `LineRow`. +#[deprecated(note = "LineNumberRow has been renamed to LineRow, use that instead.")] +pub type LineNumberRow = LineRow; + +/// A row in the line number program's resulting matrix. +/// +/// Each row is a copy of the registers of the state machine, as defined in section 6.2.2. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LineRow { + tombstone: bool, + address: Wrapping, + op_index: Wrapping, + file: u64, + line: Wrapping, + column: u64, + is_stmt: bool, + basic_block: bool, + end_sequence: bool, + prologue_end: bool, + epilogue_begin: bool, + isa: u64, + discriminator: u64, +} + +impl LineRow { + /// Create a line number row in the initial state for the given program. + pub fn new(header: &LineProgramHeader) -> Self { + LineRow { + // "At the beginning of each sequence within a line number program, the + // state of the registers is:" -- Section 6.2.2 + tombstone: false, + address: Wrapping(0), + op_index: Wrapping(0), + file: 1, + line: Wrapping(1), + column: 0, + // "determined by default_is_stmt in the line number program header" + is_stmt: header.line_encoding.default_is_stmt, + basic_block: false, + end_sequence: false, + prologue_end: false, + epilogue_begin: false, + // "The isa value 0 specifies that the instruction set is the + // architecturally determined default instruction set. This may be fixed + // by the ABI, or it may be specified by other means, for example, by + // the object file description." + isa: 0, + discriminator: 0, + } + } + + /// "The program-counter value corresponding to a machine instruction + /// generated by the compiler." 
+ #[inline] + pub fn address(&self) -> u64 { + self.address.0 + } + + /// > An unsigned integer representing the index of an operation within a VLIW + /// > instruction. The index of the first operation is 0. For non-VLIW + /// > architectures, this register will always be 0. + /// > + /// > The address and op_index registers, taken together, form an operation + /// > pointer that can reference any individual operation with the + /// > instruction stream. + #[inline] + pub fn op_index(&self) -> u64 { + self.op_index.0 + } + + /// "An unsigned integer indicating the identity of the source file + /// corresponding to a machine instruction." + #[inline] + pub fn file_index(&self) -> u64 { + self.file + } + + /// The source file corresponding to the current machine instruction. + #[inline] + pub fn file<'header, R: Reader>( + &self, + header: &'header LineProgramHeader, + ) -> Option<&'header FileEntry> { + header.file(self.file) + } + + /// "An unsigned integer indicating a source line number. Lines are numbered + /// beginning at 1. The compiler may emit the value 0 in cases where an + /// instruction cannot be attributed to any source line." + /// Line number values of 0 are represented as `None`. + #[inline] + pub fn line(&self) -> Option { + NonZeroU64::new(self.line.0) + } + + /// "An unsigned integer indicating a column number within a source + /// line. Columns are numbered beginning at 1. The value 0 is reserved to + /// indicate that a statement begins at the “left edge” of the line." + #[inline] + pub fn column(&self) -> ColumnType { + NonZeroU64::new(self.column) + .map(ColumnType::Column) + .unwrap_or(ColumnType::LeftEdge) + } + + /// "A boolean indicating that the current instruction is a recommended + /// breakpoint location. A recommended breakpoint location is intended to + /// “represent” a line, a statement and/or a semantically distinct subpart + /// of a statement." 
+ #[inline] + pub fn is_stmt(&self) -> bool { + self.is_stmt + } + + /// "A boolean indicating that the current instruction is the beginning of a + /// basic block." + #[inline] + pub fn basic_block(&self) -> bool { + self.basic_block + } + + /// "A boolean indicating that the current address is that of the first byte + /// after the end of a sequence of target machine instructions. end_sequence + /// terminates a sequence of lines; therefore other information in the same + /// row is not meaningful." + #[inline] + pub fn end_sequence(&self) -> bool { + self.end_sequence + } + + /// "A boolean indicating that the current address is one (of possibly many) + /// where execution should be suspended for an entry breakpoint of a + /// function." + #[inline] + pub fn prologue_end(&self) -> bool { + self.prologue_end + } + + /// "A boolean indicating that the current address is one (of possibly many) + /// where execution should be suspended for an exit breakpoint of a + /// function." + #[inline] + pub fn epilogue_begin(&self) -> bool { + self.epilogue_begin + } + + /// Tag for the current instruction set architecture. + /// + /// > An unsigned integer whose value encodes the applicable instruction set + /// > architecture for the current instruction. + /// > + /// > The encoding of instruction sets should be shared by all users of a + /// > given architecture. It is recommended that this encoding be defined by + /// > the ABI authoring committee for each architecture. + #[inline] + pub fn isa(&self) -> u64 { + self.isa + } + + /// "An unsigned integer identifying the block to which the current + /// instruction belongs. Discriminator values are assigned arbitrarily by + /// the DWARF producer and serve to distinguish among multiple blocks that + /// may all be associated with the same source file, line, and column. Where + /// only one block exists for a given source position, the discriminator + /// value should be zero." 
+ #[inline] + pub fn discriminator(&self) -> u64 { + self.discriminator + } + + /// Execute the given instruction, and return true if a new row in the + /// line number matrix needs to be generated. + /// + /// Unknown opcodes are treated as no-ops. + #[inline] + pub fn execute( + &mut self, + instruction: LineInstruction, + program: &mut Program, + ) -> bool + where + Program: LineProgram, + R: Reader, + { + match instruction { + LineInstruction::Special(opcode) => { + self.exec_special_opcode(opcode, program.header()); + true + } + + LineInstruction::Copy => true, + + LineInstruction::AdvancePc(operation_advance) => { + self.apply_operation_advance(operation_advance, program.header()); + false + } + + LineInstruction::AdvanceLine(line_increment) => { + self.apply_line_advance(line_increment); + false + } + + LineInstruction::SetFile(file) => { + self.file = file; + false + } + + LineInstruction::SetColumn(column) => { + self.column = column; + false + } + + LineInstruction::NegateStatement => { + self.is_stmt = !self.is_stmt; + false + } + + LineInstruction::SetBasicBlock => { + self.basic_block = true; + false + } + + LineInstruction::ConstAddPc => { + let adjusted = self.adjust_opcode(255, program.header()); + let operation_advance = adjusted / program.header().line_encoding.line_range; + self.apply_operation_advance(u64::from(operation_advance), program.header()); + false + } + + LineInstruction::FixedAddPc(operand) => { + self.address += Wrapping(u64::from(operand)); + self.op_index.0 = 0; + false + } + + LineInstruction::SetPrologueEnd => { + self.prologue_end = true; + false + } + + LineInstruction::SetEpilogueBegin => { + self.epilogue_begin = true; + false + } + + LineInstruction::SetIsa(isa) => { + self.isa = isa; + false + } + + LineInstruction::EndSequence => { + self.end_sequence = true; + true + } + + LineInstruction::SetAddress(address) => { + let tombstone_address = !0 >> (64 - program.header().encoding.address_size * 8); + self.tombstone = address 
== tombstone_address; + self.address.0 = address; + self.op_index.0 = 0; + false + } + + LineInstruction::DefineFile(entry) => { + program.add_file(entry); + false + } + + LineInstruction::SetDiscriminator(discriminator) => { + self.discriminator = discriminator; + false + } + + // Compatibility with future opcodes. + LineInstruction::UnknownStandard0(_) + | LineInstruction::UnknownStandard1(_, _) + | LineInstruction::UnknownStandardN(_, _) + | LineInstruction::UnknownExtended(_, _) => false, + } + } + + /// Perform any reset that was required after copying the previous row. + #[inline] + pub fn reset(&mut self, header: &LineProgramHeader) { + if self.end_sequence { + // Previous instruction was EndSequence, so reset everything + // as specified in Section 6.2.5.3. + *self = Self::new(header); + } else { + // Previous instruction was one of: + // - Special - specified in Section 6.2.5.1, steps 4-7 + // - Copy - specified in Section 6.2.5.2 + // The reset behaviour is the same in both cases. 
+ self.discriminator = 0; + self.basic_block = false; + self.prologue_end = false; + self.epilogue_begin = false; + } + } + + /// Step 1 of section 6.2.5.1: add a signed increment to the `line` register. + /// + /// A negative increment whose magnitude exceeds the current line leaves the + /// register at 0; a non-negative increment is added with wrapping arithmetic. + fn apply_line_advance(&mut self, line_increment: i64) { + if line_increment < 0 { + // `unsigned_abs` is defined for every input, whereas negating + // `i64::MIN` overflows (a panic when overflow checks are enabled). + // The operand comes straight from the line program, so it must not + // be trusted to stay away from that value. + let decrement = line_increment.unsigned_abs(); + self.line.0 = self.line.0.saturating_sub(decrement); + } else { + self.line += Wrapping(line_increment as u64); + } + } + + /// Step 2 of section 6.2.5.1: advance the `address` and `op_index` + /// registers by `operation_advance` operations. + /// + /// When `maximum_operations_per_instruction` is 1 the address moves by + /// `minimum_instruction_length * operation_advance` and `op_index` is set + /// to 0. Otherwise the advance is added to `op_index`, the quotient by + /// `maximum_operations_per_instruction` moves the address, and the + /// remainder becomes the new `op_index`. All arithmetic wraps. + fn apply_operation_advance( + &mut self, + operation_advance: u64, + header: &LineProgramHeader, + ) { + let operation_advance = Wrapping(operation_advance); + + let minimum_instruction_length = u64::from(header.line_encoding.minimum_instruction_length); + let minimum_instruction_length = Wrapping(minimum_instruction_length); + + let maximum_operations_per_instruction = + u64::from(header.line_encoding.maximum_operations_per_instruction); + let maximum_operations_per_instruction = Wrapping(maximum_operations_per_instruction); + + if maximum_operations_per_instruction.0 == 1 { + self.address += minimum_instruction_length * operation_advance; + self.op_index.0 = 0; + } else { + let op_index_with_advance = self.op_index + operation_advance; + self.address += minimum_instruction_length + * (op_index_with_advance / maximum_operations_per_instruction); + self.op_index = op_index_with_advance % maximum_operations_per_instruction; + } + } + + /// Subtract the header's `opcode_base` from `opcode`. + /// + /// This is a plain `u8` subtraction, so `opcode` must not be smaller than + /// `opcode_base`. + #[inline] + fn adjust_opcode(&self, opcode: u8, header: &LineProgramHeader) -> u8 { + opcode - header.opcode_base + } + + /// Section 6.2.5.1 + fn exec_special_opcode(&mut self, opcode: u8, header: &LineProgramHeader) { + let adjusted_opcode = self.adjust_opcode(opcode, header); + + let line_range = header.line_encoding.line_range; + let line_advance = adjusted_opcode % line_range; + let operation_advance = adjusted_opcode / line_range; + + // Step 1 + let line_base = i64::from(header.line_encoding.line_base); + self.apply_line_advance(line_base +
i64::from(line_advance)); + + // Step 2 + self.apply_operation_advance(u64::from(operation_advance), header); + } +} + +/// The type of column that a row is referring to. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum ColumnType { + /// The `LeftEdge` means that the statement begins at the start of the new + /// line. + LeftEdge, + /// A column number, whose range begins at 1. + Column(NonZeroU64), +} + +/// Deprecated. `LineNumberSequence` has been renamed to `LineSequence`. +#[deprecated(note = "LineNumberSequence has been renamed to LineSequence, use that instead.")] +pub type LineNumberSequence = LineSequence; + +/// A sequence within a line number program. A sequence, as defined in section +/// 6.2.5 of the standard, is a linear subset of a line number program within +/// which addresses are monotonically increasing. +#[derive(Clone, Debug)] +pub struct LineSequence { + /// The first address that is covered by this sequence within the line number + /// program. + pub start: u64, + /// The first address that is *not* covered by this sequence within the line + /// number program. + pub end: u64, + instructions: LineInstructions, +} + +/// Deprecated. `LineNumberProgramHeader` has been renamed to `LineProgramHeader`. +#[deprecated( + note = "LineNumberProgramHeader has been renamed to LineProgramHeader, use that instead." +)] +pub type LineNumberProgramHeader = LineProgramHeader; + +/// A header for a line number program in the `.debug_line` section, as defined +/// in section 6.2.4 of the standard. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct LineProgramHeader::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + encoding: Encoding, + offset: DebugLineOffset, + unit_length: Offset, + + header_length: Offset, + + line_encoding: LineEncoding, + + /// "The number assigned to the first special opcode." + opcode_base: u8, + + /// "This array specifies the number of LEB128 operands for each of the + /// standard opcodes. 
The first element of the array corresponds to the + /// opcode whose value is 1, and the last element corresponds to the opcode + /// whose value is `opcode_base - 1`." + standard_opcode_lengths: R, + + /// "A sequence of directory entry format descriptions." + directory_entry_format: Vec, + + /// > Entries in this sequence describe each path that was searched for + /// > included source files in this compilation. (The paths include those + /// > directories specified explicitly by the user for the compiler to search + /// > and those the compiler searches without explicit direction.) Each path + /// > entry is either a full path name or is relative to the current directory + /// > of the compilation. + /// > + /// > The last entry is followed by a single null byte. + include_directories: Vec>, + + /// "A sequence of file entry format descriptions." + file_name_entry_format: Vec, + + /// "Entries in this sequence describe source files that contribute to the + /// line number information for this compilation unit or is used in other + /// contexts." + file_names: Vec>, + + /// The encoded line program instructions. + program_buf: R, + + /// The current directory of the compilation. + comp_dir: Option, + + /// The primary source file. + comp_file: Option>, +} + +impl LineProgramHeader +where + R: Reader, + Offset: ReaderOffset, +{ + /// Return the offset of the line number program header in the `.debug_line` section. + pub fn offset(&self) -> DebugLineOffset { + self.offset + } + + /// Return the length of the line number program and header, not including + /// the length of the encoded length itself. + pub fn unit_length(&self) -> R::Offset { + self.unit_length + } + + /// Return the encoding parameters for this header's line program. + pub fn encoding(&self) -> Encoding { + self.encoding + } + + /// Get the version of this header's line program. 
+ pub fn version(&self) -> u16 { + self.encoding.version + } + + /// Get the length of the encoded line number program header, not including + /// the length of the encoded length itself. + pub fn header_length(&self) -> R::Offset { + self.header_length + } + + /// Get the size in bytes of a target machine address. + pub fn address_size(&self) -> u8 { + self.encoding.address_size + } + + /// Whether this line program is encoded in 64- or 32-bit DWARF. + pub fn format(&self) -> Format { + self.encoding.format + } + + /// Get the line encoding parameters for this header's line program. + pub fn line_encoding(&self) -> LineEncoding { + self.line_encoding + } + + /// Get the minimum instruction length any instruction in this header's line + /// program may have. + pub fn minimum_instruction_length(&self) -> u8 { + self.line_encoding.minimum_instruction_length + } + + /// Get the maximum number of operations each instruction in this header's + /// line program may have. + pub fn maximum_operations_per_instruction(&self) -> u8 { + self.line_encoding.maximum_operations_per_instruction + } + + /// Get the default value of the `is_stmt` register for this header's line + /// program. + pub fn default_is_stmt(&self) -> bool { + self.line_encoding.default_is_stmt + } + + /// Get the line base for this header's line program. + pub fn line_base(&self) -> i8 { + self.line_encoding.line_base + } + + /// Get the line range for this header's line program. + pub fn line_range(&self) -> u8 { + self.line_encoding.line_range + } + + /// Get opcode base for this header's line program. + pub fn opcode_base(&self) -> u8 { + self.opcode_base + } + + /// An array of `u8` that specifies the number of LEB128 operands for + /// each of the standard opcodes. + pub fn standard_opcode_lengths(&self) -> &R { + &self.standard_opcode_lengths + } + + /// Get the format of a directory entry. + pub fn directory_entry_format(&self) -> &[FileEntryFormat] { + &self.directory_entry_format[..] 
+ } + + /// Get the set of include directories for this header's line program. + /// + /// For DWARF version <= 4, the compilation's current directory is not included + /// in the return value, but is implicitly considered to be in the set per spec. + pub fn include_directories(&self) -> &[AttributeValue] { + &self.include_directories[..] + } + + /// The include directory with the given directory index. + /// + /// A directory index of 0 corresponds to the compilation unit directory. + /// + /// Returns `None` if the index is out of range, including when it does not + /// fit in `usize` on the host. + pub fn directory(&self, directory: u64) -> Option> { + let index = if self.encoding.version <= 4 { + if directory == 0 { + return self.comp_dir.clone().map(AttributeValue::String); + } + // For version <= 4, index 0 is the compilation directory, so the + // stored list is offset by one. Subtract before narrowing so that + // truncation can never yield 0 and underflow. + directory - 1 + } else { + directory + }; + // Checked narrowing: a plain `as usize` cast would alias large indices + // onto valid entries on hosts where `usize` is narrower than 64 bits. + let index = <usize as core::convert::TryFrom<u64>>::try_from(index).ok()?; + self.include_directories.get(index).cloned() + } + + /// Get the format of a file name entry. + pub fn file_name_entry_format(&self) -> &[FileEntryFormat] { + &self.file_name_entry_format[..] + } + + /// Return true if the file entries may have valid timestamps. + /// + /// Only returns false if we definitely know that all timestamp fields + /// are invalid. + pub fn file_has_timestamp(&self) -> bool { + self.encoding.version <= 4 + || self + .file_name_entry_format + .iter() + .any(|x| x.content_type == constants::DW_LNCT_timestamp) + } + + /// Return true if the file entries may have valid sizes. + /// + /// Only returns false if we definitely know that all size fields + /// are invalid. + pub fn file_has_size(&self) -> bool { + self.encoding.version <= 4 + || self + .file_name_entry_format + .iter() + .any(|x| x.content_type == constants::DW_LNCT_size) + } + + /// Return true if the file name entry format contains an MD5 field. + pub fn file_has_md5(&self) -> bool { + self.file_name_entry_format + .iter() + .any(|x| x.content_type == constants::DW_LNCT_MD5) + } + + /// Get the list of source files that appear in this header's line program.
+ pub fn file_names(&self) -> &[FileEntry] { + &self.file_names[..] + } + + /// The source file with the given file index. + /// + /// A file index of 0 corresponds to the compilation unit file. + /// Note that a file index of 0 is invalid for DWARF version <= 4, + /// but we support it anyway. + /// + /// Returns `None` if the index is out of range, including when it does not + /// fit in `usize` on the host. + pub fn file(&self, file: u64) -> Option<&FileEntry> { + let index = if self.encoding.version <= 4 { + if file == 0 { + return self.comp_file.as_ref(); + } + // For version <= 4, index 0 is the compilation unit file, so the + // stored list is offset by one. Subtract before narrowing so that + // truncation can never yield 0 and underflow. + file - 1 + } else { + file + }; + // Checked narrowing: a plain `as usize` cast would alias large indices + // onto valid entries on hosts where `usize` is narrower than 64 bits. + let index = <usize as core::convert::TryFrom<u64>>::try_from(index).ok()?; + self.file_names.get(index) + } + + /// Get the raw, un-parsed `EndianSlice` containing this header's line number + /// program. + /// + /// ``` + /// # fn foo() { + /// use gimli::{LineProgramHeader, EndianSlice, NativeEndian}; + /// + /// fn get_line_number_program_header<'a>() -> LineProgramHeader> { + /// // Get a line number program header from some offset in a + /// // `.debug_line` section... + /// # unimplemented!() + /// } + /// + /// let header = get_line_number_program_header(); + /// let raw_program = header.raw_program_buf(); + /// println!("The length of the raw program in bytes is {}", raw_program.len()); + /// # } + /// ``` + pub fn raw_program_buf(&self) -> R { + self.program_buf.clone() + } + + /// Iterate over the instructions in this header's line number program, parsing + /// them as we go.
+ pub fn instructions(&self) -> LineInstructions { + LineInstructions { + input: self.program_buf.clone(), + } + } + + fn parse( + input: &mut R, + offset: DebugLineOffset, + mut address_size: u8, + mut comp_dir: Option, + comp_name: Option, + ) -> Result> { + let (unit_length, format) = input.read_initial_length()?; + let rest = &mut input.split(unit_length)?; + + let version = rest.read_u16()?; + if version < 2 || version > 5 { + return Err(Error::UnknownVersion(u64::from(version))); + } + + if version >= 5 { + address_size = rest.read_u8()?; + let segment_selector_size = rest.read_u8()?; + if segment_selector_size != 0 { + return Err(Error::UnsupportedSegmentSize); + } + } + + let encoding = Encoding { + format, + version, + address_size, + }; + + let header_length = rest.read_length(format)?; + + let mut program_buf = rest.clone(); + program_buf.skip(header_length)?; + rest.truncate(header_length)?; + + let minimum_instruction_length = rest.read_u8()?; + if minimum_instruction_length == 0 { + return Err(Error::MinimumInstructionLengthZero); + } + + // This field did not exist before DWARF 4, but is specified to be 1 for + // non-VLIW architectures, which makes it a no-op. + let maximum_operations_per_instruction = if version >= 4 { rest.read_u8()? } else { 1 }; + if maximum_operations_per_instruction == 0 { + return Err(Error::MaximumOperationsPerInstructionZero); + } + + let default_is_stmt = rest.read_u8()? 
!= 0; + let line_base = rest.read_i8()?; + let line_range = rest.read_u8()?; + if line_range == 0 { + return Err(Error::LineRangeZero); + } + let line_encoding = LineEncoding { + minimum_instruction_length, + maximum_operations_per_instruction, + default_is_stmt, + line_base, + line_range, + }; + + let opcode_base = rest.read_u8()?; + if opcode_base == 0 { + return Err(Error::OpcodeBaseZero); + } + + let standard_opcode_count = R::Offset::from_u8(opcode_base - 1); + let standard_opcode_lengths = rest.split(standard_opcode_count)?; + + let directory_entry_format; + let mut include_directories = Vec::new(); + if version <= 4 { + directory_entry_format = Vec::new(); + loop { + let directory = rest.read_null_terminated_slice()?; + if directory.is_empty() { + break; + } + include_directories.push(AttributeValue::String(directory)); + } + } else { + comp_dir = None; + directory_entry_format = FileEntryFormat::parse(rest)?; + let count = rest.read_uleb128()?; + for _ in 0..count { + include_directories.push(parse_directory_v5( + rest, + encoding, + &directory_entry_format, + )?); + } + } + + let comp_file; + let file_name_entry_format; + let mut file_names = Vec::new(); + if version <= 4 { + comp_file = comp_name.map(|name| FileEntry { + path_name: AttributeValue::String(name), + directory_index: 0, + timestamp: 0, + size: 0, + md5: [0; 16], + }); + + file_name_entry_format = Vec::new(); + loop { + let path_name = rest.read_null_terminated_slice()?; + if path_name.is_empty() { + break; + } + file_names.push(FileEntry::parse(rest, path_name)?); + } + } else { + comp_file = None; + file_name_entry_format = FileEntryFormat::parse(rest)?; + let count = rest.read_uleb128()?; + for _ in 0..count { + file_names.push(parse_file_v5(rest, encoding, &file_name_entry_format)?); + } + } + + let header = LineProgramHeader { + encoding, + offset, + unit_length, + header_length, + line_encoding, + opcode_base, + standard_opcode_lengths, + directory_entry_format, + include_directories, + 
file_name_entry_format, + file_names, + program_buf, + comp_dir, + comp_file, + }; + Ok(header) + } +} + +/// Deprecated. `IncompleteLineNumberProgram` has been renamed to `IncompleteLineProgram`. +#[deprecated( + note = "IncompleteLineNumberProgram has been renamed to IncompleteLineProgram, use that instead." +)] +pub type IncompleteLineNumberProgram = IncompleteLineProgram; + +/// A line number program that has not been run to completion. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct IncompleteLineProgram::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + header: LineProgramHeader, +} + +impl IncompleteLineProgram +where + R: Reader, + Offset: ReaderOffset, +{ + /// Retrieve the `LineProgramHeader` for this program. + pub fn header(&self) -> &LineProgramHeader { + &self.header + } + + /// Construct a new `LineRows` for executing this program to iterate + /// over rows in the line information matrix. + pub fn rows(self) -> OneShotLineRows { + OneShotLineRows::new(self) + } + + /// Execute the line number program, completing the `IncompleteLineProgram` + /// into a `CompleteLineProgram` and producing an array of sequences within + /// the line number program that can later be used with + /// `CompleteLineProgram::resume_from`. + /// + /// ``` + /// # fn foo() { + /// use gimli::{IncompleteLineProgram, EndianSlice, NativeEndian}; + /// + /// fn get_line_number_program<'a>() -> IncompleteLineProgram> { + /// // Get a line number program from some offset in a + /// // `.debug_line` section... 
+ /// # unimplemented!() + /// } + /// + /// let program = get_line_number_program(); + /// let (program, sequences) = program.sequences().unwrap(); + /// println!("There are {} sequences in this line number program", sequences.len()); + /// # } + /// ``` + #[allow(clippy::type_complexity)] + pub fn sequences(self) -> Result<(CompleteLineProgram, Vec>)> { + let mut sequences = Vec::new(); + let mut rows = self.rows(); + let mut instructions = rows.instructions.clone(); + let mut sequence_start_addr = None; + loop { + let sequence_end_addr; + if rows.next_row()?.is_none() { + break; + } + + let row = &rows.row; + if row.end_sequence() { + sequence_end_addr = row.address(); + } else if sequence_start_addr.is_none() { + sequence_start_addr = Some(row.address()); + continue; + } else { + continue; + } + + // We just finished a sequence. + sequences.push(LineSequence { + // In theory one could have multiple DW_LNE_end_sequence instructions + // in a row. + start: sequence_start_addr.unwrap_or(0), + end: sequence_end_addr, + instructions: instructions.remove_trailing(&rows.instructions)?, + }); + sequence_start_addr = None; + instructions = rows.instructions.clone(); + } + + let program = CompleteLineProgram { + header: rows.program.header, + }; + Ok((program, sequences)) + } +} + +/// Deprecated. `CompleteLineNumberProgram` has been renamed to `CompleteLineProgram`. +#[deprecated( + note = "CompleteLineNumberProgram has been renamed to CompleteLineProgram, use that instead." +)] +pub type CompleteLineNumberProgram = CompleteLineProgram; + +/// A line number program that has previously been run to completion. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct CompleteLineProgram::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + header: LineProgramHeader, +} + +impl CompleteLineProgram +where + R: Reader, + Offset: ReaderOffset, +{ + /// Retrieve the `LineProgramHeader` for this program. 
+ pub fn header(&self) -> &LineProgramHeader { + &self.header + } + + /// Construct a new `LineRows` for executing the subset of the line + /// number program identified by 'sequence' and generating the line information + /// matrix. + /// + /// ``` + /// # fn foo() { + /// use gimli::{IncompleteLineProgram, EndianSlice, NativeEndian}; + /// + /// fn get_line_number_program<'a>() -> IncompleteLineProgram> { + /// // Get a line number program from some offset in a + /// // `.debug_line` section... + /// # unimplemented!() + /// } + /// + /// let program = get_line_number_program(); + /// let (program, sequences) = program.sequences().unwrap(); + /// for sequence in &sequences { + /// let mut sm = program.resume_from(sequence); + /// } + /// # } + /// ``` + pub fn resume_from<'program>( + &'program self, + sequence: &LineSequence, + ) -> ResumedLineRows<'program, R, Offset> { + ResumedLineRows::resume(self, sequence) + } +} + +/// An entry in the `LineProgramHeader`'s `file_names` set. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct FileEntry::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + path_name: AttributeValue, + directory_index: u64, + timestamp: u64, + size: u64, + md5: [u8; 16], +} + +impl FileEntry +where + R: Reader, + Offset: ReaderOffset, +{ + // version 2-4 + fn parse(input: &mut R, path_name: R) -> Result> { + let directory_index = input.read_uleb128()?; + let timestamp = input.read_uleb128()?; + let size = input.read_uleb128()?; + + let entry = FileEntry { + path_name: AttributeValue::String(path_name), + directory_index, + timestamp, + size, + md5: [0; 16], + }; + + Ok(entry) + } + + /// > A slice containing the full or relative path name of + /// > a source file. 
If the entry contains a file name or a relative path + /// > name, the file is located relative to either the compilation directory + /// > (as specified by the DW_AT_comp_dir attribute given in the compilation + /// > unit) or one of the directories in the include_directories section. + pub fn path_name(&self) -> AttributeValue { + self.path_name.clone() + } + + /// > An unsigned LEB128 number representing the directory index of the + /// > directory in which the file was found. + /// > + /// > ... + /// > + /// > The directory index represents an entry in the include_directories + /// > section of the line number program header. The index is 0 if the file + /// > was found in the current directory of the compilation, 1 if it was found + /// > in the first directory in the include_directories section, and so + /// > on. The directory index is ignored for file names that represent full + /// > path names. + pub fn directory_index(&self) -> u64 { + self.directory_index + } + + /// Get this file's directory. + /// + /// A directory index of 0 corresponds to the compilation unit directory. + pub fn directory(&self, header: &LineProgramHeader) -> Option> { + header.directory(self.directory_index) + } + + /// The implementation-defined time of last modification of the file, + /// or 0 if not available. + pub fn timestamp(&self) -> u64 { + self.timestamp + } + + /// "An unsigned LEB128 number representing the time of last modification of + /// the file, or 0 if not available." + // Terminology changed in DWARF version 5. + #[doc(hidden)] + pub fn last_modification(&self) -> u64 { + self.timestamp + } + + /// The size of the file in bytes, or 0 if not available. + pub fn size(&self) -> u64 { + self.size + } + + /// "An unsigned LEB128 number representing the length in bytes of the file, + /// or 0 if not available." + // Terminology changed in DWARF version 5. 
+ #[doc(hidden)] + pub fn length(&self) -> u64 { + self.size + } + + /// A 16-byte MD5 digest of the file contents. + /// + /// Only valid if `LineProgramHeader::file_has_md5` returns `true`. + pub fn md5(&self) -> &[u8; 16] { + &self.md5 + } +} + +/// The format of a component of an include directory or file name entry. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct FileEntryFormat { + /// The type of information that is represented by the component. + pub content_type: constants::DwLnct, + + /// The encoding form of the component value. + pub form: constants::DwForm, +} + +impl FileEntryFormat { + fn parse(input: &mut R) -> Result> { + let format_count = input.read_u8()? as usize; + let mut format = Vec::with_capacity(format_count); + let mut path_count = 0; + for _ in 0..format_count { + let content_type = input.read_uleb128()?; + let content_type = if content_type > u64::from(u16::max_value()) { + constants::DwLnct(u16::max_value()) + } else { + constants::DwLnct(content_type as u16) + }; + if content_type == constants::DW_LNCT_path { + path_count += 1; + } + + let form = constants::DwForm(input.read_uleb128_u16()?); + + format.push(FileEntryFormat { content_type, form }); + } + if path_count != 1 { + return Err(Error::MissingFileEntryFormatPath); + } + Ok(format) + } +} + +fn parse_directory_v5( + input: &mut R, + encoding: Encoding, + formats: &[FileEntryFormat], +) -> Result> { + let mut path_name = None; + + for format in formats { + let value = parse_attribute(input, encoding, format.form)?; + if format.content_type == constants::DW_LNCT_path { + path_name = Some(value); + } + } + + Ok(path_name.unwrap()) +} + +fn parse_file_v5( + input: &mut R, + encoding: Encoding, + formats: &[FileEntryFormat], +) -> Result> { + let mut path_name = None; + let mut directory_index = 0; + let mut timestamp = 0; + let mut size = 0; + let mut md5 = [0; 16]; + + for format in formats { + let value = parse_attribute(input, encoding, format.form)?; + match 
format.content_type { + constants::DW_LNCT_path => path_name = Some(value), + constants::DW_LNCT_directory_index => { + if let Some(value) = value.udata_value() { + directory_index = value; + } + } + constants::DW_LNCT_timestamp => { + if let Some(value) = value.udata_value() { + timestamp = value; + } + } + constants::DW_LNCT_size => { + if let Some(value) = value.udata_value() { + size = value; + } + } + constants::DW_LNCT_MD5 => { + if let AttributeValue::Block(mut value) = value { + if value.len().into_u64() == 16 { + md5 = value.read_u8_array()?; + } + } + } + // Ignore unknown content types. + _ => {} + } + } + + Ok(FileEntry { + path_name: path_name.unwrap(), + directory_index, + timestamp, + size, + md5, + }) +} + +// TODO: this should be shared with unit::parse_attribute(), but that is hard to do. +fn parse_attribute( + input: &mut R, + encoding: Encoding, + form: constants::DwForm, +) -> Result> { + Ok(match form { + constants::DW_FORM_block1 => { + let len = input.read_u8().map(R::Offset::from_u8)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block2 => { + let len = input.read_u16().map(R::Offset::from_u16)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block4 => { + let len = input.read_u32().map(R::Offset::from_u32)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block => { + let len = input.read_uleb128().and_then(R::Offset::from_u64)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_data1 => { + let data = input.read_u8()?; + AttributeValue::Data1(data) + } + constants::DW_FORM_data2 => { + let data = input.read_u16()?; + AttributeValue::Data2(data) + } + constants::DW_FORM_data4 => { + let data = input.read_u32()?; + AttributeValue::Data4(data) + } + constants::DW_FORM_data8 => { + let data = input.read_u64()?; + AttributeValue::Data8(data) + } + constants::DW_FORM_data16 => { + 
let block = input.split(R::Offset::from_u8(16))?; + AttributeValue::Block(block) + } + constants::DW_FORM_udata => { + let data = input.read_uleb128()?; + AttributeValue::Udata(data) + } + constants::DW_FORM_sdata => { + let data = input.read_sleb128()?; + AttributeValue::Sdata(data) + } + constants::DW_FORM_flag => { + let present = input.read_u8()?; + AttributeValue::Flag(present != 0) + } + constants::DW_FORM_sec_offset => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::SecOffset(offset) + } + constants::DW_FORM_string => { + let string = input.read_null_terminated_slice()?; + AttributeValue::String(string) + } + constants::DW_FORM_strp => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugStrRef(DebugStrOffset(offset)) + } + constants::DW_FORM_strp_sup | constants::DW_FORM_GNU_strp_alt => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugStrRefSup(DebugStrOffset(offset)) + } + constants::DW_FORM_line_strp => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugLineStrRef(DebugLineStrOffset(offset)) + } + constants::DW_FORM_strx | constants::DW_FORM_GNU_str_index => { + let index = input.read_uleb128().and_then(R::Offset::from_u64)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx1 => { + let index = input.read_u8().map(R::Offset::from_u8)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx2 => { + let index = input.read_u16().map(R::Offset::from_u16)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx3 => { + let index = input.read_uint(3).and_then(R::Offset::from_u64)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx4 => { + let index = input.read_u32().map(R::Offset::from_u32)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + _ => { + return 
Err(Error::UnknownForm(form)); + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::constants; + use crate::endianity::LittleEndian; + use crate::read::{EndianSlice, Error}; + use crate::test_util::GimliSectionMethods; + use core::u64; + use core::u8; + use test_assembler::{Endian, Label, LabelMaker, Section}; + + #[test] + fn test_parse_debug_line_32_ok() { + #[rustfmt::skip] + let buf = [ + // 32-bit length = 62. + 0x3e, 0x00, 0x00, 0x00, + // Version. + 0x04, 0x00, + // Header length = 40. + 0x28, 0x00, 0x00, 0x00, + // Minimum instruction length. + 0x01, + // Maximum operations per byte. + 0x01, + // Default is_stmt. + 0x01, + // Line base. + 0x00, + // Line range. + 0x01, + // Opcode base. + 0x03, + // Standard opcode lengths for opcodes 1 .. opcode base - 1. + 0x01, 0x02, + // Include directories = '/', 'i', 'n', 'c', '\0', '/', 'i', 'n', 'c', '2', '\0', '\0' + 0x2f, 0x69, 0x6e, 0x63, 0x00, 0x2f, 0x69, 0x6e, 0x63, 0x32, 0x00, 0x00, + // File names + // foo.rs + 0x66, 0x6f, 0x6f, 0x2e, 0x72, 0x73, 0x00, + 0x00, + 0x00, + 0x00, + // bar.h + 0x62, 0x61, 0x72, 0x2e, 0x68, 0x00, + 0x01, + 0x00, + 0x00, + // End file names. + 0x00, + + // Dummy line program data. + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy next line program. 
+ 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + let rest = &mut EndianSlice::new(&buf, LittleEndian); + let comp_dir = EndianSlice::new(b"/comp_dir", LittleEndian); + let comp_name = EndianSlice::new(b"/comp_name", LittleEndian); + + let header = + LineProgramHeader::parse(rest, DebugLineOffset(0), 4, Some(comp_dir), Some(comp_name)) + .expect("should parse header ok"); + + assert_eq!( + *rest, + EndianSlice::new(&buf[buf.len() - 16..], LittleEndian) + ); + + assert_eq!(header.offset, DebugLineOffset(0)); + assert_eq!(header.version(), 4); + assert_eq!(header.minimum_instruction_length(), 1); + assert_eq!(header.maximum_operations_per_instruction(), 1); + assert!(header.default_is_stmt()); + assert_eq!(header.line_base(), 0); + assert_eq!(header.line_range(), 1); + assert_eq!(header.opcode_base(), 3); + assert_eq!(header.directory(0), Some(AttributeValue::String(comp_dir))); + assert_eq!( + header.file(0).unwrap().path_name, + AttributeValue::String(comp_name) + ); + + let expected_lengths = [1, 2]; + assert_eq!(header.standard_opcode_lengths().slice(), &expected_lengths); + + let expected_include_directories = [ + AttributeValue::String(EndianSlice::new(b"/inc", LittleEndian)), + AttributeValue::String(EndianSlice::new(b"/inc2", LittleEndian)), + ]; + assert_eq!(header.include_directories(), &expected_include_directories); + + let expected_file_names = [ + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"foo.rs", LittleEndian)), + directory_index: 0, + timestamp: 0, + size: 0, + md5: [0; 16], + }, + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"bar.h", LittleEndian)), + directory_index: 1, + timestamp: 0, + size: 0, + md5: [0; 16], + }, + ]; + assert_eq!(header.file_names(), &expected_file_names); + } + + #[test] + fn test_parse_debug_line_header_length_too_short() { + #[rustfmt::skip] + let buf = [ + // 32-bit length = 62. 
+ 0x3e, 0x00, 0x00, 0x00, + // Version. + 0x04, 0x00, + // Header length = 20. TOO SHORT!!! + 0x15, 0x00, 0x00, 0x00, + // Minimum instruction length. + 0x01, + // Maximum operations per byte. + 0x01, + // Default is_stmt. + 0x01, + // Line base. + 0x00, + // Line range. + 0x01, + // Opcode base. + 0x03, + // Standard opcode lengths for opcodes 1 .. opcode base - 1. + 0x01, 0x02, + // Include directories = '/', 'i', 'n', 'c', '\0', '/', 'i', 'n', 'c', '2', '\0', '\0' + 0x2f, 0x69, 0x6e, 0x63, 0x00, 0x2f, 0x69, 0x6e, 0x63, 0x32, 0x00, 0x00, + // File names + // foo.rs + 0x66, 0x6f, 0x6f, 0x2e, 0x72, 0x73, 0x00, + 0x00, + 0x00, + 0x00, + // bar.h + 0x62, 0x61, 0x72, 0x2e, 0x68, 0x00, + 0x01, + 0x00, + 0x00, + // End file names. + 0x00, + + // Dummy line program data. + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy next line program. + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + let input = &mut EndianSlice::new(&buf, LittleEndian); + + match LineProgramHeader::parse(input, DebugLineOffset(0), 4, None, None) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + #[test] + fn test_parse_debug_line_unit_length_too_short() { + #[rustfmt::skip] + let buf = [ + // 32-bit length = 40. TOO SHORT!!! + 0x28, 0x00, 0x00, 0x00, + // Version. + 0x04, 0x00, + // Header length = 40. + 0x28, 0x00, 0x00, 0x00, + // Minimum instruction length. + 0x01, + // Maximum operations per byte. + 0x01, + // Default is_stmt. + 0x01, + // Line base. + 0x00, + // Line range. + 0x01, + // Opcode base. + 0x03, + // Standard opcode lengths for opcodes 1 .. opcode base - 1. 
+ 0x01, 0x02, + // Include directories = '/', 'i', 'n', 'c', '\0', '/', 'i', 'n', 'c', '2', '\0', '\0' + 0x2f, 0x69, 0x6e, 0x63, 0x00, 0x2f, 0x69, 0x6e, 0x63, 0x32, 0x00, 0x00, + // File names + // foo.rs + 0x66, 0x6f, 0x6f, 0x2e, 0x72, 0x73, 0x00, + 0x00, + 0x00, + 0x00, + // bar.h + 0x62, 0x61, 0x72, 0x2e, 0x68, 0x00, + 0x01, + 0x00, + 0x00, + // End file names. + 0x00, + + // Dummy line program data. + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // Dummy next line program. + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + let input = &mut EndianSlice::new(&buf, LittleEndian); + + match LineProgramHeader::parse(input, DebugLineOffset(0), 4, None, None) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + const OPCODE_BASE: u8 = 13; + const STANDARD_OPCODE_LENGTHS: &[u8] = &[0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1]; + + fn make_test_header( + buf: EndianSlice<'_, LittleEndian>, + ) -> LineProgramHeader> { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 8, + }; + let line_encoding = LineEncoding { + line_base: -3, + line_range: 12, + ..Default::default() + }; + LineProgramHeader { + encoding, + offset: DebugLineOffset(0), + unit_length: 1, + header_length: 1, + line_encoding, + opcode_base: OPCODE_BASE, + standard_opcode_lengths: EndianSlice::new(STANDARD_OPCODE_LENGTHS, LittleEndian), + file_names: vec![ + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"foo.c", LittleEndian)), + directory_index: 0, + timestamp: 0, + size: 0, + md5: [0; 16], + }, + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"bar.rs", LittleEndian)), + directory_index: 0, + timestamp: 0, + size: 0, + md5: [0; 16], + }, + ], + include_directories: vec![], + directory_entry_format: vec![], + file_name_entry_format: vec![], + program_buf: buf, + 
comp_dir: None, + comp_file: None, + } + } + + fn make_test_program( + buf: EndianSlice<'_, LittleEndian>, + ) -> IncompleteLineProgram> { + IncompleteLineProgram { + header: make_test_header(buf), + } + } + + #[test] + fn test_parse_special_opcodes() { + for i in OPCODE_BASE..u8::MAX { + let input = [i, 0, 0, 0]; + let input = EndianSlice::new(&input, LittleEndian); + let header = make_test_header(input); + + let mut rest = input; + let opcode = + LineInstruction::parse(&header, &mut rest).expect("Should parse the opcode OK"); + + assert_eq!(*rest, *input.range_from(1..)); + assert_eq!(opcode, LineInstruction::Special(i)); + } + } + + #[test] + fn test_parse_standard_opcodes() { + fn test( + raw: constants::DwLns, + operands: Operands, + expected: LineInstruction>, + ) where + Operands: AsRef<[u8]>, + { + let mut input = Vec::new(); + input.push(raw.0); + input.extend_from_slice(operands.as_ref()); + + let expected_rest = [0, 1, 2, 3, 4]; + input.extend_from_slice(&expected_rest); + + let input = EndianSlice::new(&input, LittleEndian); + let header = make_test_header(input); + + let mut rest = input; + let opcode = + LineInstruction::parse(&header, &mut rest).expect("Should parse the opcode OK"); + + assert_eq!(opcode, expected); + assert_eq!(*rest, expected_rest); + } + + test(constants::DW_LNS_copy, [], LineInstruction::Copy); + test( + constants::DW_LNS_advance_pc, + [42], + LineInstruction::AdvancePc(42), + ); + test( + constants::DW_LNS_advance_line, + [9], + LineInstruction::AdvanceLine(9), + ); + test(constants::DW_LNS_set_file, [7], LineInstruction::SetFile(7)); + test( + constants::DW_LNS_set_column, + [1], + LineInstruction::SetColumn(1), + ); + test( + constants::DW_LNS_negate_stmt, + [], + LineInstruction::NegateStatement, + ); + test( + constants::DW_LNS_set_basic_block, + [], + LineInstruction::SetBasicBlock, + ); + test( + constants::DW_LNS_const_add_pc, + [], + LineInstruction::ConstAddPc, + ); + test( + constants::DW_LNS_fixed_advance_pc, + [42, 
0], + LineInstruction::FixedAddPc(42), + ); + test( + constants::DW_LNS_set_prologue_end, + [], + LineInstruction::SetPrologueEnd, + ); + test( + constants::DW_LNS_set_isa, + [57 + 0x80, 100], + LineInstruction::SetIsa(12857), + ); + } + + #[test] + fn test_parse_unknown_standard_opcode_no_args() { + let input = [OPCODE_BASE, 1, 2, 3]; + let input = EndianSlice::new(&input, LittleEndian); + let mut standard_opcode_lengths = Vec::new(); + let mut header = make_test_header(input); + standard_opcode_lengths.extend(header.standard_opcode_lengths.slice()); + standard_opcode_lengths.push(0); + header.opcode_base += 1; + header.standard_opcode_lengths = EndianSlice::new(&standard_opcode_lengths, LittleEndian); + + let mut rest = input; + let opcode = + LineInstruction::parse(&header, &mut rest).expect("Should parse the opcode OK"); + + assert_eq!( + opcode, + LineInstruction::UnknownStandard0(constants::DwLns(OPCODE_BASE)) + ); + assert_eq!(*rest, *input.range_from(1..)); + } + + #[test] + fn test_parse_unknown_standard_opcode_one_arg() { + let input = [OPCODE_BASE, 1, 2, 3]; + let input = EndianSlice::new(&input, LittleEndian); + let mut standard_opcode_lengths = Vec::new(); + let mut header = make_test_header(input); + standard_opcode_lengths.extend(header.standard_opcode_lengths.slice()); + standard_opcode_lengths.push(1); + header.opcode_base += 1; + header.standard_opcode_lengths = EndianSlice::new(&standard_opcode_lengths, LittleEndian); + + let mut rest = input; + let opcode = + LineInstruction::parse(&header, &mut rest).expect("Should parse the opcode OK"); + + assert_eq!( + opcode, + LineInstruction::UnknownStandard1(constants::DwLns(OPCODE_BASE), 1) + ); + assert_eq!(*rest, *input.range_from(2..)); + } + + #[test] + fn test_parse_unknown_standard_opcode_many_args() { + let input = [OPCODE_BASE, 1, 2, 3]; + let input = EndianSlice::new(&input, LittleEndian); + let args = input.range_from(1..); + let mut standard_opcode_lengths = Vec::new(); + let mut header = 
make_test_header(input); + standard_opcode_lengths.extend(header.standard_opcode_lengths.slice()); + standard_opcode_lengths.push(3); + header.opcode_base += 1; + header.standard_opcode_lengths = EndianSlice::new(&standard_opcode_lengths, LittleEndian); + + let mut rest = input; + let opcode = + LineInstruction::parse(&header, &mut rest).expect("Should parse the opcode OK"); + + assert_eq!( + opcode, + LineInstruction::UnknownStandardN(constants::DwLns(OPCODE_BASE), args) + ); + assert_eq!(*rest, []); + } + + #[test] + fn test_parse_extended_opcodes() { + fn test( + raw: constants::DwLne, + operands: Operands, + expected: LineInstruction>, + ) where + Operands: AsRef<[u8]>, + { + let mut input = Vec::new(); + input.push(0); + + let operands = operands.as_ref(); + input.push(1 + operands.len() as u8); + + input.push(raw.0); + input.extend_from_slice(operands); + + let expected_rest = [0, 1, 2, 3, 4]; + input.extend_from_slice(&expected_rest); + + let input = EndianSlice::new(&input, LittleEndian); + let header = make_test_header(input); + + let mut rest = input; + let opcode = + LineInstruction::parse(&header, &mut rest).expect("Should parse the opcode OK"); + + assert_eq!(opcode, expected); + assert_eq!(*rest, expected_rest); + } + + test( + constants::DW_LNE_end_sequence, + [], + LineInstruction::EndSequence, + ); + test( + constants::DW_LNE_set_address, + [1, 2, 3, 4, 5, 6, 7, 8], + LineInstruction::SetAddress(578_437_695_752_307_201), + ); + test( + constants::DW_LNE_set_discriminator, + [42], + LineInstruction::SetDiscriminator(42), + ); + + let mut file = Vec::new(); + // "foo.c" + let path_name = [b'f', b'o', b'o', b'.', b'c', 0]; + file.extend_from_slice(&path_name); + // Directory index. + file.push(0); + // Last modification of file. + file.push(1); + // Size of file. 
+ file.push(2); + + test( + constants::DW_LNE_define_file, + file, + LineInstruction::DefineFile(FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"foo.c", LittleEndian)), + directory_index: 0, + timestamp: 1, + size: 2, + md5: [0; 16], + }), + ); + + // Unknown extended opcode. + let operands = [1, 2, 3, 4, 5, 6]; + let opcode = constants::DwLne(99); + test( + opcode, + operands, + LineInstruction::UnknownExtended(opcode, EndianSlice::new(&operands, LittleEndian)), + ); + } + + #[test] + fn test_file_entry_directory() { + let path_name = [b'f', b'o', b'o', b'.', b'r', b's', 0]; + + let mut file = FileEntry { + path_name: AttributeValue::String(EndianSlice::new(&path_name, LittleEndian)), + directory_index: 1, + timestamp: 0, + size: 0, + md5: [0; 16], + }; + + let mut header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let dir = AttributeValue::String(EndianSlice::new(b"dir", LittleEndian)); + header.include_directories.push(dir); + + assert_eq!(file.directory(&header), Some(dir)); + + // Now test the compilation's current directory. 
+ file.directory_index = 0; + assert_eq!(file.directory(&header), None); + } + + fn assert_exec_opcode<'input>( + header: LineProgramHeader>, + mut registers: LineRow, + opcode: LineInstruction>, + expected_registers: LineRow, + expect_new_row: bool, + ) { + let mut program = IncompleteLineProgram { header }; + let is_new_row = registers.execute(opcode, &mut program); + + assert_eq!(is_new_row, expect_new_row); + assert_eq!(registers, expected_registers); + } + + #[test] + fn test_exec_special_noop() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::Special(16); + let expected_registers = initial_registers; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_special_negative_line_advance() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let mut initial_registers = LineRow::new(&header); + initial_registers.line.0 = 10; + + let opcode = LineInstruction::Special(13); + + let mut expected_registers = initial_registers; + expected_registers.line.0 -= 3; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_special_positive_line_advance() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let initial_registers = LineRow::new(&header); + + let opcode = LineInstruction::Special(19); + + let mut expected_registers = initial_registers; + expected_registers.line.0 += 3; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_special_positive_address_advance() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let initial_registers = LineRow::new(&header); + + let opcode = LineInstruction::Special(52); + + let mut expected_registers = initial_registers; + expected_registers.address.0 += 3; + + 
assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_special_positive_address_and_line_advance() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let initial_registers = LineRow::new(&header); + + let opcode = LineInstruction::Special(55); + + let mut expected_registers = initial_registers; + expected_registers.address.0 += 3; + expected_registers.line.0 += 3; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_special_positive_address_and_negative_line_advance() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let mut initial_registers = LineRow::new(&header); + initial_registers.line.0 = 10; + + let opcode = LineInstruction::Special(49); + + let mut expected_registers = initial_registers; + expected_registers.address.0 += 3; + expected_registers.line.0 -= 3; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_special_line_underflow() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let mut initial_registers = LineRow::new(&header); + initial_registers.line.0 = 2; + + // -3 line advance. + let opcode = LineInstruction::Special(13); + + let mut expected_registers = initial_registers; + // Clamp at 0. No idea if this is the best way to handle this situation + // or not... 
+ expected_registers.line.0 = 0; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_copy() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let mut initial_registers = LineRow::new(&header); + initial_registers.address.0 = 1337; + initial_registers.line.0 = 42; + + let opcode = LineInstruction::Copy; + + let expected_registers = initial_registers; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_advance_pc() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::AdvancePc(42); + + let mut expected_registers = initial_registers; + expected_registers.address.0 += 42; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_advance_pc_overflow() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let opcode = LineInstruction::AdvancePc(42); + + let mut initial_registers = LineRow::new(&header); + initial_registers.address.0 = u64::MAX; + + let mut expected_registers = initial_registers; + expected_registers.address.0 = 41; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_advance_line() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::AdvanceLine(42); + + let mut expected_registers = initial_registers; + expected_registers.line.0 += 42; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_advance_line_overflow() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let opcode = LineInstruction::AdvanceLine(42); + + let mut initial_registers = LineRow::new(&header); + 
initial_registers.line.0 = u64::MAX; + + let mut expected_registers = initial_registers; + expected_registers.line.0 = 41; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_set_file_in_bounds() { + for file_idx in 1..3 { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::SetFile(file_idx); + + let mut expected_registers = initial_registers; + expected_registers.file = file_idx; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + } + + #[test] + fn test_exec_set_file_out_of_bounds() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::SetFile(100); + + // The spec doesn't say anything about rejecting input programs + // that set the file register out of bounds of the actual number + // of files that have been defined. Instead, we cross our + // fingers and hope that one gets defined before + // `LineRow::file` gets called and handle the error at + // that time if need be. + let mut expected_registers = initial_registers; + expected_registers.file = 100; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_file_entry_file_index_out_of_bounds() { + // These indices are 1-based, so 0 is invalid. 100 is way more than the + // number of files defined in the header. + let out_of_bounds_indices = [0, 100]; + + for file_idx in &out_of_bounds_indices[..] 
{ + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let mut row = LineRow::new(&header); + + row.file = *file_idx; + + assert_eq!(row.file(&header), None); + } + } + + #[test] + fn test_file_entry_file_index_in_bounds() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let mut row = LineRow::new(&header); + + row.file = 2; + + assert_eq!(row.file(&header), Some(&header.file_names()[1])); + } + + #[test] + fn test_exec_set_column() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::SetColumn(42); + + let mut expected_registers = initial_registers; + expected_registers.column = 42; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_negate_statement() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::NegateStatement; + + let mut expected_registers = initial_registers; + expected_registers.is_stmt = !initial_registers.is_stmt; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_set_basic_block() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let mut initial_registers = LineRow::new(&header); + initial_registers.basic_block = false; + + let opcode = LineInstruction::SetBasicBlock; + + let mut expected_registers = initial_registers; + expected_registers.basic_block = true; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_const_add_pc() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::ConstAddPc; + + let mut expected_registers = initial_registers; + expected_registers.address.0 += 20; + + 
assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_fixed_add_pc() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let mut initial_registers = LineRow::new(&header); + initial_registers.op_index.0 = 1; + + let opcode = LineInstruction::FixedAddPc(10); + + let mut expected_registers = initial_registers; + expected_registers.address.0 += 10; + expected_registers.op_index.0 = 0; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_set_prologue_end() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + + let mut initial_registers = LineRow::new(&header); + initial_registers.prologue_end = false; + + let opcode = LineInstruction::SetPrologueEnd; + + let mut expected_registers = initial_registers; + expected_registers.prologue_end = true; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_set_isa() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::SetIsa(1993); + + let mut expected_registers = initial_registers; + expected_registers.isa = 1993; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_unknown_standard_0() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::UnknownStandard0(constants::DwLns(111)); + let expected_registers = initial_registers; + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_unknown_standard_1() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = 
LineInstruction::UnknownStandard1(constants::DwLns(111), 2); + let expected_registers = initial_registers; + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_unknown_standard_n() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::UnknownStandardN( + constants::DwLns(111), + EndianSlice::new(&[2, 2, 2], LittleEndian), + ); + let expected_registers = initial_registers; + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_end_sequence() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::EndSequence; + + let mut expected_registers = initial_registers; + expected_registers.end_sequence = true; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, true); + } + + #[test] + fn test_exec_set_address() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::SetAddress(3030); + + let mut expected_registers = initial_registers; + expected_registers.address.0 = 3030; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_set_address_tombstone() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::SetAddress(!0); + + let mut expected_registers = initial_registers; + expected_registers.tombstone = true; + expected_registers.address.0 = !0; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_define_file() { + let mut program = make_test_program(EndianSlice::new(&[], LittleEndian)); + let mut row = 
LineRow::new(program.header()); + + let file = FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"test.cpp", LittleEndian)), + directory_index: 0, + timestamp: 0, + size: 0, + md5: [0; 16], + }; + + let opcode = LineInstruction::DefineFile(file); + let is_new_row = row.execute(opcode, &mut program); + + assert!(!is_new_row); + assert_eq!(Some(&file), program.header().file_names.last()); + } + + #[test] + fn test_exec_set_discriminator() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::SetDiscriminator(9); + + let mut expected_registers = initial_registers; + expected_registers.discriminator = 9; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + #[test] + fn test_exec_unknown_extended() { + let header = make_test_header(EndianSlice::new(&[], LittleEndian)); + let initial_registers = LineRow::new(&header); + let opcode = LineInstruction::UnknownExtended( + constants::DwLne(74), + EndianSlice::new(&[], LittleEndian), + ); + let expected_registers = initial_registers; + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); + } + + /// Ensure that `LineRows` is covariant wrt R. + /// This only needs to compile. 
+ #[allow(dead_code, unreachable_code, unused_variables)] + #[allow(clippy::diverging_sub_expression)] + fn test_line_rows_variance<'a, 'b>(_: &'a [u8], _: &'b [u8]) + where + 'a: 'b, + { + let a: &OneShotLineRows> = unimplemented!(); + let _: &OneShotLineRows> = a; + } + + #[test] + fn test_parse_debug_line_v5_ok() { + let expected_lengths = &[1, 2]; + let expected_program = &[0, 1, 2, 3, 4]; + let expected_rest = &[5, 6, 7, 8, 9]; + let expected_include_directories = [ + AttributeValue::String(EndianSlice::new(b"dir1", LittleEndian)), + AttributeValue::String(EndianSlice::new(b"dir2", LittleEndian)), + ]; + let expected_file_names = [ + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"file1", LittleEndian)), + directory_index: 0, + timestamp: 0, + size: 0, + md5: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + }, + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"file2", LittleEndian)), + directory_index: 1, + timestamp: 0, + size: 0, + md5: [ + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + ], + }, + ]; + + for format in [Format::Dwarf32, Format::Dwarf64] { + let length = Label::new(); + let header_length = Label::new(); + let start = Label::new(); + let header_start = Label::new(); + let end = Label::new(); + let header_end = Label::new(); + let section = Section::with_endian(Endian::Little) + .initial_length(format, &length, &start) + .D16(5) + // Address size. + .D8(4) + // Segment selector size. + .D8(0) + .word_label(format.word_size(), &header_length) + .mark(&header_start) + // Minimum instruction length. + .D8(1) + // Maximum operations per byte. + .D8(1) + // Default is_stmt. + .D8(1) + // Line base. + .D8(0) + // Line range. + .D8(1) + // Opcode base. + .D8(expected_lengths.len() as u8 + 1) + // Standard opcode lengths for opcodes 1 .. opcode base - 1. + .append_bytes(expected_lengths) + // Directory entry format count. 
+ .D8(1) + .uleb(constants::DW_LNCT_path.0 as u64) + .uleb(constants::DW_FORM_string.0 as u64) + // Directory count. + .D8(2) + .append_bytes(b"dir1\0") + .append_bytes(b"dir2\0") + // File entry format count. + .D8(3) + .uleb(constants::DW_LNCT_path.0 as u64) + .uleb(constants::DW_FORM_string.0 as u64) + .uleb(constants::DW_LNCT_directory_index.0 as u64) + .uleb(constants::DW_FORM_data1.0 as u64) + .uleb(constants::DW_LNCT_MD5.0 as u64) + .uleb(constants::DW_FORM_data16.0 as u64) + // File count. + .D8(2) + .append_bytes(b"file1\0") + .D8(0) + .append_bytes(&expected_file_names[0].md5) + .append_bytes(b"file2\0") + .D8(1) + .append_bytes(&expected_file_names[1].md5) + .mark(&header_end) + // Dummy line program data. + .append_bytes(expected_program) + .mark(&end) + // Dummy trailing data. + .append_bytes(expected_rest); + length.set_const((&end - &start) as u64); + header_length.set_const((&header_end - &header_start) as u64); + let section = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(§ion, LittleEndian); + + let header = LineProgramHeader::parse(input, DebugLineOffset(0), 0, None, None) + .expect("should parse header ok"); + + assert_eq!(header.raw_program_buf().slice(), expected_program); + assert_eq!(input.slice(), expected_rest); + + assert_eq!(header.offset, DebugLineOffset(0)); + assert_eq!(header.version(), 5); + assert_eq!(header.address_size(), 4); + assert_eq!(header.minimum_instruction_length(), 1); + assert_eq!(header.maximum_operations_per_instruction(), 1); + assert!(header.default_is_stmt()); + assert_eq!(header.line_base(), 0); + assert_eq!(header.line_range(), 1); + assert_eq!(header.opcode_base(), expected_lengths.len() as u8 + 1); + assert_eq!(header.standard_opcode_lengths().slice(), expected_lengths); + assert_eq!( + header.directory_entry_format(), + &[FileEntryFormat { + content_type: constants::DW_LNCT_path, + form: constants::DW_FORM_string, + }] + ); + assert_eq!(header.include_directories(), 
expected_include_directories); + assert_eq!(header.directory(0), Some(expected_include_directories[0])); + assert_eq!( + header.file_name_entry_format(), + &[ + FileEntryFormat { + content_type: constants::DW_LNCT_path, + form: constants::DW_FORM_string, + }, + FileEntryFormat { + content_type: constants::DW_LNCT_directory_index, + form: constants::DW_FORM_data1, + }, + FileEntryFormat { + content_type: constants::DW_LNCT_MD5, + form: constants::DW_FORM_data16, + } + ] + ); + assert_eq!(header.file_names(), expected_file_names); + assert_eq!(header.file(0), Some(&expected_file_names[0])); + } + } + + #[test] + fn test_sequences() { + #[rustfmt::skip] + let buf = [ + // 32-bit length + 94, 0x00, 0x00, 0x00, + // Version. + 0x04, 0x00, + // Header length = 40. + 0x28, 0x00, 0x00, 0x00, + // Minimum instruction length. + 0x01, + // Maximum operations per byte. + 0x01, + // Default is_stmt. + 0x01, + // Line base. + 0x00, + // Line range. + 0x01, + // Opcode base. + 0x03, + // Standard opcode lengths for opcodes 1 .. opcode base - 1. + 0x01, 0x02, + // Include directories = '/', 'i', 'n', 'c', '\0', '/', 'i', 'n', 'c', '2', '\0', '\0' + 0x2f, 0x69, 0x6e, 0x63, 0x00, 0x2f, 0x69, 0x6e, 0x63, 0x32, 0x00, 0x00, + // File names + // foo.rs + 0x66, 0x6f, 0x6f, 0x2e, 0x72, 0x73, 0x00, + 0x00, + 0x00, + 0x00, + // bar.h + 0x62, 0x61, 0x72, 0x2e, 0x68, 0x00, + 0x01, + 0x00, + 0x00, + // End file names. 
+ 0x00, + + 0, 5, constants::DW_LNE_set_address.0, 1, 0, 0, 0, + constants::DW_LNS_copy.0, + constants::DW_LNS_advance_pc.0, 1, + constants::DW_LNS_copy.0, + constants::DW_LNS_advance_pc.0, 2, + 0, 1, constants::DW_LNE_end_sequence.0, + + // Tombstone + 0, 5, constants::DW_LNE_set_address.0, 0xff, 0xff, 0xff, 0xff, + constants::DW_LNS_copy.0, + constants::DW_LNS_advance_pc.0, 1, + constants::DW_LNS_copy.0, + constants::DW_LNS_advance_pc.0, 2, + 0, 1, constants::DW_LNE_end_sequence.0, + + 0, 5, constants::DW_LNE_set_address.0, 11, 0, 0, 0, + constants::DW_LNS_copy.0, + constants::DW_LNS_advance_pc.0, 1, + constants::DW_LNS_copy.0, + constants::DW_LNS_advance_pc.0, 2, + 0, 1, constants::DW_LNE_end_sequence.0, + ]; + assert_eq!(buf[0] as usize, buf.len() - 4); + + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + let header = LineProgramHeader::parse(rest, DebugLineOffset(0), 4, None, None) + .expect("should parse header ok"); + let program = IncompleteLineProgram { header }; + + let sequences = program.sequences().unwrap().1; + assert_eq!(sequences.len(), 2); + assert_eq!(sequences[0].start, 1); + assert_eq!(sequences[0].end, 4); + assert_eq!(sequences[1].start, 11); + assert_eq!(sequences[1].end, 14); + } +} diff --git a/third_party/rust/gimli/src/read/lists.rs b/third_party/rust/gimli/src/read/lists.rs new file mode 100644 index 000000000000..898a757d379b --- /dev/null +++ b/third_party/rust/gimli/src/read/lists.rs @@ -0,0 +1,68 @@ +use crate::common::{Encoding, Format}; +use crate::read::{Error, Reader, Result}; + +#[derive(Debug, Clone, Copy)] +pub(crate) struct ListsHeader { + encoding: Encoding, + #[allow(dead_code)] + offset_entry_count: u32, +} + +impl Default for ListsHeader { + fn default() -> Self { + ListsHeader { + encoding: Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 0, + }, + offset_entry_count: 0, + } + } +} + +impl ListsHeader { + /// Return the serialized size of the table header. 
+ #[allow(dead_code)] + #[inline] + fn size(self) -> u8 { + // initial_length + version + address_size + segment_selector_size + offset_entry_count + ListsHeader::size_for_encoding(self.encoding) + } + + /// Return the serialized size of the table header. + #[inline] + pub(crate) fn size_for_encoding(encoding: Encoding) -> u8 { + // initial_length + version + address_size + segment_selector_size + offset_entry_count + encoding.format.initial_length_size() + 2 + 1 + 1 + 4 + } +} + +// TODO: add an iterator over headers in the appropriate sections section +#[allow(dead_code)] +fn parse_header(input: &mut R) -> Result { + let (length, format) = input.read_initial_length()?; + input.truncate(length)?; + + let version = input.read_u16()?; + if version != 5 { + return Err(Error::UnknownVersion(u64::from(version))); + } + + let address_size = input.read_u8()?; + let segment_selector_size = input.read_u8()?; + if segment_selector_size != 0 { + return Err(Error::UnsupportedSegmentSize); + } + let offset_entry_count = input.read_u32()?; + + let encoding = Encoding { + format, + version, + address_size, + }; + Ok(ListsHeader { + encoding, + offset_entry_count, + }) +} diff --git a/third_party/rust/gimli/src/read/loclists.rs b/third_party/rust/gimli/src/read/loclists.rs new file mode 100644 index 000000000000..66fd0eb9ae4b --- /dev/null +++ b/third_party/rust/gimli/src/read/loclists.rs @@ -0,0 +1,1640 @@ +use crate::common::{ + DebugAddrBase, DebugAddrIndex, DebugLocListsBase, DebugLocListsIndex, DwarfFileType, Encoding, + LocationListsOffset, SectionId, +}; +use crate::constants; +use crate::endianity::Endianity; +use crate::read::{ + lists::ListsHeader, DebugAddr, EndianSlice, Error, Expression, Range, RawRange, Reader, + ReaderOffset, ReaderOffsetId, Result, Section, +}; + +/// The raw contents of the `.debug_loc` section. 
+#[derive(Debug, Default, Clone, Copy)] +pub struct DebugLoc { + pub(crate) section: R, +} + +impl<'input, Endian> DebugLoc> +where + Endian: Endianity, +{ + /// Construct a new `DebugLoc` instance from the data in the `.debug_loc` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_loc` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugLoc, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_loc_section_somehow = || &buf; + /// let debug_loc = DebugLoc::new(read_debug_loc_section_somehow(), LittleEndian); + /// ``` + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl DebugLoc { + /// Create a `DebugLoc` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub(crate) fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugLoc + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugLoc { + fn id() -> SectionId { + SectionId::DebugLoc + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugLoc { + fn from(section: R) -> Self { + DebugLoc { section } + } +} + +/// The `DebugLocLists` struct represents the DWARF data +/// found in the `.debug_loclists` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugLocLists { + section: R, +} + +impl<'input, Endian> DebugLocLists> +where + Endian: Endianity, +{ + /// Construct a new `DebugLocLists` instance from the data in the `.debug_loclists` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_loclists` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. 
+ /// + /// ``` + /// use gimli::{DebugLocLists, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_loclists_section_somehow = || &buf; + /// let debug_loclists = DebugLocLists::new(read_debug_loclists_section_somehow(), LittleEndian); + /// ``` + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl DebugLocLists { + /// Create a `DebugLocLists` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub(crate) fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugLocLists + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugLocLists { + fn id() -> SectionId { + SectionId::DebugLocLists + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugLocLists { + fn from(section: R) -> Self { + DebugLocLists { section } + } +} + +pub(crate) type LocListsHeader = ListsHeader; + +impl DebugLocListsBase +where + Offset: ReaderOffset, +{ + /// Returns a `DebugLocListsBase` with the default value of DW_AT_loclists_base + /// for the given `Encoding` and `DwarfFileType`. + pub fn default_for_encoding_and_file( + encoding: Encoding, + file_type: DwarfFileType, + ) -> DebugLocListsBase { + if encoding.version >= 5 && file_type == DwarfFileType::Dwo { + // In .dwo files, the compiler omits the DW_AT_loclists_base attribute (because there is + // only a single unit in the file) but we must skip past the header, which the attribute + // would normally do for us. + DebugLocListsBase(Offset::from_u8(LocListsHeader::size_for_encoding(encoding))) + } else { + DebugLocListsBase(Offset::from_u8(0)) + } + } +} + +/// The DWARF data found in `.debug_loc` and `.debug_loclists` sections. 
+#[derive(Debug, Default, Clone, Copy)] +pub struct LocationLists { + debug_loc: DebugLoc, + debug_loclists: DebugLocLists, +} + +impl LocationLists { + /// Construct a new `LocationLists` instance from the data in the `.debug_loc` and + /// `.debug_loclists` sections. + pub fn new(debug_loc: DebugLoc, debug_loclists: DebugLocLists) -> LocationLists { + LocationLists { + debug_loc, + debug_loclists, + } + } +} + +impl LocationLists { + /// Create a `LocationLists` that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `Dwarf::borrow`. + pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> LocationLists + where + F: FnMut(&'a T) -> R, + { + LocationLists { + debug_loc: borrow(&self.debug_loc.section).into(), + debug_loclists: borrow(&self.debug_loclists.section).into(), + } + } +} + +impl LocationLists { + /// Iterate over the `LocationListEntry`s starting at the given offset. + /// + /// The `unit_encoding` must match the compilation unit that the + /// offset was contained in. + /// + /// The `base_address` should be obtained from the `DW_AT_low_pc` attribute in the + /// `DW_TAG_compile_unit` entry for the compilation unit that contains this location + /// list. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn locations( + &self, + offset: LocationListsOffset, + unit_encoding: Encoding, + base_address: u64, + debug_addr: &DebugAddr, + debug_addr_base: DebugAddrBase, + ) -> Result> { + Ok(LocListIter::new( + self.raw_locations(offset, unit_encoding)?, + base_address, + debug_addr.clone(), + debug_addr_base, + )) + } + + /// Similar to `locations`, but with special handling for .dwo files. + /// This should only been used when this `LocationLists` was loaded from a + /// .dwo file. 
+ pub fn locations_dwo( + &self, + offset: LocationListsOffset, + unit_encoding: Encoding, + base_address: u64, + debug_addr: &DebugAddr, + debug_addr_base: DebugAddrBase, + ) -> Result> { + Ok(LocListIter::new( + self.raw_locations_dwo(offset, unit_encoding)?, + base_address, + debug_addr.clone(), + debug_addr_base, + )) + } + + /// Iterate over the raw `LocationListEntry`s starting at the given offset. + /// + /// The `unit_encoding` must match the compilation unit that the + /// offset was contained in. + /// + /// This iterator does not perform any processing of the location entries, + /// such as handling base addresses. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn raw_locations( + &self, + offset: LocationListsOffset, + unit_encoding: Encoding, + ) -> Result> { + let (mut input, format) = if unit_encoding.version <= 4 { + (self.debug_loc.section.clone(), LocListsFormat::Bare) + } else { + (self.debug_loclists.section.clone(), LocListsFormat::Lle) + }; + input.skip(offset.0)?; + Ok(RawLocListIter::new(input, unit_encoding, format)) + } + + /// Similar to `raw_locations`, but with special handling for .dwo files. + /// This should only been used when this `LocationLists` was loaded from a + /// .dwo file. + pub fn raw_locations_dwo( + &self, + offset: LocationListsOffset, + unit_encoding: Encoding, + ) -> Result> { + let mut input = if unit_encoding.version <= 4 { + // In the GNU split dwarf extension the locations are present in the + // .debug_loc section but are encoded with the DW_LLE values used + // for the DWARF 5 .debug_loclists section. + self.debug_loc.section.clone() + } else { + self.debug_loclists.section.clone() + }; + input.skip(offset.0)?; + Ok(RawLocListIter::new( + input, + unit_encoding, + LocListsFormat::Lle, + )) + } + + /// Returns the `.debug_loclists` offset at the given `base` and `index`. 
+ /// + /// The `base` must be the `DW_AT_loclists_base` value from the compilation unit DIE. + /// This is an offset that points to the first entry following the header. + /// + /// The `index` is the value of a `DW_FORM_loclistx` attribute. + pub fn get_offset( + &self, + unit_encoding: Encoding, + base: DebugLocListsBase, + index: DebugLocListsIndex, + ) -> Result> { + let format = unit_encoding.format; + let input = &mut self.debug_loclists.section.clone(); + input.skip(base.0)?; + input.skip(R::Offset::from_u64( + index.0.into_u64() * u64::from(format.word_size()), + )?)?; + input + .read_offset(format) + .map(|x| LocationListsOffset(base.0 + x)) + } + + /// Call `Reader::lookup_offset_id` for each section, and return the first match. + pub fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<(SectionId, R::Offset)> { + self.debug_loc + .lookup_offset_id(id) + .or_else(|| self.debug_loclists.lookup_offset_id(id)) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum LocListsFormat { + /// The bare location list format used before DWARF 5. + Bare, + /// The DW_LLE encoded range list format used in DWARF 5 and the non-standard GNU + /// split dwarf extension. + Lle, +} + +/// A raw iterator over a location list. +/// +/// This iterator does not perform any processing of the location entries, +/// such as handling base addresses. +#[derive(Debug)] +pub struct RawLocListIter { + input: R, + encoding: Encoding, + format: LocListsFormat, +} + +/// A raw entry in .debug_loclists. +#[derive(Clone, Debug)] +pub enum RawLocListEntry { + /// A location from DWARF version <= 4. + AddressOrOffsetPair { + /// Start of range. May be an address or an offset. + begin: u64, + /// End of range. May be an address or an offset. 
+ end: u64, + /// expression + data: Expression, + }, + /// DW_LLE_base_address + BaseAddress { + /// base address + addr: u64, + }, + /// DW_LLE_base_addressx + BaseAddressx { + /// base address + addr: DebugAddrIndex, + }, + /// DW_LLE_startx_endx + StartxEndx { + /// start of range + begin: DebugAddrIndex, + /// end of range + end: DebugAddrIndex, + /// expression + data: Expression, + }, + /// DW_LLE_startx_length + StartxLength { + /// start of range + begin: DebugAddrIndex, + /// length of range + length: u64, + /// expression + data: Expression, + }, + /// DW_LLE_offset_pair + OffsetPair { + /// start of range + begin: u64, + /// end of range + end: u64, + /// expression + data: Expression, + }, + /// DW_LLE_default_location + DefaultLocation { + /// expression + data: Expression, + }, + /// DW_LLE_start_end + StartEnd { + /// start of range + begin: u64, + /// end of range + end: u64, + /// expression + data: Expression, + }, + /// DW_LLE_start_length + StartLength { + /// start of range + begin: u64, + /// length of range + length: u64, + /// expression + data: Expression, + }, +} + +fn parse_data(input: &mut R, encoding: Encoding) -> Result> { + if encoding.version >= 5 { + let len = R::Offset::from_u64(input.read_uleb128()?)?; + Ok(Expression(input.split(len)?)) + } else { + // In the GNU split-dwarf extension this is a fixed 2 byte value. 
+ let len = R::Offset::from_u16(input.read_u16()?); + Ok(Expression(input.split(len)?)) + } +} + +impl RawLocListEntry { + /// Parse a location list entry from `.debug_loclists` + fn parse(input: &mut R, encoding: Encoding, format: LocListsFormat) -> Result> { + Ok(match format { + LocListsFormat::Bare => { + let range = RawRange::parse(input, encoding.address_size)?; + if range.is_end() { + None + } else if range.is_base_address(encoding.address_size) { + Some(RawLocListEntry::BaseAddress { addr: range.end }) + } else { + let len = R::Offset::from_u16(input.read_u16()?); + let data = Expression(input.split(len)?); + Some(RawLocListEntry::AddressOrOffsetPair { + begin: range.begin, + end: range.end, + data, + }) + } + } + LocListsFormat::Lle => match constants::DwLle(input.read_u8()?) { + constants::DW_LLE_end_of_list => None, + constants::DW_LLE_base_addressx => Some(RawLocListEntry::BaseAddressx { + addr: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + }), + constants::DW_LLE_startx_endx => Some(RawLocListEntry::StartxEndx { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + end: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + data: parse_data(input, encoding)?, + }), + constants::DW_LLE_startx_length => Some(RawLocListEntry::StartxLength { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + length: if encoding.version >= 5 { + input.read_uleb128()? + } else { + // In the GNU split-dwarf extension this is a fixed 4 byte value. + input.read_u32()? 
as u64 + }, + data: parse_data(input, encoding)?, + }), + constants::DW_LLE_offset_pair => Some(RawLocListEntry::OffsetPair { + begin: input.read_uleb128()?, + end: input.read_uleb128()?, + data: parse_data(input, encoding)?, + }), + constants::DW_LLE_default_location => Some(RawLocListEntry::DefaultLocation { + data: parse_data(input, encoding)?, + }), + constants::DW_LLE_base_address => Some(RawLocListEntry::BaseAddress { + addr: input.read_address(encoding.address_size)?, + }), + constants::DW_LLE_start_end => Some(RawLocListEntry::StartEnd { + begin: input.read_address(encoding.address_size)?, + end: input.read_address(encoding.address_size)?, + data: parse_data(input, encoding)?, + }), + constants::DW_LLE_start_length => Some(RawLocListEntry::StartLength { + begin: input.read_address(encoding.address_size)?, + length: input.read_uleb128()?, + data: parse_data(input, encoding)?, + }), + entry => { + return Err(Error::UnknownLocListsEntry(entry)); + } + }, + }) + } +} + +impl RawLocListIter { + /// Construct a `RawLocListIter`. + fn new(input: R, encoding: Encoding, format: LocListsFormat) -> RawLocListIter { + RawLocListIter { + input, + encoding, + format, + } + } + + /// Advance the iterator to the next location. + pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + + match RawLocListEntry::parse(&mut self.input, self.encoding, self.format) { + Ok(entry) => { + if entry.is_none() { + self.input.empty(); + } + Ok(entry) + } + Err(e) => { + self.input.empty(); + Err(e) + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for RawLocListIter { + type Item = RawLocListEntry; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + RawLocListIter::next(self) + } +} + +/// An iterator over a location list. +/// +/// This iterator internally handles processing of base address selection entries +/// and list end entries. 
Thus, it only returns location entries that are valid +/// and already adjusted for the base address. +#[derive(Debug)] +pub struct LocListIter { + raw: RawLocListIter, + base_address: u64, + debug_addr: DebugAddr, + debug_addr_base: DebugAddrBase, +} + +impl LocListIter { + /// Construct a `LocListIter`. + fn new( + raw: RawLocListIter, + base_address: u64, + debug_addr: DebugAddr, + debug_addr_base: DebugAddrBase, + ) -> LocListIter { + LocListIter { + raw, + base_address, + debug_addr, + debug_addr_base, + } + } + + #[inline] + fn get_address(&self, index: DebugAddrIndex) -> Result { + self.debug_addr + .get_address(self.raw.encoding.address_size, self.debug_addr_base, index) + } + + /// Advance the iterator to the next location. + pub fn next(&mut self) -> Result>> { + loop { + let raw_loc = match self.raw.next()? { + Some(loc) => loc, + None => return Ok(None), + }; + + let loc = self.convert_raw(raw_loc)?; + if loc.is_some() { + return Ok(loc); + } + } + } + + /// Return the next raw location. + /// + /// The raw location should be passed to `convert_raw`. + #[doc(hidden)] + pub fn next_raw(&mut self) -> Result>> { + self.raw.next() + } + + /// Convert a raw location into a location, and update the state of the iterator. + /// + /// The raw location should have been obtained from `next_raw`. 
+ #[doc(hidden)] + pub fn convert_raw( + &mut self, + raw_loc: RawLocListEntry, + ) -> Result>> { + let mask = !0 >> (64 - self.raw.encoding.address_size * 8); + let tombstone = if self.raw.encoding.version <= 4 { + mask - 1 + } else { + mask + }; + + let (range, data) = match raw_loc { + RawLocListEntry::BaseAddress { addr } => { + self.base_address = addr; + return Ok(None); + } + RawLocListEntry::BaseAddressx { addr } => { + self.base_address = self.get_address(addr)?; + return Ok(None); + } + RawLocListEntry::StartxEndx { begin, end, data } => { + let begin = self.get_address(begin)?; + let end = self.get_address(end)?; + (Range { begin, end }, data) + } + RawLocListEntry::StartxLength { + begin, + length, + data, + } => { + let begin = self.get_address(begin)?; + let end = begin.wrapping_add(length) & mask; + (Range { begin, end }, data) + } + RawLocListEntry::DefaultLocation { data } => ( + Range { + begin: 0, + end: u64::max_value(), + }, + data, + ), + RawLocListEntry::AddressOrOffsetPair { begin, end, data } + | RawLocListEntry::OffsetPair { begin, end, data } => { + if self.base_address == tombstone { + return Ok(None); + } + let mut range = Range { begin, end }; + range.add_base_address(self.base_address, self.raw.encoding.address_size); + (range, data) + } + RawLocListEntry::StartEnd { begin, end, data } => (Range { begin, end }, data), + RawLocListEntry::StartLength { + begin, + length, + data, + } => { + let end = begin.wrapping_add(length) & mask; + (Range { begin, end }, data) + } + }; + + if range.begin == tombstone || range.begin > range.end { + return Ok(None); + } + + Ok(Some(LocationListEntry { range, data })) + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for LocListIter { + type Item = LocationListEntry; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + LocListIter::next(self) + } +} + +/// A location list entry from the `.debug_loc` or `.debug_loclists` 
sections. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LocationListEntry { + /// The address range that this location is valid for. + pub range: Range, + + /// The data containing a single location description. + pub data: Expression, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::common::Format; + use crate::endianity::LittleEndian; + use crate::read::{EndianSlice, Range}; + use crate::test_util::GimliSectionMethods; + use test_assembler::{Endian, Label, LabelMaker, Section}; + + #[test] + fn test_loclists_32() { + let tombstone = !0u32; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 4, + }; + + let section = Section::with_endian(Endian::Little) + .L32(0x0300_0000) + .L32(0x0301_0300) + .L32(0x0301_0400) + .L32(0x0301_0500) + .L32(tombstone) + .L32(0x0301_0600); + let buf = section.get_contents().unwrap(); + let debug_addr = &DebugAddr::from(EndianSlice::new(&buf, LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + + let start = Label::new(); + let first = Label::new(); + let size = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // Header + .mark(&start) + .L32(&size) + .L16(encoding.version) + .L8(encoding.address_size) + .L8(0) + .L32(0) + .mark(&first) + // OffsetPair + .L8(4).uleb(0x10200).uleb(0x10300).uleb(4).L32(2) + // A base address selection followed by an OffsetPair. + .L8(6).L32(0x0200_0000) + .L8(4).uleb(0x10400).uleb(0x10500).uleb(4).L32(3) + // An empty OffsetPair followed by a normal OffsetPair. + .L8(4).uleb(0x10600).uleb(0x10600).uleb(4).L32(4) + .L8(4).uleb(0x10800).uleb(0x10900).uleb(4).L32(5) + // A StartEnd + .L8(7).L32(0x201_0a00).L32(0x201_0b00).uleb(4).L32(6) + // A StartLength + .L8(8).L32(0x201_0c00).uleb(0x100).uleb(4).L32(7) + // An OffsetPair that starts at 0. + .L8(4).uleb(0).uleb(1).uleb(4).L32(8) + // An OffsetPair that ends at -1. 
+ .L8(6).L32(0) + .L8(4).uleb(0).uleb(0xffff_ffff).uleb(4).L32(9) + // A DefaultLocation + .L8(5).uleb(4).L32(10) + // A BaseAddressx + OffsetPair + .L8(1).uleb(0) + .L8(4).uleb(0x10100).uleb(0x10200).uleb(4).L32(11) + // A StartxEndx + .L8(2).uleb(1).uleb(2).uleb(4).L32(12) + // A StartxLength + .L8(3).uleb(3).uleb(0x100).uleb(4).L32(13) + + // Tombstone entries, all of which should be ignored. + // A BaseAddressx that is a tombstone. + .L8(1).uleb(4) + .L8(4).uleb(0x11100).uleb(0x11200).uleb(4).L32(20) + // A BaseAddress that is a tombstone. + .L8(6).L32(tombstone) + .L8(4).uleb(0x11300).uleb(0x11400).uleb(4).L32(21) + // A StartxEndx that is a tombstone. + .L8(2).uleb(4).uleb(5).uleb(4).L32(22) + // A StartxLength that is a tombstone. + .L8(3).uleb(4).uleb(0x100).uleb(4).L32(23) + // A StartEnd that is a tombstone. + .L8(7).L32(tombstone).L32(0x201_1500).uleb(4).L32(24) + // A StartLength that is a tombstone. + .L8(8).L32(tombstone).uleb(0x100).uleb(4).L32(25) + // A StartEnd (not ignored) + .L8(7).L32(0x201_1600).L32(0x201_1700).uleb(4).L32(26) + + // A range end. + .L8(0) + // Some extra data. + .L32(0xffff_ffff); + size.set_const((§ion.here() - &start - 4) as u64); + + let buf = section.get_contents().unwrap(); + let debug_loc = DebugLoc::new(&[], LittleEndian); + let debug_loclists = DebugLocLists::new(&buf, LittleEndian); + let loclists = LocationLists::new(debug_loc, debug_loclists); + let offset = LocationListsOffset((&first - &start) as usize); + let mut locations = loclists + .locations(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0101_0200, + end: 0x0101_0300, + }, + data: Expression(EndianSlice::new(&[2, 0, 0, 0], LittleEndian)), + })) + ); + + // A base address selection followed by a normal location. 
+ assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0400, + end: 0x0201_0500, + }, + data: Expression(EndianSlice::new(&[3, 0, 0, 0], LittleEndian)), + })) + ); + + // An empty location range followed by a normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0600, + end: 0x0201_0600, + }, + data: Expression(EndianSlice::new(&[4, 0, 0, 0], LittleEndian)), + })) + ); + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0800, + end: 0x0201_0900, + }, + data: Expression(EndianSlice::new(&[5, 0, 0, 0], LittleEndian)), + })) + ); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0a00, + end: 0x0201_0b00, + }, + data: Expression(EndianSlice::new(&[6, 0, 0, 0], LittleEndian)), + })) + ); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0c00, + end: 0x0201_0d00, + }, + data: Expression(EndianSlice::new(&[7, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that starts at 0. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0200_0000, + end: 0x0200_0001, + }, + data: Expression(EndianSlice::new(&[8, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that ends at -1. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0000_0000, + end: 0xffff_ffff, + }, + data: Expression(EndianSlice::new(&[9, 0, 0, 0], LittleEndian)), + })) + ); + + // A DefaultLocation. 
+ assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0, + end: u64::max_value(), + }, + data: Expression(EndianSlice::new(&[10, 0, 0, 0], LittleEndian)), + })) + ); + + // A BaseAddressx + OffsetPair + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0301_0100, + end: 0x0301_0200, + }, + data: Expression(EndianSlice::new(&[11, 0, 0, 0], LittleEndian)), + })) + ); + + // A StartxEndx + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0301_0300, + end: 0x0301_0400, + }, + data: Expression(EndianSlice::new(&[12, 0, 0, 0], LittleEndian)), + })) + ); + + // A StartxLength + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0301_0500, + end: 0x0301_0600, + }, + data: Expression(EndianSlice::new(&[13, 0, 0, 0], LittleEndian)), + })) + ); + + // A StartEnd location following the tombstones + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_1600, + end: 0x0201_1700, + }, + data: Expression(EndianSlice::new(&[26, 0, 0, 0], LittleEndian)), + })) + ); + + // A location list end. + assert_eq!(locations.next(), Ok(None)); + + // An offset at the end of buf. 
+ let mut locations = loclists + .locations( + LocationListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(locations.next(), Ok(None)); + } + + #[test] + fn test_loclists_64() { + let tombstone = !0u64; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + + let section = Section::with_endian(Endian::Little) + .L64(0x0300_0000) + .L64(0x0301_0300) + .L64(0x0301_0400) + .L64(0x0301_0500) + .L64(tombstone) + .L64(0x0301_0600); + let buf = section.get_contents().unwrap(); + let debug_addr = &DebugAddr::from(EndianSlice::new(&buf, LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + + let start = Label::new(); + let first = Label::new(); + let size = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // Header + .mark(&start) + .L32(0xffff_ffff) + .L64(&size) + .L16(encoding.version) + .L8(encoding.address_size) + .L8(0) + .L32(0) + .mark(&first) + // OffsetPair + .L8(4).uleb(0x10200).uleb(0x10300).uleb(4).L32(2) + // A base address selection followed by an OffsetPair. + .L8(6).L64(0x0200_0000) + .L8(4).uleb(0x10400).uleb(0x10500).uleb(4).L32(3) + // An empty OffsetPair followed by a normal OffsetPair. + .L8(4).uleb(0x10600).uleb(0x10600).uleb(4).L32(4) + .L8(4).uleb(0x10800).uleb(0x10900).uleb(4).L32(5) + // A StartEnd + .L8(7).L64(0x201_0a00).L64(0x201_0b00).uleb(4).L32(6) + // A StartLength + .L8(8).L64(0x201_0c00).uleb(0x100).uleb(4).L32(7) + // An OffsetPair that starts at 0. + .L8(4).uleb(0).uleb(1).uleb(4).L32(8) + // An OffsetPair that ends at -1. 
+ .L8(6).L64(0) + .L8(4).uleb(0).uleb(0xffff_ffff).uleb(4).L32(9) + // A DefaultLocation + .L8(5).uleb(4).L32(10) + // A BaseAddressx + OffsetPair + .L8(1).uleb(0) + .L8(4).uleb(0x10100).uleb(0x10200).uleb(4).L32(11) + // A StartxEndx + .L8(2).uleb(1).uleb(2).uleb(4).L32(12) + // A StartxLength + .L8(3).uleb(3).uleb(0x100).uleb(4).L32(13) + + // Tombstone entries, all of which should be ignored. + // A BaseAddressx that is a tombstone. + .L8(1).uleb(4) + .L8(4).uleb(0x11100).uleb(0x11200).uleb(4).L32(20) + // A BaseAddress that is a tombstone. + .L8(6).L64(tombstone) + .L8(4).uleb(0x11300).uleb(0x11400).uleb(4).L32(21) + // A StartxEndx that is a tombstone. + .L8(2).uleb(4).uleb(5).uleb(4).L32(22) + // A StartxLength that is a tombstone. + .L8(3).uleb(4).uleb(0x100).uleb(4).L32(23) + // A StartEnd that is a tombstone. + .L8(7).L64(tombstone).L64(0x201_1500).uleb(4).L32(24) + // A StartLength that is a tombstone. + .L8(8).L64(tombstone).uleb(0x100).uleb(4).L32(25) + // A StartEnd (not ignored) + .L8(7).L64(0x201_1600).L64(0x201_1700).uleb(4).L32(26) + + // A range end. + .L8(0) + // Some extra data. + .L32(0xffff_ffff); + size.set_const((§ion.here() - &start - 12) as u64); + + let buf = section.get_contents().unwrap(); + let debug_loc = DebugLoc::new(&[], LittleEndian); + let debug_loclists = DebugLocLists::new(&buf, LittleEndian); + let loclists = LocationLists::new(debug_loc, debug_loclists); + let offset = LocationListsOffset((&first - &start) as usize); + let mut locations = loclists + .locations(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0101_0200, + end: 0x0101_0300, + }, + data: Expression(EndianSlice::new(&[2, 0, 0, 0], LittleEndian)), + })) + ); + + // A base address selection followed by a normal location. 
+ assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0400, + end: 0x0201_0500, + }, + data: Expression(EndianSlice::new(&[3, 0, 0, 0], LittleEndian)), + })) + ); + + // An empty location range followed by a normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0600, + end: 0x0201_0600, + }, + data: Expression(EndianSlice::new(&[4, 0, 0, 0], LittleEndian)), + })) + ); + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0800, + end: 0x0201_0900, + }, + data: Expression(EndianSlice::new(&[5, 0, 0, 0], LittleEndian)), + })) + ); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0a00, + end: 0x0201_0b00, + }, + data: Expression(EndianSlice::new(&[6, 0, 0, 0], LittleEndian)), + })) + ); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0c00, + end: 0x0201_0d00, + }, + data: Expression(EndianSlice::new(&[7, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that starts at 0. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0200_0000, + end: 0x0200_0001, + }, + data: Expression(EndianSlice::new(&[8, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that ends at -1. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0000_0000, + end: 0xffff_ffff, + }, + data: Expression(EndianSlice::new(&[9, 0, 0, 0], LittleEndian)), + })) + ); + + // A DefaultLocation. 
+ assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0, + end: u64::max_value(), + }, + data: Expression(EndianSlice::new(&[10, 0, 0, 0], LittleEndian)), + })) + ); + + // A BaseAddressx + OffsetPair + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0301_0100, + end: 0x0301_0200, + }, + data: Expression(EndianSlice::new(&[11, 0, 0, 0], LittleEndian)), + })) + ); + + // A StartxEndx + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0301_0300, + end: 0x0301_0400, + }, + data: Expression(EndianSlice::new(&[12, 0, 0, 0], LittleEndian)), + })) + ); + + // A StartxLength + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0301_0500, + end: 0x0301_0600, + }, + data: Expression(EndianSlice::new(&[13, 0, 0, 0], LittleEndian)), + })) + ); + + // A StartEnd location following the tombstones + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_1600, + end: 0x0201_1700, + }, + data: Expression(EndianSlice::new(&[26, 0, 0, 0], LittleEndian)), + })) + ); + + // A location list end. + assert_eq!(locations.next(), Ok(None)); + + // An offset at the end of buf. + let mut locations = loclists + .locations( + LocationListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(locations.next(), Ok(None)); + } + + #[test] + fn test_location_list_32() { + let tombstone = !0u32 - 1; + let start = Label::new(); + let first = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // A location before the offset. + .mark(&start) + .L32(0x10000).L32(0x10100).L16(4).L32(1) + .mark(&first) + // A normal location. + .L32(0x10200).L32(0x10300).L16(4).L32(2) + // A base address selection followed by a normal location. 
+ .L32(0xffff_ffff).L32(0x0200_0000) + .L32(0x10400).L32(0x10500).L16(4).L32(3) + // An empty location range followed by a normal location. + .L32(0x10600).L32(0x10600).L16(4).L32(4) + .L32(0x10800).L32(0x10900).L16(4).L32(5) + // A location range that starts at 0. + .L32(0).L32(1).L16(4).L32(6) + // A location range that ends at -1. + .L32(0xffff_ffff).L32(0x0000_0000) + .L32(0).L32(0xffff_ffff).L16(4).L32(7) + // A normal location with tombstone. + .L32(tombstone).L32(tombstone).L16(4).L32(8) + // A base address selection with tombstone followed by a normal location. + .L32(0xffff_ffff).L32(tombstone) + .L32(0x10a00).L32(0x10b00).L16(4).L32(9) + // A location list end. + .L32(0).L32(0) + // Some extra data. + .L32(0); + + let buf = section.get_contents().unwrap(); + let debug_loc = DebugLoc::new(&buf, LittleEndian); + let debug_loclists = DebugLocLists::new(&[], LittleEndian); + let loclists = LocationLists::new(debug_loc, debug_loclists); + let offset = LocationListsOffset((&first - &start) as usize); + let debug_addr = &DebugAddr::from(EndianSlice::new(&[], LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut locations = loclists + .locations(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0101_0200, + end: 0x0101_0300, + }, + data: Expression(EndianSlice::new(&[2, 0, 0, 0], LittleEndian)), + })) + ); + + // A base address selection followed by a normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0400, + end: 0x0201_0500, + }, + data: Expression(EndianSlice::new(&[3, 0, 0, 0], LittleEndian)), + })) + ); + + // An empty location range followed by a normal location. 
+ assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0600, + end: 0x0201_0600, + }, + data: Expression(EndianSlice::new(&[4, 0, 0, 0], LittleEndian)), + })) + ); + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0800, + end: 0x0201_0900, + }, + data: Expression(EndianSlice::new(&[5, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that starts at 0. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0200_0000, + end: 0x0200_0001, + }, + data: Expression(EndianSlice::new(&[6, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that ends at -1. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0000_0000, + end: 0xffff_ffff, + }, + data: Expression(EndianSlice::new(&[7, 0, 0, 0], LittleEndian)), + })) + ); + + // A location list end. + assert_eq!(locations.next(), Ok(None)); + + // An offset at the end of buf. + let mut locations = loclists + .locations( + LocationListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(locations.next(), Ok(None)); + } + + #[test] + fn test_location_list_64() { + let tombstone = !0u64 - 1; + let start = Label::new(); + let first = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // A location before the offset. + .mark(&start) + .L64(0x10000).L64(0x10100).L16(4).L32(1) + .mark(&first) + // A normal location. + .L64(0x10200).L64(0x10300).L16(4).L32(2) + // A base address selection followed by a normal location. + .L64(0xffff_ffff_ffff_ffff).L64(0x0200_0000) + .L64(0x10400).L64(0x10500).L16(4).L32(3) + // An empty location range followed by a normal location. + .L64(0x10600).L64(0x10600).L16(4).L32(4) + .L64(0x10800).L64(0x10900).L16(4).L32(5) + // A location range that starts at 0. + .L64(0).L64(1).L16(4).L32(6) + // A location range that ends at -1. 
+ .L64(0xffff_ffff_ffff_ffff).L64(0x0000_0000) + .L64(0).L64(0xffff_ffff_ffff_ffff).L16(4).L32(7) + // A normal location with tombstone. + .L64(tombstone).L64(tombstone).L16(4).L32(8) + // A base address selection with tombstone followed by a normal location. + .L64(0xffff_ffff_ffff_ffff).L64(tombstone) + .L64(0x10a00).L64(0x10b00).L16(4).L32(9) + // A location list end. + .L64(0).L64(0) + // Some extra data. + .L64(0); + + let buf = section.get_contents().unwrap(); + let debug_loc = DebugLoc::new(&buf, LittleEndian); + let debug_loclists = DebugLocLists::new(&[], LittleEndian); + let loclists = LocationLists::new(debug_loc, debug_loclists); + let offset = LocationListsOffset((&first - &start) as usize); + let debug_addr = &DebugAddr::from(EndianSlice::new(&[], LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + let encoding = Encoding { + format: Format::Dwarf64, + version: 4, + address_size: 8, + }; + let mut locations = loclists + .locations(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0101_0200, + end: 0x0101_0300, + }, + data: Expression(EndianSlice::new(&[2, 0, 0, 0], LittleEndian)), + })) + ); + + // A base address selection followed by a normal location. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0400, + end: 0x0201_0500, + }, + data: Expression(EndianSlice::new(&[3, 0, 0, 0], LittleEndian)), + })) + ); + + // An empty location range followed by a normal location. 
+ assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0600, + end: 0x0201_0600, + }, + data: Expression(EndianSlice::new(&[4, 0, 0, 0], LittleEndian)), + })) + ); + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0201_0800, + end: 0x0201_0900, + }, + data: Expression(EndianSlice::new(&[5, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that starts at 0. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0200_0000, + end: 0x0200_0001, + }, + data: Expression(EndianSlice::new(&[6, 0, 0, 0], LittleEndian)), + })) + ); + + // A location range that ends at -1. + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0, + end: 0xffff_ffff_ffff_ffff, + }, + data: Expression(EndianSlice::new(&[7, 0, 0, 0], LittleEndian)), + })) + ); + + // A location list end. + assert_eq!(locations.next(), Ok(None)); + + // An offset at the end of buf. + let mut locations = loclists + .locations( + LocationListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(locations.next(), Ok(None)); + } + + #[test] + fn test_locations_invalid() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // An invalid location range. + .L32(0x20000).L32(0x10000).L16(4).L32(1) + // An invalid range after wrapping. + .L32(0x20000).L32(0xff01_0000).L16(4).L32(2); + + let buf = section.get_contents().unwrap(); + let debug_loc = DebugLoc::new(&buf, LittleEndian); + let debug_loclists = DebugLocLists::new(&[], LittleEndian); + let loclists = LocationLists::new(debug_loc, debug_loclists); + let debug_addr = &DebugAddr::from(EndianSlice::new(&[], LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + + // An invalid location range. 
+ let mut locations = loclists + .locations( + LocationListsOffset(0x0), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(locations.next(), Ok(None)); + + // An invalid location range after wrapping. + let mut locations = loclists + .locations( + LocationListsOffset(14), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(locations.next(), Ok(None)); + + // An invalid offset. + match loclists.locations( + LocationListsOffset(buf.len() + 1), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + #[test] + fn test_get_offset() { + for format in [Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version: 5, + address_size: 4, + }; + + let zero = Label::new(); + let length = Label::new(); + let start = Label::new(); + let first = Label::new(); + let end = Label::new(); + let mut section = Section::with_endian(Endian::Little) + .mark(&zero) + .initial_length(format, &length, &start) + .D16(encoding.version) + .D8(encoding.address_size) + .D8(0) + .D32(20) + .mark(&first); + for i in 0..20 { + section = section.word(format.word_size(), 1000 + i); + } + section = section.mark(&end); + length.set_const((&end - &start) as u64); + let section = section.get_contents().unwrap(); + + let debug_loc = DebugLoc::from(EndianSlice::new(&[], LittleEndian)); + let debug_loclists = DebugLocLists::from(EndianSlice::new(§ion, LittleEndian)); + let locations = LocationLists::new(debug_loc, debug_loclists); + + let base = DebugLocListsBase((&first - &zero) as usize); + assert_eq!( + locations.get_offset(encoding, base, DebugLocListsIndex(0)), + Ok(LocationListsOffset(base.0 + 1000)) + ); + assert_eq!( + locations.get_offset(encoding, base, DebugLocListsIndex(19)), + Ok(LocationListsOffset(base.0 + 1019)) + ); + } + } + + #[test] + fn test_loclists_gnu_v4_split_dwarf() 
{ + #[rustfmt::skip] + let buf = [ + 0x03, // DW_LLE_startx_length + 0x00, // ULEB encoded b7 + 0x08, 0x00, 0x00, 0x00, // Fixed 4 byte length of 8 + 0x03, 0x00, // Fixed two byte length of the location + 0x11, 0x00, // DW_OP_constu 0 + 0x9f, // DW_OP_stack_value + // Padding data + //0x99, 0x99, 0x99, 0x99 + ]; + let data_buf = [0x11, 0x00, 0x9f]; + let expected_data = EndianSlice::new(&data_buf, LittleEndian); + let debug_loc = DebugLoc::new(&buf, LittleEndian); + let debug_loclists = DebugLocLists::new(&[], LittleEndian); + let loclists = LocationLists::new(debug_loc, debug_loclists); + let debug_addr = + &DebugAddr::from(EndianSlice::new(&[0x01, 0x02, 0x03, 0x04], LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + + // An invalid location range. + let mut locations = loclists + .locations_dwo( + LocationListsOffset(0x0), + encoding, + 0, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!( + locations.next(), + Ok(Some(LocationListEntry { + range: Range { + begin: 0x0403_0201, + end: 0x0403_0209 + }, + data: Expression(expected_data), + })) + ); + } +} diff --git a/third_party/rust/gimli/src/read/lookup.rs b/third_party/rust/gimli/src/read/lookup.rs new file mode 100644 index 000000000000..1d082f24f495 --- /dev/null +++ b/third_party/rust/gimli/src/read/lookup.rs @@ -0,0 +1,202 @@ +use core::marker::PhantomData; + +use crate::common::{DebugInfoOffset, Format}; +use crate::read::{parse_debug_info_offset, Error, Reader, ReaderOffset, Result, UnitOffset}; + +// The various "Accelerated Access" sections (DWARF standard v4 Section 6.1) all have +// similar structures. They consist of a header with metadata and an offset into the +// .debug_info section for the entire compilation unit, and a series +// of following entries that list addresses (for .debug_aranges) or names +// (for .debug_pubnames and .debug_pubtypes) that are covered. 
+// +// Because these three tables all have similar structures, we abstract out some of +// the parsing mechanics. + +pub trait LookupParser { + /// The type of the produced header. + type Header; + /// The type of the produced entry. + type Entry; + + /// Parse a header from `input`. Returns a tuple of `input` sliced to contain just the entries + /// corresponding to this header (without the header itself), and the parsed representation of + /// the header itself. + fn parse_header(input: &mut R) -> Result<(R, Self::Header)>; + + /// Parse a single entry from `input`. Returns either a parsed representation of the entry + /// or None if `input` is exhausted. + fn parse_entry(input: &mut R, header: &Self::Header) -> Result>; +} + +#[derive(Clone, Debug)] +pub struct DebugLookup +where + R: Reader, + Parser: LookupParser, +{ + input_buffer: R, + phantom: PhantomData, +} + +impl From for DebugLookup +where + R: Reader, + Parser: LookupParser, +{ + fn from(input_buffer: R) -> Self { + DebugLookup { + input_buffer, + phantom: PhantomData, + } + } +} + +impl DebugLookup +where + R: Reader, + Parser: LookupParser, +{ + pub fn items(&self) -> LookupEntryIter { + LookupEntryIter { + current_set: None, + remaining_input: self.input_buffer.clone(), + } + } + + pub fn reader(&self) -> &R { + &self.input_buffer + } +} + +#[derive(Clone, Debug)] +pub struct LookupEntryIter +where + R: Reader, + Parser: LookupParser, +{ + current_set: Option<(R, Parser::Header)>, // Only none at the very beginning and end. + remaining_input: R, +} + +impl LookupEntryIter +where + R: Reader, + Parser: LookupParser, +{ + /// Advance the iterator and return the next entry. + /// + /// Returns the newly parsed entry as `Ok(Some(Parser::Entry))`. Returns + /// `Ok(None)` when iteration is complete and all entries have already been + /// parsed and yielded. If an error occurs while parsing the next entry, + /// then this error is returned as `Err(e)`, and all subsequent calls return + /// `Ok(None)`. 
+ /// + /// Can be [used with `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn next(&mut self) -> Result> { + loop { + if let Some((ref mut input, ref header)) = self.current_set { + if !input.is_empty() { + match Parser::parse_entry(input, header) { + Ok(Some(entry)) => return Ok(Some(entry)), + Ok(None) => {} + Err(e) => { + input.empty(); + self.remaining_input.empty(); + return Err(e); + } + } + } + } + if self.remaining_input.is_empty() { + self.current_set = None; + return Ok(None); + } + match Parser::parse_header(&mut self.remaining_input) { + Ok(set) => { + self.current_set = Some(set); + } + Err(e) => { + self.current_set = None; + self.remaining_input.empty(); + return Err(e); + } + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PubStuffHeader { + format: Format, + length: T, + version: u16, + unit_offset: DebugInfoOffset, + unit_length: T, +} + +pub trait PubStuffEntry { + fn new( + die_offset: UnitOffset, + name: R, + unit_header_offset: DebugInfoOffset, + ) -> Self; +} + +#[derive(Clone, Debug)] +pub struct PubStuffParser +where + R: Reader, + Entry: PubStuffEntry, +{ + // This struct is never instantiated. + phantom: PhantomData<(R, Entry)>, +} + +impl LookupParser for PubStuffParser +where + R: Reader, + Entry: PubStuffEntry, +{ + type Header = PubStuffHeader; + type Entry = Entry; + + /// Parse an pubthings set header. Returns a tuple of the + /// pubthings to be parsed for this set, and the newly created PubThingHeader struct. 
+ fn parse_header(input: &mut R) -> Result<(R, Self::Header)> { + let (length, format) = input.read_initial_length()?; + let mut rest = input.split(length)?; + + let version = rest.read_u16()?; + if version != 2 { + return Err(Error::UnknownVersion(u64::from(version))); + } + + let unit_offset = parse_debug_info_offset(&mut rest, format)?; + let unit_length = rest.read_length(format)?; + + let header = PubStuffHeader { + format, + length, + version, + unit_offset, + unit_length, + }; + Ok((rest, header)) + } + + /// Parse a single pubthing. Return `None` for the null pubthing, `Some` for an actual pubthing. + fn parse_entry(input: &mut R, header: &Self::Header) -> Result> { + let offset = input.read_offset(header.format)?; + if offset.into_u64() == 0 { + input.empty(); + Ok(None) + } else { + let name = input.read_null_terminated_slice()?; + Ok(Some(Self::Entry::new( + UnitOffset(offset), + name, + header.unit_offset, + ))) + } + } +} diff --git a/third_party/rust/gimli/src/read/mod.rs b/third_party/rust/gimli/src/read/mod.rs new file mode 100644 index 000000000000..42542cc32a0e --- /dev/null +++ b/third_party/rust/gimli/src/read/mod.rs @@ -0,0 +1,838 @@ +//! Read DWARF debugging information. +//! +//! * [Example Usage](#example-usage) +//! * [API Structure](#api-structure) +//! * [Using with `FallibleIterator`](#using-with-fallibleiterator) +//! +//! ## Example Usage +//! +//! Print out all of the functions in the debuggee program: +//! +//! ```rust,no_run +//! # fn example() -> Result<(), gimli::Error> { +//! # type R = gimli::EndianSlice<'static, gimli::LittleEndian>; +//! # let get_file_section_reader = |name| -> Result { unimplemented!() }; +//! # let get_sup_file_section_reader = |name| -> Result { unimplemented!() }; +//! // Read the DWARF sections with whatever object loader you're using. +//! // These closures should return a `Reader` instance (e.g. `EndianSlice`). +//! 
let loader = |section: gimli::SectionId| { get_file_section_reader(section.name()) }; +//! let sup_loader = |section: gimli::SectionId| { get_sup_file_section_reader(section.name()) }; +//! let mut dwarf = gimli::Dwarf::load(loader)?; +//! dwarf.load_sup(sup_loader)?; +//! +//! // Iterate over all compilation units. +//! let mut iter = dwarf.units(); +//! while let Some(header) = iter.next()? { +//! // Parse the abbreviations and other information for this compilation unit. +//! let unit = dwarf.unit(header)?; +//! +//! // Iterate over all of this compilation unit's entries. +//! let mut entries = unit.entries(); +//! while let Some((_, entry)) = entries.next_dfs()? { +//! // If we find an entry for a function, print it. +//! if entry.tag() == gimli::DW_TAG_subprogram { +//! println!("Found a function: {:?}", entry); +//! } +//! } +//! } +//! # unreachable!() +//! # } +//! ``` +//! +//! Full example programs: +//! +//! * [A simple parser](https://github.com/gimli-rs/gimli/blob/master/crates/examples/src/bin/simple.rs) +//! +//! * [A `dwarfdump` +//! clone](https://github.com/gimli-rs/gimli/blob/master/crates/examples/src/bin/dwarfdump.rs) +//! +//! * [An `addr2line` clone](https://github.com/gimli-rs/addr2line) +//! +//! * [`ddbug`](https://github.com/gimli-rs/ddbug), a utility giving insight into +//! code generation by making debugging information readable +//! +//! * [`dwprod`](https://github.com/fitzgen/dwprod), a tiny utility to list the +//! compilers used to create each compilation unit within a shared library or +//! executable (via `DW_AT_producer`) +//! +//! * [`dwarf-validate`](https://github.com/gimli-rs/gimli/blob/master/crates/examples/src/bin/dwarf-validate.rs), +//! a program to validate the integrity of some DWARF and its references +//! between sections and compilation units. +//! +//! ## API Structure +//! +//! * Basic familiarity with DWARF is assumed. +//! +//! * The [`Dwarf`](./struct.Dwarf.html) type contains the commonly used DWARF +//! 
sections. It has methods that simplify access to debugging data that spans +//! multiple sections. Use of this type is optional, but recommended. +//! +//! * The [`DwarfPackage`](./struct.Dwarf.html) type contains the DWARF +//! package (DWP) sections. It has methods to find a DWARF object (DWO) +//! within the package. +//! +//! * Each section gets its own type. Consider these types the entry points to +//! the library: +//! +//! * [`DebugAbbrev`](./struct.DebugAbbrev.html): The `.debug_abbrev` section. +//! +//! * [`DebugAddr`](./struct.DebugAddr.html): The `.debug_addr` section. +//! +//! * [`DebugAranges`](./struct.DebugAranges.html): The `.debug_aranges` +//! section. +//! +//! * [`DebugFrame`](./struct.DebugFrame.html): The `.debug_frame` section. +//! +//! * [`DebugInfo`](./struct.DebugInfo.html): The `.debug_info` section. +//! +//! * [`DebugLine`](./struct.DebugLine.html): The `.debug_line` section. +//! +//! * [`DebugLineStr`](./struct.DebugLineStr.html): The `.debug_line_str` section. +//! +//! * [`DebugLoc`](./struct.DebugLoc.html): The `.debug_loc` section. +//! +//! * [`DebugLocLists`](./struct.DebugLocLists.html): The `.debug_loclists` section. +//! +//! * [`DebugPubNames`](./struct.DebugPubNames.html): The `.debug_pubnames` +//! section. +//! +//! * [`DebugPubTypes`](./struct.DebugPubTypes.html): The `.debug_pubtypes` +//! section. +//! +//! * [`DebugRanges`](./struct.DebugRanges.html): The `.debug_ranges` section. +//! +//! * [`DebugRngLists`](./struct.DebugRngLists.html): The `.debug_rnglists` section. +//! +//! * [`DebugStr`](./struct.DebugStr.html): The `.debug_str` section. +//! +//! * [`DebugStrOffsets`](./struct.DebugStrOffsets.html): The `.debug_str_offsets` section. +//! +//! * [`DebugTypes`](./struct.DebugTypes.html): The `.debug_types` section. +//! +//! * [`DebugCuIndex`](./struct.DebugCuIndex.html): The `.debug_cu_index` section. +//! +//! * [`DebugTuIndex`](./struct.DebugTuIndex.html): The `.debug_tu_index` section. +//! +//! 
* [`EhFrame`](./struct.EhFrame.html): The `.eh_frame` section. +//! +//! * [`EhFrameHdr`](./struct.EhFrameHdr.html): The `.eh_frame_hdr` section. +//! +//! * Each section type exposes methods for accessing the debugging data encoded +//! in that section. For example, the [`DebugInfo`](./struct.DebugInfo.html) +//! struct has the [`units`](./struct.DebugInfo.html#method.units) method for +//! iterating over the compilation units defined within it. +//! +//! * Offsets into a section are strongly typed: an offset into `.debug_info` is +//! the [`DebugInfoOffset`](./struct.DebugInfoOffset.html) type. It cannot be +//! used to index into the [`DebugLine`](./struct.DebugLine.html) type because +//! `DebugLine` represents the `.debug_line` section. There are similar types +//! for offsets relative to a compilation unit rather than a section. +//! +//! ## Using with `FallibleIterator` +//! +//! The standard library's `Iterator` trait and related APIs do not play well +//! with iterators where the `next` operation is fallible. One can make the +//! `Iterator`'s associated `Item` type be a `Result`, however the +//! provided methods cannot gracefully handle the case when an `Err` is +//! returned. +//! +//! This situation led to the +//! [`fallible-iterator`](https://crates.io/crates/fallible-iterator) crate's +//! existence. You can read more of the rationale for its existence in its +//! docs. The crate provides the helpers you have come to expect (eg `map`, +//! `filter`, etc) for iterators that can fail. +//! +//! `gimli`'s many lazy parsing iterators are a perfect match for the +//! `fallible-iterator` crate's `FallibleIterator` trait because parsing is not +//! done eagerly. Parse errors later in the input might only be discovered after +//! having iterated through many items. +//! +//! To use `gimli` iterators with `FallibleIterator`, import the crate and trait +//! into your code: +//! +//! ``` +//! # #[cfg(feature = "fallible-iterator")] +//! # fn foo() { +//! 
// Use the `FallibleIterator` trait so its methods are in scope! +//! use fallible_iterator::FallibleIterator; +//! use gimli::{DebugAranges, EndianSlice, LittleEndian}; +//! +//! fn find_sum_of_address_range_lengths(aranges: DebugAranges>) +//! -> gimli::Result +//! { +//! // `DebugAranges::headers` returns a `FallibleIterator`! +//! aranges.headers() +//! // `flat_map` is provided by `FallibleIterator`! +//! .flat_map(|header| Ok(header.entries())) +//! // `map` is provided by `FallibleIterator`! +//! .map(|arange| Ok(arange.length())) +//! // `fold` is provided by `FallibleIterator`! +//! .fold(0, |sum, len| Ok(sum + len)) +//! } +//! # } +//! # fn main() {} +//! ``` + +use core::fmt::{self, Debug}; +use core::result; +#[cfg(feature = "std")] +use std::{error, io}; + +use crate::common::{Register, SectionId}; +use crate::constants; + +mod util; +pub use util::*; + +mod addr; +pub use self::addr::*; + +mod cfi; +pub use self::cfi::*; + +#[cfg(feature = "read")] +mod dwarf; +#[cfg(feature = "read")] +pub use self::dwarf::*; + +mod endian_slice; +pub use self::endian_slice::*; + +#[cfg(feature = "endian-reader")] +mod endian_reader; +#[cfg(feature = "endian-reader")] +pub use self::endian_reader::*; + +mod reader; +pub use self::reader::*; + +mod relocate; +pub use self::relocate::*; + +#[cfg(feature = "read")] +mod abbrev; +#[cfg(feature = "read")] +pub use self::abbrev::*; + +mod aranges; +pub use self::aranges::*; + +mod index; +pub use self::index::*; + +#[cfg(feature = "read")] +mod line; +#[cfg(feature = "read")] +pub use self::line::*; + +mod lists; + +mod loclists; +pub use self::loclists::*; + +#[cfg(feature = "read")] +mod lookup; + +mod op; +pub use self::op::*; + +#[cfg(feature = "read")] +mod pubnames; +#[cfg(feature = "read")] +pub use self::pubnames::*; + +#[cfg(feature = "read")] +mod pubtypes; +#[cfg(feature = "read")] +pub use self::pubtypes::*; + +mod rnglists; +pub use self::rnglists::*; + +mod str; +pub use self::str::*; + +/// An offset into 
the current compilation or type unit. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct UnitOffset(pub T); + +#[cfg(feature = "read")] +mod unit; +#[cfg(feature = "read")] +pub use self::unit::*; + +mod value; +pub use self::value::*; + +/// Indicates that storage should be allocated on heap. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct StoreOnHeap; + +/// `EndianBuf` has been renamed to `EndianSlice`. For ease of upgrading across +/// `gimli` versions, we export this type alias. +#[deprecated(note = "EndianBuf has been renamed to EndianSlice, use that instead.")] +pub type EndianBuf<'input, Endian> = EndianSlice<'input, Endian>; + +/// An error that occurred when parsing. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Error { + /// An I/O error occurred while reading. + Io, + /// Found a PC relative pointer, but the section base is undefined. + PcRelativePointerButSectionBaseIsUndefined, + /// Found a `.text` relative pointer, but the `.text` base is undefined. + TextRelativePointerButTextBaseIsUndefined, + /// Found a data relative pointer, but the data base is undefined. + DataRelativePointerButDataBaseIsUndefined, + /// Found a function relative pointer in a context that does not have a + /// function base. + FuncRelativePointerInBadContext, + /// Cannot parse a pointer with a `DW_EH_PE_omit` encoding. + CannotParseOmitPointerEncoding, + /// An error parsing an unsigned LEB128 value. + BadUnsignedLeb128, + /// An error parsing a signed LEB128 value. + BadSignedLeb128, + /// An abbreviation declared that its tag is zero, but zero is reserved for + /// null records. + AbbreviationTagZero, + /// An attribute specification declared that its form is zero, but zero is + /// reserved for null records. + AttributeFormZero, + /// The abbreviation's has-children byte was not one of + /// `DW_CHILDREN_{yes,no}`. + BadHasChildren, + /// The specified length is impossible. 
+ BadLength, + /// Found an unknown `DW_FORM_*` type. + UnknownForm(constants::DwForm), + /// Expected a zero, found something else. + ExpectedZero, + /// Found an abbreviation code that has already been used. + DuplicateAbbreviationCode, + /// Found a duplicate arange. + DuplicateArange, + /// Found an unknown reserved length value. + UnknownReservedLength, + /// Found an unknown DWARF version. + UnknownVersion(u64), + /// Found a record with an unknown abbreviation code. + UnknownAbbreviation(u64), + /// Hit the end of input before it was expected. + UnexpectedEof(ReaderOffsetId), + /// Read a null entry before it was expected. + UnexpectedNull, + /// Found an unknown standard opcode. + UnknownStandardOpcode(constants::DwLns), + /// Found an unknown extended opcode. + UnknownExtendedOpcode(constants::DwLne), + /// Found an unknown location-lists format. + UnknownLocListsEntry(constants::DwLle), + /// Found an unknown range-lists format. + UnknownRangeListsEntry(constants::DwRle), + /// The specified address size is not supported. + UnsupportedAddressSize(u8), + /// The specified offset size is not supported. + UnsupportedOffsetSize(u8), + /// The specified field size is not supported. + UnsupportedFieldSize(u8), + /// The minimum instruction length must not be zero. + MinimumInstructionLengthZero, + /// The maximum operations per instruction must not be zero. + MaximumOperationsPerInstructionZero, + /// The line range must not be zero. + LineRangeZero, + /// The opcode base must not be zero. + OpcodeBaseZero, + /// Found an invalid UTF-8 string. + BadUtf8, + /// Expected to find the CIE ID, but found something else. + NotCieId, + /// Expected to find a pointer to a CIE, but found the CIE ID instead. + NotCiePointer, + /// Expected to find a pointer to an FDE, but found a CIE instead. + NotFdePointer, + /// Invalid branch target for a DW_OP_bra or DW_OP_skip. + BadBranchTarget(u64), + /// DW_OP_push_object_address used but no address passed in. 
+ InvalidPushObjectAddress, + /// Not enough items on the stack when evaluating an expression. + NotEnoughStackItems, + /// Too many iterations to compute the expression. + TooManyIterations, + /// An unrecognized operation was found while parsing a DWARF + /// expression. + InvalidExpression(constants::DwOp), + /// An unsupported operation was found while evaluating a DWARF expression. + UnsupportedEvaluation, + /// The expression had a piece followed by an expression + /// terminator without a piece. + InvalidPiece, + /// An expression-terminating operation was followed by something + /// other than the end of the expression or a piece operation. + InvalidExpressionTerminator(u64), + /// Division or modulus by zero when evaluating an expression. + DivisionByZero, + /// An expression operation used mismatching types. + TypeMismatch, + /// An expression operation required an integral type but saw a + /// floating point type. + IntegralTypeRequired, + /// An expression operation used types that are not supported. + UnsupportedTypeOperation, + /// The shift value in an expression must be a non-negative integer. + InvalidShiftExpression, + /// An unknown DW_CFA_* instruction. + UnknownCallFrameInstruction(constants::DwCfa), + /// The end of an address range was before the beginning. + InvalidAddressRange, + /// Encountered a call frame instruction in a context in which it is not + /// valid. + CfiInstructionInInvalidContext, + /// When evaluating call frame instructions, found a `DW_CFA_restore_state` + /// stack pop instruction, but the stack was empty, and had nothing to pop. + PopWithEmptyStack, + /// Do not have unwind info for the given address. + NoUnwindInfoForAddress, + /// An offset value was larger than the maximum supported value. + UnsupportedOffset, + /// The given pointer encoding is either unknown or invalid. + UnknownPointerEncoding(constants::DwEhPe), + /// Did not find an entry at the given offset. 
+ NoEntryAtGivenOffset, + /// The given offset is out of bounds. + OffsetOutOfBounds, + /// Found an unknown CFI augmentation. + UnknownAugmentation, + /// We do not support the given pointer encoding yet. + UnsupportedPointerEncoding, + /// Registers larger than `u16` are not supported. + UnsupportedRegister(u64), + /// The CFI program defined more register rules than we have storage for. + TooManyRegisterRules, + /// Attempted to push onto the CFI or evaluation stack, but it was already + /// at full capacity. + StackFull, + /// The `.eh_frame_hdr` binary search table claims to be variable-length encoded, + /// which makes binary search impossible. + VariableLengthSearchTable, + /// The `DW_UT_*` value for this unit is not supported yet. + UnsupportedUnitType, + /// Ranges using AddressIndex are not supported yet. + UnsupportedAddressIndex, + /// Nonzero segment selector sizes aren't supported yet. + UnsupportedSegmentSize, + /// A compilation unit or type unit is missing its top level DIE. + MissingUnitDie, + /// A DIE attribute used an unsupported form. + UnsupportedAttributeForm, + /// Missing DW_LNCT_path in file entry format. + MissingFileEntryFormatPath, + /// Expected an attribute value to be a string form. + ExpectedStringAttributeValue, + /// `DW_FORM_implicit_const` used in an invalid context. + InvalidImplicitConst, + /// Invalid section count in `.dwp` index. + InvalidIndexSectionCount, + /// Invalid slot count in `.dwp` index. + InvalidIndexSlotCount, + /// Invalid hash row in `.dwp` index. + InvalidIndexRow, + /// Unknown section type in `.dwp` index. + UnknownIndexSection(constants::DwSect), + /// Unknown section type in version 2 `.dwp` index. + UnknownIndexSectionV2(constants::DwSectV2), +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> ::core::result::Result<(), fmt::Error> { + write!(f, "{}", self.description()) + } +} + +impl Error { + /// A short description of the error. 
+ pub fn description(&self) -> &str { + match *self { + Error::Io => "An I/O error occurred while reading.", + Error::PcRelativePointerButSectionBaseIsUndefined => { + "Found a PC relative pointer, but the section base is undefined." + } + Error::TextRelativePointerButTextBaseIsUndefined => { + "Found a `.text` relative pointer, but the `.text` base is undefined." + } + Error::DataRelativePointerButDataBaseIsUndefined => { + "Found a data relative pointer, but the data base is undefined." + } + Error::FuncRelativePointerInBadContext => { + "Found a function relative pointer in a context that does not have a function base." + } + Error::CannotParseOmitPointerEncoding => { + "Cannot parse a pointer with a `DW_EH_PE_omit` encoding." + } + Error::BadUnsignedLeb128 => "An error parsing an unsigned LEB128 value", + Error::BadSignedLeb128 => "An error parsing a signed LEB128 value", + Error::AbbreviationTagZero => { + "An abbreviation declared that its tag is zero, + but zero is reserved for null records" + } + Error::AttributeFormZero => { + "An attribute specification declared that its form is zero, + but zero is reserved for null records" + } + Error::BadHasChildren => { + "The abbreviation's has-children byte was not one of + `DW_CHILDREN_{yes,no}`" + } + Error::BadLength => "The specified length is impossible", + Error::UnknownForm(_) => "Found an unknown `DW_FORM_*` type", + Error::ExpectedZero => "Expected a zero, found something else", + Error::DuplicateAbbreviationCode => { + "Found an abbreviation code that has already been used" + } + Error::DuplicateArange => "Found a duplicate arange", + Error::UnknownReservedLength => "Found an unknown reserved length value", + Error::UnknownVersion(_) => "Found an unknown DWARF version", + Error::UnknownAbbreviation(_) => "Found a record with an unknown abbreviation code", + Error::UnexpectedEof(_) => "Hit the end of input before it was expected", + Error::UnexpectedNull => "Read a null entry before it was expected.", + 
Error::UnknownStandardOpcode(_) => "Found an unknown standard opcode", + Error::UnknownExtendedOpcode(_) => "Found an unknown extended opcode", + Error::UnknownLocListsEntry(_) => "Found an unknown location lists entry", + Error::UnknownRangeListsEntry(_) => "Found an unknown range lists entry", + Error::UnsupportedAddressSize(_) => "The specified address size is not supported", + Error::UnsupportedOffsetSize(_) => "The specified offset size is not supported", + Error::UnsupportedFieldSize(_) => "The specified field size is not supported", + Error::MinimumInstructionLengthZero => { + "The minimum instruction length must not be zero." + } + Error::MaximumOperationsPerInstructionZero => { + "The maximum operations per instruction must not be zero." + } + Error::LineRangeZero => "The line range must not be zero.", + Error::OpcodeBaseZero => "The opcode base must not be zero.", + Error::BadUtf8 => "Found an invalid UTF-8 string.", + Error::NotCieId => "Expected to find the CIE ID, but found something else.", + Error::NotCiePointer => "Expected to find a CIE pointer, but found the CIE ID instead.", + Error::NotFdePointer => { + "Expected to find an FDE pointer, but found a CIE pointer instead." 
+ } + Error::BadBranchTarget(_) => "Invalid branch target in DWARF expression", + Error::InvalidPushObjectAddress => { + "DW_OP_push_object_address used but no object address given" + } + Error::NotEnoughStackItems => "Not enough items on stack when evaluating expression", + Error::TooManyIterations => "Too many iterations to evaluate DWARF expression", + Error::InvalidExpression(_) => "Invalid opcode in DWARF expression", + Error::UnsupportedEvaluation => "Unsupported operation when evaluating expression", + Error::InvalidPiece => { + "DWARF expression has piece followed by non-piece expression at end" + } + Error::InvalidExpressionTerminator(_) => "Expected DW_OP_piece or DW_OP_bit_piece", + Error::DivisionByZero => "Division or modulus by zero when evaluating expression", + Error::TypeMismatch => "Type mismatch when evaluating expression", + Error::IntegralTypeRequired => "Integral type expected when evaluating expression", + Error::UnsupportedTypeOperation => { + "An expression operation used types that are not supported" + } + Error::InvalidShiftExpression => { + "The shift value in an expression must be a non-negative integer." + } + Error::UnknownCallFrameInstruction(_) => "An unknown DW_CFA_* instructiion", + Error::InvalidAddressRange => { + "The end of an address range must not be before the beginning." + } + Error::CfiInstructionInInvalidContext => { + "Encountered a call frame instruction in a context in which it is not valid." + } + Error::PopWithEmptyStack => { + "When evaluating call frame instructions, found a `DW_CFA_restore_state` stack pop \ + instruction, but the stack was empty, and had nothing to pop." + } + Error::NoUnwindInfoForAddress => "Do not have unwind info for the given address.", + Error::UnsupportedOffset => { + "An offset value was larger than the maximum supported value." + } + Error::UnknownPointerEncoding(_) => { + "The given pointer encoding is either unknown or invalid." 
+ } + Error::NoEntryAtGivenOffset => "Did not find an entry at the given offset.", + Error::OffsetOutOfBounds => "The given offset is out of bounds.", + Error::UnknownAugmentation => "Found an unknown CFI augmentation.", + Error::UnsupportedPointerEncoding => { + "We do not support the given pointer encoding yet." + } + Error::UnsupportedRegister(_) => "Registers larger than `u16` are not supported.", + Error::TooManyRegisterRules => { + "The CFI program defined more register rules than we have storage for." + } + Error::StackFull => { + "Attempted to push onto the CFI stack, but it was already at full capacity." + } + Error::VariableLengthSearchTable => { + "The `.eh_frame_hdr` binary search table claims to be variable-length encoded, \ + which makes binary search impossible." + } + Error::UnsupportedUnitType => "The `DW_UT_*` value for this unit is not supported yet", + Error::UnsupportedAddressIndex => "Ranges involving AddressIndex are not supported yet", + Error::UnsupportedSegmentSize => "Nonzero segment size not supported yet", + Error::MissingUnitDie => { + "A compilation unit or type unit is missing its top level DIE." + } + Error::UnsupportedAttributeForm => "A DIE attribute used an unsupported form.", + Error::MissingFileEntryFormatPath => "Missing DW_LNCT_path in file entry format.", + Error::ExpectedStringAttributeValue => { + "Expected an attribute value to be a string form." 
+ } + Error::InvalidImplicitConst => "DW_FORM_implicit_const used in an invalid context.", + Error::InvalidIndexSectionCount => "Invalid section count in `.dwp` index.", + Error::InvalidIndexSlotCount => "Invalid slot count in `.dwp` index.", + Error::InvalidIndexRow => "Invalid hash row in `.dwp` index.", + Error::UnknownIndexSection(_) => "Unknown section type in `.dwp` index.", + Error::UnknownIndexSectionV2(_) => "Unknown section type in version 2 `.dwp` index.", + } + } +} + +#[cfg(feature = "std")] +impl error::Error for Error {} + +#[cfg(feature = "std")] +impl From for Error { + fn from(_: io::Error) -> Self { + Error::Io + } +} + +/// The result of a parse. +pub type Result = result::Result; + +/// A convenience trait for loading DWARF sections from object files. To be +/// used like: +/// +/// ``` +/// use gimli::{DebugInfo, EndianSlice, LittleEndian, Reader, Section}; +/// +/// let buf = [0x00, 0x01, 0x02, 0x03]; +/// let reader = EndianSlice::new(&buf, LittleEndian); +/// let loader = |name| -> Result<_, ()> { Ok(reader) }; +/// +/// let debug_info: DebugInfo<_> = Section::load(loader).unwrap(); +/// ``` +pub trait Section: From { + /// Returns the section id for this type. + fn id() -> SectionId; + + /// Returns the ELF section name for this type. + fn section_name() -> &'static str { + Self::id().name() + } + + /// Returns the ELF section name (if any) for this type when used in a dwo + /// file. + fn dwo_section_name() -> Option<&'static str> { + Self::id().dwo_name() + } + + /// Returns the XCOFF section name (if any) for this type when used in a XCOFF + /// file. + fn xcoff_section_name() -> Option<&'static str> { + Self::id().xcoff_name() + } + + /// Try to load the section using the given loader function. + fn load(f: F) -> core::result::Result + where + F: FnOnce(SectionId) -> core::result::Result, + { + f(Self::id()).map(From::from) + } + + /// Returns the `Reader` for this section. 
+ fn reader(&self) -> &R + where + R: Reader; + + /// Returns the subrange of the section that is the contribution of + /// a unit in a `.dwp` file. + fn dwp_range(&self, offset: u32, size: u32) -> Result + where + R: Reader, + { + let mut data = self.reader().clone(); + data.skip(R::Offset::from_u32(offset))?; + data.truncate(R::Offset::from_u32(size))?; + Ok(data.into()) + } + + /// Returns the `Reader` for this section. + fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<(SectionId, R::Offset)> + where + R: Reader, + { + self.reader() + .lookup_offset_id(id) + .map(|offset| (Self::id(), offset)) + } +} + +impl Register { + pub(crate) fn from_u64(x: u64) -> Result { + let y = x as u16; + if u64::from(y) == x { + Ok(Register(y)) + } else { + Err(Error::UnsupportedRegister(x)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::common::Format; + use crate::endianity::LittleEndian; + use test_assembler::{Endian, Section}; + + #[test] + fn test_parse_initial_length_32_ok() { + let section = Section::with_endian(Endian::Little).L32(0x7856_3412); + let buf = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_initial_length() { + Ok((length, format)) => { + assert_eq!(input.len(), 0); + assert_eq!(format, Format::Dwarf32); + assert_eq!(0x7856_3412, length); + } + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + #[test] + fn test_parse_initial_length_64_ok() { + let section = Section::with_endian(Endian::Little) + // Dwarf_64_INITIAL_UNIT_LENGTH + .L32(0xffff_ffff) + // Actual length + .L64(0xffde_bc9a_7856_3412); + let buf = section.get_contents().unwrap(); + let input = &mut EndianSlice::new(&buf, LittleEndian); + + #[cfg(target_pointer_width = "64")] + match input.read_initial_length() { + Ok((length, format)) => { + assert_eq!(input.len(), 0); + assert_eq!(format, Format::Dwarf64); + assert_eq!(0xffde_bc9a_7856_3412, length); + } + otherwise => panic!("Unexpected 
result: {:?}", otherwise), + } + + #[cfg(target_pointer_width = "32")] + match input.read_initial_length() { + Err(Error::UnsupportedOffset) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_initial_length_unknown_reserved_value() { + let section = Section::with_endian(Endian::Little).L32(0xffff_fffe); + let buf = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_initial_length() { + Err(Error::UnknownReservedLength) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_initial_length_incomplete() { + let buf = [0xff, 0xff, 0xff]; // Need at least 4 bytes. + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_initial_length() { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_initial_length_64_incomplete() { + let section = Section::with_endian(Endian::Little) + // Dwarf_64_INITIAL_UNIT_LENGTH + .L32(0xffff_ffff) + // Actual length is not long enough. 
+ .L32(0x7856_3412); + let buf = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_initial_length() { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_offset_32() { + let section = Section::with_endian(Endian::Little).L32(0x0123_4567); + let buf = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_offset(Format::Dwarf32) { + Ok(val) => { + assert_eq!(input.len(), 0); + assert_eq!(val, 0x0123_4567); + } + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_offset_64_small() { + let section = Section::with_endian(Endian::Little).L64(0x0123_4567); + let buf = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_offset(Format::Dwarf64) { + Ok(val) => { + assert_eq!(input.len(), 0); + assert_eq!(val, 0x0123_4567); + } + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_offset_64_large() { + let section = Section::with_endian(Endian::Little).L64(0x0123_4567_89ab_cdef); + let buf = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_offset(Format::Dwarf64) { + Ok(val) => { + assert_eq!(input.len(), 0); + assert_eq!(val, 0x0123_4567_89ab_cdef); + } + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + #[cfg(target_pointer_width = "32")] + fn test_parse_offset_64_large() { + let section = Section::with_endian(Endian::Little).L64(0x0123_4567_89ab_cdef); + let buf = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + match input.read_offset(Format::Dwarf64) { + Err(Error::UnsupportedOffset) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; 
+ } +} diff --git a/third_party/rust/gimli/src/read/op.rs b/third_party/rust/gimli/src/read/op.rs new file mode 100644 index 000000000000..2d2ba46c1fe8 --- /dev/null +++ b/third_party/rust/gimli/src/read/op.rs @@ -0,0 +1,4140 @@ +//! Functions for parsing and evaluating DWARF expressions. + +#[cfg(feature = "read")] +use alloc::vec::Vec; +use core::mem; + +use super::util::{ArrayLike, ArrayVec}; +use crate::common::{DebugAddrIndex, DebugInfoOffset, Encoding, Register}; +use crate::constants; +use crate::read::{Error, Reader, ReaderOffset, Result, StoreOnHeap, UnitOffset, Value, ValueType}; + +/// A reference to a DIE, either relative to the current CU or +/// relative to the section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DieReference { + /// A CU-relative reference. + UnitRef(UnitOffset), + /// A section-relative reference. + DebugInfoRef(DebugInfoOffset), +} + +/// A single decoded DWARF expression operation. +/// +/// DWARF expression evaluation is done in two parts: first the raw +/// bytes of the next part of the expression are decoded; and then the +/// decoded operation is evaluated. This approach lets other +/// consumers inspect the DWARF expression without reimplementing the +/// decoding operation. +/// +/// Multiple DWARF opcodes may decode into a single `Operation`. For +/// example, both `DW_OP_deref` and `DW_OP_xderef` are represented +/// using `Operation::Deref`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Operation::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// Dereference the topmost value of the stack. + Deref { + /// The DIE of the base type or 0 to indicate the generic type + base_type: UnitOffset, + /// The size of the data to dereference. + size: u8, + /// True if the dereference operation takes an address space + /// argument from the stack; false otherwise. + space: bool, + }, + /// Drop an item from the stack. + Drop, + /// Pick an item from the stack and push it on top of the stack. 
+ /// This operation handles `DW_OP_pick`, `DW_OP_dup`, and + /// `DW_OP_over`. + Pick { + /// The index, from the top of the stack, of the item to copy. + index: u8, + }, + /// Swap the top two stack items. + Swap, + /// Rotate the top three stack items. + Rot, + /// Take the absolute value of the top of the stack. + Abs, + /// Bitwise `and` of the top two values on the stack. + And, + /// Divide the top two values on the stack. + Div, + /// Subtract the top two values on the stack. + Minus, + /// Modulus of the top two values on the stack. + Mod, + /// Multiply the top two values on the stack. + Mul, + /// Negate the top of the stack. + Neg, + /// Bitwise `not` of the top of the stack. + Not, + /// Bitwise `or` of the top two values on the stack. + Or, + /// Add the top two values on the stack. + Plus, + /// Add a constant to the topmost value on the stack. + PlusConstant { + /// The value to add. + value: u64, + }, + /// Logical left shift of the 2nd value on the stack by the number + /// of bits given by the topmost value on the stack. + Shl, + /// Right shift of the 2nd value on the stack by the number of + /// bits given by the topmost value on the stack. + Shr, + /// Arithmetic left shift of the 2nd value on the stack by the + /// number of bits given by the topmost value on the stack. + Shra, + /// Bitwise `xor` of the top two values on the stack. + Xor, + /// Branch to the target location if the top of stack is nonzero. + Bra { + /// The relative offset to the target bytecode. + target: i16, + }, + /// Compare the top two stack values for equality. + Eq, + /// Compare the top two stack values using `>=`. + Ge, + /// Compare the top two stack values using `>`. + Gt, + /// Compare the top two stack values using `<=`. + Le, + /// Compare the top two stack values using `<`. + Lt, + /// Compare the top two stack values using `!=`. + Ne, + /// Unconditional branch to the target location. + Skip { + /// The relative offset to the target bytecode. 
+ target: i16, + }, + /// Push an unsigned constant value on the stack. This handles multiple + /// DWARF opcodes. + UnsignedConstant { + /// The value to push. + value: u64, + }, + /// Push a signed constant value on the stack. This handles multiple + /// DWARF opcodes. + SignedConstant { + /// The value to push. + value: i64, + }, + /// Indicate that this piece's location is in the given register. + /// + /// Completes the piece or expression. + Register { + /// The register number. + register: Register, + }, + /// Find the value of the given register, add the offset, and then + /// push the resulting sum on the stack. + RegisterOffset { + /// The register number. + register: Register, + /// The offset to add. + offset: i64, + /// The DIE of the base type or 0 to indicate the generic type + base_type: UnitOffset, + }, + /// Compute the frame base (using `DW_AT_frame_base`), add the + /// given offset, and then push the resulting sum on the stack. + FrameOffset { + /// The offset to add. + offset: i64, + }, + /// No operation. + Nop, + /// Push the object address on the stack. + PushObjectAddress, + /// Evaluate a DWARF expression as a subroutine. The expression + /// comes from the `DW_AT_location` attribute of the indicated + /// DIE. + Call { + /// The DIE to use. + offset: DieReference, + }, + /// Compute the address of a thread-local variable and push it on + /// the stack. + TLS, + /// Compute the call frame CFA and push it on the stack. + CallFrameCFA, + /// Terminate a piece. + Piece { + /// The size of this piece in bits. + size_in_bits: u64, + /// The bit offset of this piece. If `None`, then this piece + /// was specified using `DW_OP_piece` and should start at the + /// next byte boundary. + bit_offset: Option, + }, + /// The object has no location, but has a known constant value. + /// + /// Represents `DW_OP_implicit_value`. + /// Completes the piece or expression. + ImplicitValue { + /// The implicit value to use. 
+ data: R, + }, + /// The object has no location, but its value is at the top of the stack. + /// + /// Represents `DW_OP_stack_value`. + /// Completes the piece or expression. + StackValue, + /// The object is a pointer to a value which has no actual location, + /// such as an implicit value or a stack value. + /// + /// Represents `DW_OP_implicit_pointer`. + /// Completes the piece or expression. + ImplicitPointer { + /// The `.debug_info` offset of the value that this is an implicit pointer into. + value: DebugInfoOffset, + /// The byte offset into the value that the implicit pointer points to. + byte_offset: i64, + }, + /// Evaluate an expression at the entry to the current subprogram, and push it on the stack. + /// + /// Represents `DW_OP_entry_value`. + EntryValue { + /// The expression to be evaluated. + expression: R, + }, + /// This represents a parameter that was optimized out. + /// + /// The offset points to the definition of the parameter, and is + /// matched to the `DW_TAG_GNU_call_site_parameter` in the caller that also + /// points to the same definition of the parameter. + /// + /// Represents `DW_OP_GNU_parameter_ref`. + ParameterRef { + /// The DIE to use. + offset: UnitOffset, + }, + /// Relocate the address if needed, and push it on the stack. + /// + /// Represents `DW_OP_addr`. + Address { + /// The offset to add. + address: u64, + }, + /// Read the address at the given index in `.debug_addr, relocate the address if needed, + /// and push it on the stack. + /// + /// Represents `DW_OP_addrx`. + AddressIndex { + /// The index of the address in `.debug_addr`. + index: DebugAddrIndex, + }, + /// Read the address at the given index in `.debug_addr, and push it on the stack. + /// Do not relocate the address. + /// + /// Represents `DW_OP_constx`. + ConstantIndex { + /// The index of the address in `.debug_addr`. + index: DebugAddrIndex, + }, + /// Interpret the value bytes as a constant of a given type, and push it on the stack. 
+ /// + /// Represents `DW_OP_const_type`. + TypedLiteral { + /// The DIE of the base type. + base_type: UnitOffset, + /// The value bytes. + value: R, + }, + /// Pop the top stack entry, convert it to a different type, and push it on the stack. + /// + /// Represents `DW_OP_convert`. + Convert { + /// The DIE of the base type. + base_type: UnitOffset, + }, + /// Pop the top stack entry, reinterpret the bits in its value as a different type, + /// and push it on the stack. + /// + /// Represents `DW_OP_reinterpret`. + Reinterpret { + /// The DIE of the base type. + base_type: UnitOffset, + }, + /// The index of a local in the currently executing function. + /// + /// Represents `DW_OP_WASM_location 0x00`. + /// Completes the piece or expression. + WasmLocal { + /// The index of the local. + index: u32, + }, + /// The index of a global. + /// + /// Represents `DW_OP_WASM_location 0x01` or `DW_OP_WASM_location 0x03`. + /// Completes the piece or expression. + WasmGlobal { + /// The index of the global. + index: u32, + }, + /// The index of an item on the operand stack. + /// + /// Represents `DW_OP_WASM_location 0x02`. + /// Completes the piece or expression. + WasmStack { + /// The index of the stack item. 0 is the bottom of the operand stack. + index: u32, + }, +} + +#[derive(Debug)] +enum OperationEvaluationResult { + Piece, + Incomplete, + Complete { location: Location }, + Waiting(EvaluationWaiting, EvaluationResult), +} + +/// A single location of a piece of the result of a DWARF expression. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Location::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// The piece is empty. Ordinarily this means the piece has been + /// optimized away. + Empty, + /// The piece is found in a register. + Register { + /// The register number. + register: Register, + }, + /// The piece is found in memory. + Address { + /// The address. + address: u64, + }, + /// The piece has no location but its value is known. 
+ Value { + /// The value. + value: Value, + }, + /// The piece is represented by some constant bytes. + Bytes { + /// The value. + value: R, + }, + /// The piece is a pointer to a value which has no actual location. + ImplicitPointer { + /// The `.debug_info` offset of the value that this is an implicit pointer into. + value: DebugInfoOffset, + /// The byte offset into the value that the implicit pointer points to. + byte_offset: i64, + }, +} + +impl Location +where + R: Reader, + Offset: ReaderOffset, +{ + /// Return true if the piece is empty. + pub fn is_empty(&self) -> bool { + matches!(*self, Location::Empty) + } +} + +/// The description of a single piece of the result of a DWARF +/// expression. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Piece::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// If given, the size of the piece in bits. If `None`, there + /// must be only one piece whose size is all of the object. + pub size_in_bits: Option, + /// If given, the bit offset of the piece within the location. + /// If the location is a `Location::Register` or `Location::Value`, + /// then this offset is from the least significant bit end of + /// the register or value. + /// If the location is a `Location::Address` then the offset uses + /// the bit numbering and direction conventions of the language + /// and target system. + /// + /// If `None`, the piece starts at the location. If the + /// location is a register whose size is larger than the piece, + /// then placement within the register is defined by the ABI. + pub bit_offset: Option, + /// Where this piece is to be found. + pub location: Location, +} + +// A helper function to handle branch offsets. 
+fn compute_pc(pc: &R, bytecode: &R, offset: i16) -> Result { + let pc_offset = pc.offset_from(bytecode); + let new_pc_offset = pc_offset.wrapping_add(R::Offset::from_i16(offset)); + if new_pc_offset > bytecode.len() { + Err(Error::BadBranchTarget(new_pc_offset.into_u64())) + } else { + let mut new_pc = bytecode.clone(); + new_pc.skip(new_pc_offset)?; + Ok(new_pc) + } +} + +fn generic_type() -> UnitOffset { + UnitOffset(O::from_u64(0).unwrap()) +} + +impl Operation +where + R: Reader, + Offset: ReaderOffset, +{ + /// Parse a single DWARF expression operation. + /// + /// This is useful when examining a DWARF expression for reasons other + /// than direct evaluation. + /// + /// `bytes` points to a the operation to decode. It should point into + /// the same array as `bytecode`, which should be the entire + /// expression. + pub fn parse(bytes: &mut R, encoding: Encoding) -> Result> { + let opcode = bytes.read_u8()?; + let name = constants::DwOp(opcode); + match name { + constants::DW_OP_addr => { + let address = bytes.read_address(encoding.address_size)?; + Ok(Operation::Address { address }) + } + constants::DW_OP_deref => Ok(Operation::Deref { + base_type: generic_type(), + size: encoding.address_size, + space: false, + }), + constants::DW_OP_const1u => { + let value = bytes.read_u8()?; + Ok(Operation::UnsignedConstant { + value: u64::from(value), + }) + } + constants::DW_OP_const1s => { + let value = bytes.read_i8()?; + Ok(Operation::SignedConstant { + value: i64::from(value), + }) + } + constants::DW_OP_const2u => { + let value = bytes.read_u16()?; + Ok(Operation::UnsignedConstant { + value: u64::from(value), + }) + } + constants::DW_OP_const2s => { + let value = bytes.read_i16()?; + Ok(Operation::SignedConstant { + value: i64::from(value), + }) + } + constants::DW_OP_const4u => { + let value = bytes.read_u32()?; + Ok(Operation::UnsignedConstant { + value: u64::from(value), + }) + } + constants::DW_OP_const4s => { + let value = bytes.read_i32()?; + 
Ok(Operation::SignedConstant { + value: i64::from(value), + }) + } + constants::DW_OP_const8u => { + let value = bytes.read_u64()?; + Ok(Operation::UnsignedConstant { value }) + } + constants::DW_OP_const8s => { + let value = bytes.read_i64()?; + Ok(Operation::SignedConstant { value }) + } + constants::DW_OP_constu => { + let value = bytes.read_uleb128()?; + Ok(Operation::UnsignedConstant { value }) + } + constants::DW_OP_consts => { + let value = bytes.read_sleb128()?; + Ok(Operation::SignedConstant { value }) + } + constants::DW_OP_dup => Ok(Operation::Pick { index: 0 }), + constants::DW_OP_drop => Ok(Operation::Drop), + constants::DW_OP_over => Ok(Operation::Pick { index: 1 }), + constants::DW_OP_pick => { + let value = bytes.read_u8()?; + Ok(Operation::Pick { index: value }) + } + constants::DW_OP_swap => Ok(Operation::Swap), + constants::DW_OP_rot => Ok(Operation::Rot), + constants::DW_OP_xderef => Ok(Operation::Deref { + base_type: generic_type(), + size: encoding.address_size, + space: true, + }), + constants::DW_OP_abs => Ok(Operation::Abs), + constants::DW_OP_and => Ok(Operation::And), + constants::DW_OP_div => Ok(Operation::Div), + constants::DW_OP_minus => Ok(Operation::Minus), + constants::DW_OP_mod => Ok(Operation::Mod), + constants::DW_OP_mul => Ok(Operation::Mul), + constants::DW_OP_neg => Ok(Operation::Neg), + constants::DW_OP_not => Ok(Operation::Not), + constants::DW_OP_or => Ok(Operation::Or), + constants::DW_OP_plus => Ok(Operation::Plus), + constants::DW_OP_plus_uconst => { + let value = bytes.read_uleb128()?; + Ok(Operation::PlusConstant { value }) + } + constants::DW_OP_shl => Ok(Operation::Shl), + constants::DW_OP_shr => Ok(Operation::Shr), + constants::DW_OP_shra => Ok(Operation::Shra), + constants::DW_OP_xor => Ok(Operation::Xor), + constants::DW_OP_bra => { + let target = bytes.read_i16()?; + Ok(Operation::Bra { target }) + } + constants::DW_OP_eq => Ok(Operation::Eq), + constants::DW_OP_ge => Ok(Operation::Ge), + constants::DW_OP_gt => 
Ok(Operation::Gt), + constants::DW_OP_le => Ok(Operation::Le), + constants::DW_OP_lt => Ok(Operation::Lt), + constants::DW_OP_ne => Ok(Operation::Ne), + constants::DW_OP_skip => { + let target = bytes.read_i16()?; + Ok(Operation::Skip { target }) + } + constants::DW_OP_lit0 + | constants::DW_OP_lit1 + | constants::DW_OP_lit2 + | constants::DW_OP_lit3 + | constants::DW_OP_lit4 + | constants::DW_OP_lit5 + | constants::DW_OP_lit6 + | constants::DW_OP_lit7 + | constants::DW_OP_lit8 + | constants::DW_OP_lit9 + | constants::DW_OP_lit10 + | constants::DW_OP_lit11 + | constants::DW_OP_lit12 + | constants::DW_OP_lit13 + | constants::DW_OP_lit14 + | constants::DW_OP_lit15 + | constants::DW_OP_lit16 + | constants::DW_OP_lit17 + | constants::DW_OP_lit18 + | constants::DW_OP_lit19 + | constants::DW_OP_lit20 + | constants::DW_OP_lit21 + | constants::DW_OP_lit22 + | constants::DW_OP_lit23 + | constants::DW_OP_lit24 + | constants::DW_OP_lit25 + | constants::DW_OP_lit26 + | constants::DW_OP_lit27 + | constants::DW_OP_lit28 + | constants::DW_OP_lit29 + | constants::DW_OP_lit30 + | constants::DW_OP_lit31 => Ok(Operation::UnsignedConstant { + value: (opcode - constants::DW_OP_lit0.0).into(), + }), + constants::DW_OP_reg0 + | constants::DW_OP_reg1 + | constants::DW_OP_reg2 + | constants::DW_OP_reg3 + | constants::DW_OP_reg4 + | constants::DW_OP_reg5 + | constants::DW_OP_reg6 + | constants::DW_OP_reg7 + | constants::DW_OP_reg8 + | constants::DW_OP_reg9 + | constants::DW_OP_reg10 + | constants::DW_OP_reg11 + | constants::DW_OP_reg12 + | constants::DW_OP_reg13 + | constants::DW_OP_reg14 + | constants::DW_OP_reg15 + | constants::DW_OP_reg16 + | constants::DW_OP_reg17 + | constants::DW_OP_reg18 + | constants::DW_OP_reg19 + | constants::DW_OP_reg20 + | constants::DW_OP_reg21 + | constants::DW_OP_reg22 + | constants::DW_OP_reg23 + | constants::DW_OP_reg24 + | constants::DW_OP_reg25 + | constants::DW_OP_reg26 + | constants::DW_OP_reg27 + | constants::DW_OP_reg28 + | constants::DW_OP_reg29 + | 
constants::DW_OP_reg30 + | constants::DW_OP_reg31 => Ok(Operation::Register { + register: Register((opcode - constants::DW_OP_reg0.0).into()), + }), + constants::DW_OP_breg0 + | constants::DW_OP_breg1 + | constants::DW_OP_breg2 + | constants::DW_OP_breg3 + | constants::DW_OP_breg4 + | constants::DW_OP_breg5 + | constants::DW_OP_breg6 + | constants::DW_OP_breg7 + | constants::DW_OP_breg8 + | constants::DW_OP_breg9 + | constants::DW_OP_breg10 + | constants::DW_OP_breg11 + | constants::DW_OP_breg12 + | constants::DW_OP_breg13 + | constants::DW_OP_breg14 + | constants::DW_OP_breg15 + | constants::DW_OP_breg16 + | constants::DW_OP_breg17 + | constants::DW_OP_breg18 + | constants::DW_OP_breg19 + | constants::DW_OP_breg20 + | constants::DW_OP_breg21 + | constants::DW_OP_breg22 + | constants::DW_OP_breg23 + | constants::DW_OP_breg24 + | constants::DW_OP_breg25 + | constants::DW_OP_breg26 + | constants::DW_OP_breg27 + | constants::DW_OP_breg28 + | constants::DW_OP_breg29 + | constants::DW_OP_breg30 + | constants::DW_OP_breg31 => { + let value = bytes.read_sleb128()?; + Ok(Operation::RegisterOffset { + register: Register((opcode - constants::DW_OP_breg0.0).into()), + offset: value, + base_type: generic_type(), + }) + } + constants::DW_OP_regx => { + let register = bytes.read_uleb128().and_then(Register::from_u64)?; + Ok(Operation::Register { register }) + } + constants::DW_OP_fbreg => { + let value = bytes.read_sleb128()?; + Ok(Operation::FrameOffset { offset: value }) + } + constants::DW_OP_bregx => { + let register = bytes.read_uleb128().and_then(Register::from_u64)?; + let offset = bytes.read_sleb128()?; + Ok(Operation::RegisterOffset { + register, + offset, + base_type: generic_type(), + }) + } + constants::DW_OP_piece => { + let size = bytes.read_uleb128()?; + Ok(Operation::Piece { + size_in_bits: 8 * size, + bit_offset: None, + }) + } + constants::DW_OP_deref_size => { + let size = bytes.read_u8()?; + Ok(Operation::Deref { + base_type: generic_type(), + size, + space: 
false, + }) + } + constants::DW_OP_xderef_size => { + let size = bytes.read_u8()?; + Ok(Operation::Deref { + base_type: generic_type(), + size, + space: true, + }) + } + constants::DW_OP_nop => Ok(Operation::Nop), + constants::DW_OP_push_object_address => Ok(Operation::PushObjectAddress), + constants::DW_OP_call2 => { + let value = bytes.read_u16().map(R::Offset::from_u16)?; + Ok(Operation::Call { + offset: DieReference::UnitRef(UnitOffset(value)), + }) + } + constants::DW_OP_call4 => { + let value = bytes.read_u32().map(R::Offset::from_u32)?; + Ok(Operation::Call { + offset: DieReference::UnitRef(UnitOffset(value)), + }) + } + constants::DW_OP_call_ref => { + let value = bytes.read_offset(encoding.format)?; + Ok(Operation::Call { + offset: DieReference::DebugInfoRef(DebugInfoOffset(value)), + }) + } + constants::DW_OP_form_tls_address | constants::DW_OP_GNU_push_tls_address => { + Ok(Operation::TLS) + } + constants::DW_OP_call_frame_cfa => Ok(Operation::CallFrameCFA), + constants::DW_OP_bit_piece => { + let size = bytes.read_uleb128()?; + let offset = bytes.read_uleb128()?; + Ok(Operation::Piece { + size_in_bits: size, + bit_offset: Some(offset), + }) + } + constants::DW_OP_implicit_value => { + let len = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + let data = bytes.split(len)?; + Ok(Operation::ImplicitValue { data }) + } + constants::DW_OP_stack_value => Ok(Operation::StackValue), + constants::DW_OP_implicit_pointer | constants::DW_OP_GNU_implicit_pointer => { + let value = if encoding.version == 2 { + bytes + .read_address(encoding.address_size) + .and_then(Offset::from_u64)? + } else { + bytes.read_offset(encoding.format)? 
+ }; + let byte_offset = bytes.read_sleb128()?; + Ok(Operation::ImplicitPointer { + value: DebugInfoOffset(value), + byte_offset, + }) + } + constants::DW_OP_addrx | constants::DW_OP_GNU_addr_index => { + let index = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + Ok(Operation::AddressIndex { + index: DebugAddrIndex(index), + }) + } + constants::DW_OP_constx | constants::DW_OP_GNU_const_index => { + let index = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + Ok(Operation::ConstantIndex { + index: DebugAddrIndex(index), + }) + } + constants::DW_OP_entry_value | constants::DW_OP_GNU_entry_value => { + let len = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + let expression = bytes.split(len)?; + Ok(Operation::EntryValue { expression }) + } + constants::DW_OP_GNU_parameter_ref => { + let value = bytes.read_u32().map(R::Offset::from_u32)?; + Ok(Operation::ParameterRef { + offset: UnitOffset(value), + }) + } + constants::DW_OP_const_type | constants::DW_OP_GNU_const_type => { + let base_type = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + let len = bytes.read_u8()?; + let value = bytes.split(R::Offset::from_u8(len))?; + Ok(Operation::TypedLiteral { + base_type: UnitOffset(base_type), + value, + }) + } + constants::DW_OP_regval_type | constants::DW_OP_GNU_regval_type => { + let register = bytes.read_uleb128().and_then(Register::from_u64)?; + let base_type = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + Ok(Operation::RegisterOffset { + register, + offset: 0, + base_type: UnitOffset(base_type), + }) + } + constants::DW_OP_deref_type | constants::DW_OP_GNU_deref_type => { + let size = bytes.read_u8()?; + let base_type = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + Ok(Operation::Deref { + base_type: UnitOffset(base_type), + size, + space: false, + }) + } + constants::DW_OP_xderef_type => { + let size = bytes.read_u8()?; + let base_type = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + Ok(Operation::Deref { + base_type: 
UnitOffset(base_type), + size, + space: true, + }) + } + constants::DW_OP_convert | constants::DW_OP_GNU_convert => { + let base_type = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + Ok(Operation::Convert { + base_type: UnitOffset(base_type), + }) + } + constants::DW_OP_reinterpret | constants::DW_OP_GNU_reinterpret => { + let base_type = bytes.read_uleb128().and_then(R::Offset::from_u64)?; + Ok(Operation::Reinterpret { + base_type: UnitOffset(base_type), + }) + } + constants::DW_OP_WASM_location => match bytes.read_u8()? { + 0x0 => { + let index = bytes.read_uleb128_u32()?; + Ok(Operation::WasmLocal { index }) + } + 0x1 => { + let index = bytes.read_uleb128_u32()?; + Ok(Operation::WasmGlobal { index }) + } + 0x2 => { + let index = bytes.read_uleb128_u32()?; + Ok(Operation::WasmStack { index }) + } + 0x3 => { + let index = bytes.read_u32()?; + Ok(Operation::WasmGlobal { index }) + } + _ => Err(Error::InvalidExpression(name)), + }, + _ => Err(Error::InvalidExpression(name)), + } + } +} + +#[derive(Debug)] +enum EvaluationState { + Start(Option), + Ready, + Error(Error), + Complete, + Waiting(EvaluationWaiting), +} + +#[derive(Debug)] +enum EvaluationWaiting { + Memory, + Register { offset: i64 }, + FrameBase { offset: i64 }, + Tls, + Cfa, + AtLocation, + EntryValue, + ParameterRef, + RelocatedAddress, + IndexedAddress, + TypedLiteral { value: R }, + Convert, + Reinterpret, +} + +/// The state of an `Evaluation` after evaluating a DWARF expression. +/// The evaluation is either `Complete`, or it requires more data +/// to continue, as described by the variant. +#[derive(Debug, PartialEq)] +pub enum EvaluationResult { + /// The `Evaluation` is complete, and `Evaluation::result()` can be called. + Complete, + /// The `Evaluation` needs a value from memory to proceed further. Once the + /// caller determines what value to provide it should resume the `Evaluation` + /// by calling `Evaluation::resume_with_memory`. 
+ RequiresMemory { + /// The address of the value required. + address: u64, + /// The size of the value required. This is guaranteed to be at most the + /// word size of the target architecture. + size: u8, + /// If not `None`, a target-specific address space value. + space: Option, + /// The DIE of the base type or 0 to indicate the generic type + base_type: UnitOffset, + }, + /// The `Evaluation` needs a value from a register to proceed further. Once + /// the caller determines what value to provide it should resume the + /// `Evaluation` by calling `Evaluation::resume_with_register`. + RequiresRegister { + /// The register number. + register: Register, + /// The DIE of the base type or 0 to indicate the generic type + base_type: UnitOffset, + }, + /// The `Evaluation` needs the frame base address to proceed further. Once + /// the caller determines what value to provide it should resume the + /// `Evaluation` by calling `Evaluation::resume_with_frame_base`. The frame + /// base address is the address produced by the location description in the + /// `DW_AT_frame_base` attribute of the current function. + RequiresFrameBase, + /// The `Evaluation` needs a value from TLS to proceed further. Once the + /// caller determines what value to provide it should resume the + /// `Evaluation` by calling `Evaluation::resume_with_tls`. + RequiresTls(u64), + /// The `Evaluation` needs the CFA to proceed further. Once the caller + /// determines what value to provide it should resume the `Evaluation` by + /// calling `Evaluation::resume_with_call_frame_cfa`. + RequiresCallFrameCfa, + /// The `Evaluation` needs the DWARF expression at the given location to + /// proceed further. Once the caller determines what value to provide it + /// should resume the `Evaluation` by calling + /// `Evaluation::resume_with_at_location`. 
+ RequiresAtLocation(DieReference), + /// The `Evaluation` needs the value produced by evaluating a DWARF + /// expression at the entry point of the current subprogram. Once the + /// caller determines what value to provide it should resume the + /// `Evaluation` by calling `Evaluation::resume_with_entry_value`. + RequiresEntryValue(Expression), + /// The `Evaluation` needs the value of the parameter at the given location + /// in the current function's caller. Once the caller determines what value + /// to provide it should resume the `Evaluation` by calling + /// `Evaluation::resume_with_parameter_ref`. + RequiresParameterRef(UnitOffset), + /// The `Evaluation` needs an address to be relocated to proceed further. + /// Once the caller determines what value to provide it should resume the + /// `Evaluation` by calling `Evaluation::resume_with_relocated_address`. + RequiresRelocatedAddress(u64), + /// The `Evaluation` needs an address from the `.debug_addr` section. + /// This address may also need to be relocated. + /// Once the caller determines what value to provide it should resume the + /// `Evaluation` by calling `Evaluation::resume_with_indexed_address`. + RequiresIndexedAddress { + /// The index of the address in the `.debug_addr` section, + /// relative to the `DW_AT_addr_base` of the compilation unit. + index: DebugAddrIndex, + /// Whether the address also needs to be relocated. + relocate: bool, + }, + /// The `Evaluation` needs the `ValueType` for the base type DIE at + /// the give unit offset. Once the caller determines what value to provide it + /// should resume the `Evaluation` by calling + /// `Evaluation::resume_with_base_type`. + RequiresBaseType(UnitOffset), +} + +/// The bytecode for a DWARF expression or location description. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Expression(pub R); + +impl Expression { + /// Create an evaluation for this expression. 
+ /// + /// The `encoding` is determined by the + /// [`CompilationUnitHeader`](struct.CompilationUnitHeader.html) or + /// [`TypeUnitHeader`](struct.TypeUnitHeader.html) that this expression + /// relates to. + /// + /// # Examples + /// ```rust,no_run + /// use gimli::Expression; + /// # let endian = gimli::LittleEndian; + /// # let debug_info = gimli::DebugInfo::from(gimli::EndianSlice::new(&[], endian)); + /// # let unit = debug_info.units().next().unwrap().unwrap(); + /// # let bytecode = gimli::EndianSlice::new(&[], endian); + /// let expression = gimli::Expression(bytecode); + /// let mut eval = expression.evaluation(unit.encoding()); + /// let mut result = eval.evaluate().unwrap(); + /// ``` + #[cfg(feature = "read")] + #[inline] + pub fn evaluation(self, encoding: Encoding) -> Evaluation { + Evaluation::new(self.0, encoding) + } + + /// Return an iterator for the operations in the expression. + pub fn operations(self, encoding: Encoding) -> OperationIter { + OperationIter { + input: self.0, + encoding, + } + } +} + +/// An iterator for the operations in an expression. +#[derive(Debug, Clone, Copy)] +pub struct OperationIter { + input: R, + encoding: Encoding, +} + +impl OperationIter { + /// Read the next operation in an expression. + pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + match Operation::parse(&mut self.input, self.encoding) { + Ok(op) => Ok(Some(op)), + Err(e) => { + self.input.empty(); + Err(e) + } + } + } + + /// Return the current byte offset of the iterator. + pub fn offset_from(&self, expression: &Expression) -> R::Offset { + self.input.offset_from(&expression.0) + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for OperationIter { + type Item = Operation; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + OperationIter::next(self) + } +} + +/// Specification of what storage should be used for [`Evaluation`]. 
+/// +#[cfg_attr( + feature = "read", + doc = " +Normally you would only need to use [`StoreOnHeap`], which places the stacks and the results +on the heap using [`Vec`]. This is the default storage type parameter for [`Evaluation`]. +" +)] +/// +/// If you need to avoid [`Evaluation`] from allocating memory, e.g. for signal safety, +/// you can provide you own storage specification: +/// ```rust,no_run +/// # use gimli::*; +/// # let bytecode = EndianSlice::new(&[], LittleEndian); +/// # let encoding = unimplemented!(); +/// # let get_register_value = |_, _| Value::Generic(42); +/// # let get_frame_base = || 0xdeadbeef; +/// # +/// struct StoreOnStack; +/// +/// impl EvaluationStorage for StoreOnStack { +/// type Stack = [Value; 64]; +/// type ExpressionStack = [(R, R); 4]; +/// type Result = [Piece; 1]; +/// } +/// +/// let mut eval = Evaluation::<_, StoreOnStack>::new_in(bytecode, encoding); +/// let mut result = eval.evaluate().unwrap(); +/// while result != EvaluationResult::Complete { +/// match result { +/// EvaluationResult::RequiresRegister { register, base_type } => { +/// let value = get_register_value(register, base_type); +/// result = eval.resume_with_register(value).unwrap(); +/// }, +/// EvaluationResult::RequiresFrameBase => { +/// let frame_base = get_frame_base(); +/// result = eval.resume_with_frame_base(frame_base).unwrap(); +/// }, +/// _ => unimplemented!(), +/// }; +/// } +/// +/// let result = eval.as_result(); +/// println!("{:?}", result); +/// ``` +pub trait EvaluationStorage { + /// The storage used for the evaluation stack. + type Stack: ArrayLike; + /// The storage used for the expression stack. + type ExpressionStack: ArrayLike; + /// The storage used for the results. + type Result: ArrayLike>; +} + +#[cfg(feature = "read")] +impl EvaluationStorage for StoreOnHeap { + type Stack = Vec; + type ExpressionStack = Vec<(R, R)>; + type Result = Vec>; +} + +/// A DWARF expression evaluator. 
+/// +/// # Usage +/// A DWARF expression may require additional data to produce a final result, +/// such as the value of a register or a memory location. Once initial setup +/// is complete (i.e. `set_initial_value()`, `set_object_address()`) the +/// consumer calls the `evaluate()` method. That returns an `EvaluationResult`, +/// which is either `EvaluationResult::Complete` or a value indicating what +/// data is needed to resume the `Evaluation`. The consumer is responsible for +/// producing that data and resuming the computation with the correct method, +/// as documented for `EvaluationResult`. Only once an `EvaluationResult::Complete` +/// is returned can the consumer call `result()`. +/// +/// This design allows the consumer of `Evaluation` to decide how and when to +/// produce the required data and resume the computation. The `Evaluation` can +/// be driven synchronously (as shown below) or by some asynchronous mechanism +/// such as futures. +/// +/// # Examples +/// ```rust,no_run +/// use gimli::{Evaluation, EvaluationResult, Expression}; +/// # let bytecode = gimli::EndianSlice::new(&[], gimli::LittleEndian); +/// # let encoding = unimplemented!(); +/// # let get_register_value = |_, _| gimli::Value::Generic(42); +/// # let get_frame_base = || 0xdeadbeef; +/// +/// let mut eval = Evaluation::new(bytecode, encoding); +/// let mut result = eval.evaluate().unwrap(); +/// while result != EvaluationResult::Complete { +/// match result { +/// EvaluationResult::RequiresRegister { register, base_type } => { +/// let value = get_register_value(register, base_type); +/// result = eval.resume_with_register(value).unwrap(); +/// }, +/// EvaluationResult::RequiresFrameBase => { +/// let frame_base = get_frame_base(); +/// result = eval.resume_with_frame_base(frame_base).unwrap(); +/// }, +/// _ => unimplemented!(), +/// }; +/// } +/// +/// let result = eval.result(); +/// println!("{:?}", result); +/// ``` +#[derive(Debug)] +pub struct Evaluation = StoreOnHeap> { 
+ bytecode: R, + encoding: Encoding, + object_address: Option, + max_iterations: Option, + iteration: u32, + state: EvaluationState, + + // Stack operations are done on word-sized values. We do all + // operations on 64-bit values, and then mask the results + // appropriately when popping. + addr_mask: u64, + + // The stack. + stack: ArrayVec, + + // The next operation to decode and evaluate. + pc: R, + + // If we see a DW_OP_call* operation, the previous PC and bytecode + // is stored here while evaluating the subroutine. + expression_stack: ArrayVec, + + value_result: Option, + result: ArrayVec, +} + +#[cfg(feature = "read")] +impl Evaluation { + /// Create a new DWARF expression evaluator. + /// + /// The new evaluator is created without an initial value, without + /// an object address, and without a maximum number of iterations. + pub fn new(bytecode: R, encoding: Encoding) -> Self { + Self::new_in(bytecode, encoding) + } + + /// Get the result of this `Evaluation`. + /// + /// # Panics + /// Panics if this `Evaluation` has not been driven to completion. + pub fn result(self) -> Vec> { + match self.state { + EvaluationState::Complete => self.result.into_vec(), + _ => { + panic!("Called `Evaluation::result` on an `Evaluation` that has not been completed") + } + } + } +} + +impl> Evaluation { + /// Create a new DWARF expression evaluator. + /// + /// The new evaluator is created without an initial value, without + /// an object address, and without a maximum number of iterations. 
+ pub fn new_in(bytecode: R, encoding: Encoding) -> Self { + let pc = bytecode.clone(); + Evaluation { + bytecode, + encoding, + object_address: None, + max_iterations: None, + iteration: 0, + state: EvaluationState::Start(None), + addr_mask: if encoding.address_size == 8 { + !0u64 + } else { + (1 << (8 * u64::from(encoding.address_size))) - 1 + }, + stack: Default::default(), + expression_stack: Default::default(), + pc, + value_result: None, + result: Default::default(), + } + } + + /// Set an initial value to be pushed on the DWARF expression + /// evaluator's stack. This can be used in cases like + /// `DW_AT_vtable_elem_location`, which require a value on the + /// stack before evaluation commences. If no initial value is + /// set, and the expression uses an opcode requiring the initial + /// value, then evaluation will fail with an error. + /// + /// # Panics + /// Panics if `set_initial_value()` has already been called, or if + /// `evaluate()` has already been called. + pub fn set_initial_value(&mut self, value: u64) { + match self.state { + EvaluationState::Start(None) => { + self.state = EvaluationState::Start(Some(value)); + } + _ => panic!( + "`Evaluation::set_initial_value` was called twice, or after evaluation began." + ), + }; + } + + /// Set the enclosing object's address, as used by + /// `DW_OP_push_object_address`. If no object address is set, and + /// the expression uses an opcode requiring the object address, + /// then evaluation will fail with an error. + pub fn set_object_address(&mut self, value: u64) { + self.object_address = Some(value); + } + + /// Set the maximum number of iterations to be allowed by the + /// expression evaluator. + /// + /// An iteration corresponds approximately to the evaluation of a + /// single operation in an expression ("approximately" because the + /// implementation may allow two such operations in some cases). 
+ /// The default is not to have a maximum; once set, it's not + /// possible to go back to this default state. This value can be + /// set to avoid denial of service attacks by bad DWARF bytecode. + pub fn set_max_iterations(&mut self, value: u32) { + self.max_iterations = Some(value); + } + + fn pop(&mut self) -> Result { + match self.stack.pop() { + Some(value) => Ok(value), + None => Err(Error::NotEnoughStackItems), + } + } + + fn push(&mut self, value: Value) -> Result<()> { + self.stack.try_push(value).map_err(|_| Error::StackFull) + } + + fn evaluate_one_operation(&mut self) -> Result> { + let operation = Operation::parse(&mut self.pc, self.encoding)?; + + match operation { + Operation::Deref { + base_type, + size, + space, + } => { + let entry = self.pop()?; + let addr = entry.to_u64(self.addr_mask)?; + let addr_space = if space { + let entry = self.pop()?; + let value = entry.to_u64(self.addr_mask)?; + Some(value) + } else { + None + }; + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::Memory, + EvaluationResult::RequiresMemory { + address: addr, + size, + space: addr_space, + base_type, + }, + )); + } + + Operation::Drop => { + self.pop()?; + } + Operation::Pick { index } => { + let len = self.stack.len(); + let index = index as usize; + if index >= len { + return Err(Error::NotEnoughStackItems); + } + let value = self.stack[len - index - 1]; + self.push(value)?; + } + Operation::Swap => { + let top = self.pop()?; + let next = self.pop()?; + self.push(top)?; + self.push(next)?; + } + Operation::Rot => { + let one = self.pop()?; + let two = self.pop()?; + let three = self.pop()?; + self.push(one)?; + self.push(three)?; + self.push(two)?; + } + + Operation::Abs => { + let value = self.pop()?; + let result = value.abs(self.addr_mask)?; + self.push(result)?; + } + Operation::And => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.and(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Div => { + let rhs = 
self.pop()?; + let lhs = self.pop()?; + let result = lhs.div(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Minus => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.sub(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Mod => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.rem(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Mul => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.mul(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Neg => { + let v = self.pop()?; + let result = v.neg(self.addr_mask)?; + self.push(result)?; + } + Operation::Not => { + let value = self.pop()?; + let result = value.not(self.addr_mask)?; + self.push(result)?; + } + Operation::Or => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.or(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Plus => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.add(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::PlusConstant { value } => { + let lhs = self.pop()?; + let rhs = Value::from_u64(lhs.value_type(), value)?; + let result = lhs.add(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Shl => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.shl(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Shr => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.shr(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Shra => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.shra(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Xor => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.xor(rhs, self.addr_mask)?; + self.push(result)?; + } + + Operation::Bra { target } => { + let entry = self.pop()?; + let v = entry.to_u64(self.addr_mask)?; + if v != 0 { + self.pc = compute_pc(&self.pc, &self.bytecode, 
target)?; + } + } + + Operation::Eq => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.eq(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Ge => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.ge(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Gt => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.gt(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Le => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.le(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Lt => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.lt(rhs, self.addr_mask)?; + self.push(result)?; + } + Operation::Ne => { + let rhs = self.pop()?; + let lhs = self.pop()?; + let result = lhs.ne(rhs, self.addr_mask)?; + self.push(result)?; + } + + Operation::Skip { target } => { + self.pc = compute_pc(&self.pc, &self.bytecode, target)?; + } + + Operation::UnsignedConstant { value } => { + self.push(Value::Generic(value))?; + } + + Operation::SignedConstant { value } => { + self.push(Value::Generic(value as u64))?; + } + + Operation::RegisterOffset { + register, + offset, + base_type, + } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::Register { offset }, + EvaluationResult::RequiresRegister { + register, + base_type, + }, + )); + } + + Operation::FrameOffset { offset } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::FrameBase { offset }, + EvaluationResult::RequiresFrameBase, + )); + } + + Operation::Nop => {} + + Operation::PushObjectAddress => { + if let Some(value) = self.object_address { + self.push(Value::Generic(value))?; + } else { + return Err(Error::InvalidPushObjectAddress); + } + } + + Operation::Call { offset } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::AtLocation, + EvaluationResult::RequiresAtLocation(offset), + )); + } + + Operation::TLS => { + let 
entry = self.pop()?; + let index = entry.to_u64(self.addr_mask)?; + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::Tls, + EvaluationResult::RequiresTls(index), + )); + } + + Operation::CallFrameCFA => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::Cfa, + EvaluationResult::RequiresCallFrameCfa, + )); + } + + Operation::Register { register } => { + let location = Location::Register { register }; + return Ok(OperationEvaluationResult::Complete { location }); + } + + Operation::ImplicitValue { ref data } => { + let location = Location::Bytes { + value: data.clone(), + }; + return Ok(OperationEvaluationResult::Complete { location }); + } + + Operation::StackValue => { + let value = self.pop()?; + let location = Location::Value { value }; + return Ok(OperationEvaluationResult::Complete { location }); + } + + Operation::ImplicitPointer { value, byte_offset } => { + let location = Location::ImplicitPointer { value, byte_offset }; + return Ok(OperationEvaluationResult::Complete { location }); + } + + Operation::EntryValue { ref expression } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::EntryValue, + EvaluationResult::RequiresEntryValue(Expression(expression.clone())), + )); + } + + Operation::ParameterRef { offset } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::ParameterRef, + EvaluationResult::RequiresParameterRef(offset), + )); + } + + Operation::Address { address } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::RelocatedAddress, + EvaluationResult::RequiresRelocatedAddress(address), + )); + } + + Operation::AddressIndex { index } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::IndexedAddress, + EvaluationResult::RequiresIndexedAddress { + index, + relocate: true, + }, + )); + } + + Operation::ConstantIndex { index } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::IndexedAddress, + 
EvaluationResult::RequiresIndexedAddress { + index, + relocate: false, + }, + )); + } + + Operation::Piece { + size_in_bits, + bit_offset, + } => { + let location = if self.stack.is_empty() { + Location::Empty + } else { + let entry = self.pop()?; + let address = entry.to_u64(self.addr_mask)?; + Location::Address { address } + }; + self.result + .try_push(Piece { + size_in_bits: Some(size_in_bits), + bit_offset, + location, + }) + .map_err(|_| Error::StackFull)?; + return Ok(OperationEvaluationResult::Piece); + } + + Operation::TypedLiteral { base_type, value } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::TypedLiteral { value }, + EvaluationResult::RequiresBaseType(base_type), + )); + } + Operation::Convert { base_type } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::Convert, + EvaluationResult::RequiresBaseType(base_type), + )); + } + Operation::Reinterpret { base_type } => { + return Ok(OperationEvaluationResult::Waiting( + EvaluationWaiting::Reinterpret, + EvaluationResult::RequiresBaseType(base_type), + )); + } + Operation::WasmLocal { .. } + | Operation::WasmGlobal { .. } + | Operation::WasmStack { .. } => { + return Err(Error::UnsupportedEvaluation); + } + } + + Ok(OperationEvaluationResult::Incomplete) + } + + /// Get the result if this is an evaluation for a value. + /// + /// Returns `None` if the evaluation contained operations that are only + /// valid for location descriptions. + /// + /// # Panics + /// Panics if this `Evaluation` has not been driven to completion. + pub fn value_result(&self) -> Option { + match self.state { + EvaluationState::Complete => self.value_result, + _ => { + panic!("Called `Evaluation::value_result` on an `Evaluation` that has not been completed") + } + } + } + + /// Get the result of this `Evaluation`. + /// + /// # Panics + /// Panics if this `Evaluation` has not been driven to completion. 
+ pub fn as_result(&self) -> &[Piece] { + match self.state { + EvaluationState::Complete => &self.result, + _ => { + panic!( + "Called `Evaluation::as_result` on an `Evaluation` that has not been completed" + ) + } + } + } + + /// Evaluate a DWARF expression. This method should only ever be called + /// once. If the returned `EvaluationResult` is not + /// `EvaluationResult::Complete`, the caller should provide the required + /// value and resume the evaluation by calling the appropriate resume_with + /// method on `Evaluation`. + pub fn evaluate(&mut self) -> Result> { + match self.state { + EvaluationState::Start(initial_value) => { + if let Some(value) = initial_value { + self.push(Value::Generic(value))?; + } + self.state = EvaluationState::Ready; + } + EvaluationState::Ready => {} + EvaluationState::Error(err) => return Err(err), + EvaluationState::Complete => return Ok(EvaluationResult::Complete), + EvaluationState::Waiting(_) => panic!(), + }; + + match self.evaluate_internal() { + Ok(r) => Ok(r), + Err(e) => { + self.state = EvaluationState::Error(e); + Err(e) + } + } + } + + /// Resume the `Evaluation` with the provided memory `value`. This will apply + /// the provided memory value to the evaluation and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresMemory`. + pub fn resume_with_memory(&mut self, value: Value) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::Memory) => { + self.push(value)?; + } + _ => panic!( + "Called `Evaluation::resume_with_memory` without a preceding `EvaluationResult::RequiresMemory`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `register` value. 
This will apply + /// the provided register value to the evaluation and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresRegister`. + pub fn resume_with_register(&mut self, value: Value) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::Register { offset }) => { + let offset = Value::from_u64(value.value_type(), offset as u64)?; + let value = value.add(offset, self.addr_mask)?; + self.push(value)?; + } + _ => panic!( + "Called `Evaluation::resume_with_register` without a preceding `EvaluationResult::RequiresRegister`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `frame_base`. This will + /// apply the provided frame base value to the evaluation and continue + /// evaluating opcodes until the evaluation is completed, reaches an error, + /// or needs more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresFrameBase`. + pub fn resume_with_frame_base(&mut self, frame_base: u64) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::FrameBase { offset }) => { + self.push(Value::Generic(frame_base.wrapping_add(offset as u64)))?; + } + _ => panic!( + "Called `Evaluation::resume_with_frame_base` without a preceding `EvaluationResult::RequiresFrameBase`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `value`. This will apply + /// the provided TLS value to the evaluation and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. 
+ /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresTls`. + pub fn resume_with_tls(&mut self, value: u64) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::Tls) => { + self.push(Value::Generic(value))?; + } + _ => panic!( + "Called `Evaluation::resume_with_tls` without a preceding `EvaluationResult::RequiresTls`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `cfa`. This will + /// apply the provided CFA value to the evaluation and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresCallFrameCfa`. + pub fn resume_with_call_frame_cfa(&mut self, cfa: u64) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::Cfa) => { + self.push(Value::Generic(cfa))?; + } + _ => panic!( + "Called `Evaluation::resume_with_call_frame_cfa` without a preceding `EvaluationResult::RequiresCallFrameCfa`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `bytes`. This will + /// continue processing the evaluation with the new expression provided + /// until the evaluation is completed, reaches an error, or needs more + /// information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresAtLocation`. 
+ pub fn resume_with_at_location(&mut self, mut bytes: R) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::AtLocation) => { + if !bytes.is_empty() { + let mut pc = bytes.clone(); + mem::swap(&mut pc, &mut self.pc); + mem::swap(&mut bytes, &mut self.bytecode); + self.expression_stack.try_push((pc, bytes)).map_err(|_| Error::StackFull)?; + } + } + _ => panic!( + "Called `Evaluation::resume_with_at_location` without a precedeing `EvaluationResult::RequiresAtLocation`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `entry_value`. This will + /// apply the provided entry value to the evaluation and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresEntryValue`. + pub fn resume_with_entry_value(&mut self, entry_value: Value) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::EntryValue) => { + self.push(entry_value)?; + } + _ => panic!( + "Called `Evaluation::resume_with_entry_value` without a preceding `EvaluationResult::RequiresEntryValue`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `parameter_value`. This will + /// apply the provided parameter value to the evaluation and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresParameterRef`. 
+ pub fn resume_with_parameter_ref( + &mut self, + parameter_value: u64, + ) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::ParameterRef) => { + self.push(Value::Generic(parameter_value))?; + } + _ => panic!( + "Called `Evaluation::resume_with_parameter_ref` without a preceding `EvaluationResult::RequiresParameterRef`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided relocated `address`. This will use the + /// provided relocated address for the operation that required it, and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with + /// `EvaluationResult::RequiresRelocatedAddress`. + pub fn resume_with_relocated_address(&mut self, address: u64) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::RelocatedAddress) => { + self.push(Value::Generic(address))?; + } + _ => panic!( + "Called `Evaluation::resume_with_relocated_address` without a preceding `EvaluationResult::RequiresRelocatedAddress`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided indexed `address`. This will use the + /// provided indexed address for the operation that required it, and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with + /// `EvaluationResult::RequiresIndexedAddress`. 
+ pub fn resume_with_indexed_address(&mut self, address: u64) -> Result> { + match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::IndexedAddress) => { + self.push(Value::Generic(address))?; + } + _ => panic!( + "Called `Evaluation::resume_with_indexed_address` without a preceding `EvaluationResult::RequiresIndexedAddress`" + ), + }; + + self.evaluate_internal() + } + + /// Resume the `Evaluation` with the provided `base_type`. This will use the + /// provided base type for the operation that required it, and continue evaluating + /// opcodes until the evaluation is completed, reaches an error, or needs + /// more information again. + /// + /// # Panics + /// Panics if this `Evaluation` did not previously stop with `EvaluationResult::RequiresBaseType`. + pub fn resume_with_base_type(&mut self, base_type: ValueType) -> Result> { + let value = match self.state { + EvaluationState::Error(err) => return Err(err), + EvaluationState::Waiting(EvaluationWaiting::TypedLiteral { ref value }) => { + Value::parse(base_type, value.clone())? + } + EvaluationState::Waiting(EvaluationWaiting::Convert) => { + let entry = self.pop()?; + entry.convert(base_type, self.addr_mask)? + } + EvaluationState::Waiting(EvaluationWaiting::Reinterpret) => { + let entry = self.pop()?; + entry.reinterpret(base_type, self.addr_mask)? 
+ } + _ => panic!( + "Called `Evaluation::resume_with_base_type` without a preceding `EvaluationResult::RequiresBaseType`" + ), + }; + self.push(value)?; + self.evaluate_internal() + } + + fn end_of_expression(&mut self) -> bool { + while self.pc.is_empty() { + match self.expression_stack.pop() { + Some((newpc, newbytes)) => { + self.pc = newpc; + self.bytecode = newbytes; + } + None => return true, + } + } + false + } + + fn evaluate_internal(&mut self) -> Result> { + while !self.end_of_expression() { + self.iteration += 1; + if let Some(max_iterations) = self.max_iterations { + if self.iteration > max_iterations { + return Err(Error::TooManyIterations); + } + } + + let op_result = self.evaluate_one_operation()?; + match op_result { + OperationEvaluationResult::Piece => {} + OperationEvaluationResult::Incomplete => { + if self.end_of_expression() && !self.result.is_empty() { + // We saw a piece earlier and then some + // unterminated piece. It's not clear this is + // well-defined. + return Err(Error::InvalidPiece); + } + } + OperationEvaluationResult::Complete { location } => { + if self.end_of_expression() { + if !self.result.is_empty() { + // We saw a piece earlier and then some + // unterminated piece. It's not clear this is + // well-defined. + return Err(Error::InvalidPiece); + } + self.result + .try_push(Piece { + size_in_bits: None, + bit_offset: None, + location, + }) + .map_err(|_| Error::StackFull)?; + } else { + // If there are more operations, then the next operation must + // be a Piece. + match Operation::parse(&mut self.pc, self.encoding)? 
{ + Operation::Piece { + size_in_bits, + bit_offset, + } => { + self.result + .try_push(Piece { + size_in_bits: Some(size_in_bits), + bit_offset, + location, + }) + .map_err(|_| Error::StackFull)?; + } + _ => { + let value = + self.bytecode.len().into_u64() - self.pc.len().into_u64() - 1; + return Err(Error::InvalidExpressionTerminator(value)); + } + } + } + } + OperationEvaluationResult::Waiting(waiting, result) => { + self.state = EvaluationState::Waiting(waiting); + return Ok(result); + } + } + } + + // If no pieces have been seen, use the stack top as the + // result. + if self.result.is_empty() { + let entry = self.pop()?; + self.value_result = Some(entry); + let addr = entry.to_u64(self.addr_mask)?; + self.result + .try_push(Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Address { address: addr }, + }) + .map_err(|_| Error::StackFull)?; + } + + self.state = EvaluationState::Complete; + Ok(EvaluationResult::Complete) + } +} + +#[cfg(test)] +// Tests require leb128::write. +#[cfg(feature = "write")] +mod tests { + use super::*; + use crate::common::Format; + use crate::constants; + use crate::endianity::LittleEndian; + use crate::leb128; + use crate::read::{EndianSlice, Error, Result, UnitOffset}; + use crate::test_util::GimliSectionMethods; + use core::usize; + use test_assembler::{Endian, Section}; + + fn encoding4() -> Encoding { + Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + } + } + + fn encoding8() -> Encoding { + Encoding { + format: Format::Dwarf64, + version: 4, + address_size: 8, + } + } + + #[test] + fn test_compute_pc() { + // Contents don't matter for this test, just length. 
+ let bytes = [0, 1, 2, 3, 4]; + let bytecode = &bytes[..]; + let ebuf = &EndianSlice::new(bytecode, LittleEndian); + + assert_eq!(compute_pc(ebuf, ebuf, 0), Ok(*ebuf)); + assert_eq!( + compute_pc(ebuf, ebuf, -1), + Err(Error::BadBranchTarget(usize::MAX as u64)) + ); + assert_eq!(compute_pc(ebuf, ebuf, 5), Ok(ebuf.range_from(5..))); + assert_eq!( + compute_pc(&ebuf.range_from(3..), ebuf, -2), + Ok(ebuf.range_from(1..)) + ); + assert_eq!( + compute_pc(&ebuf.range_from(2..), ebuf, 2), + Ok(ebuf.range_from(4..)) + ); + } + + fn check_op_parse_simple<'input>( + input: &'input [u8], + expect: &Operation>, + encoding: Encoding, + ) { + let buf = EndianSlice::new(input, LittleEndian); + let mut pc = buf; + let value = Operation::parse(&mut pc, encoding); + match value { + Ok(val) => { + assert_eq!(val, *expect); + assert_eq!(pc.len(), 0); + } + _ => panic!("Unexpected result"), + } + } + + fn check_op_parse_eof(input: &[u8], encoding: Encoding) { + let buf = EndianSlice::new(input, LittleEndian); + let mut pc = buf; + match Operation::parse(&mut pc, encoding) { + Err(Error::UnexpectedEof(id)) => { + assert!(buf.lookup_offset_id(id).is_some()); + } + + _ => panic!("Unexpected result"), + } + } + + fn check_op_parse( + input: F, + expect: &Operation>, + encoding: Encoding, + ) where + F: Fn(Section) -> Section, + { + let input = input(Section::with_endian(Endian::Little)) + .get_contents() + .unwrap(); + for i in 1..input.len() { + check_op_parse_eof(&input[..i], encoding); + } + check_op_parse_simple(&input, expect, encoding); + } + + #[test] + fn test_op_parse_onebyte() { + // Doesn't matter for this test. + let encoding = encoding4(); + + // Test all single-byte opcodes. 
+ #[rustfmt::skip] + let inputs = [ + ( + constants::DW_OP_deref, + Operation::Deref { + base_type: generic_type(), + size: encoding.address_size, + space: false, + }, + ), + (constants::DW_OP_dup, Operation::Pick { index: 0 }), + (constants::DW_OP_drop, Operation::Drop), + (constants::DW_OP_over, Operation::Pick { index: 1 }), + (constants::DW_OP_swap, Operation::Swap), + (constants::DW_OP_rot, Operation::Rot), + ( + constants::DW_OP_xderef, + Operation::Deref { + base_type: generic_type(), + size: encoding.address_size, + space: true, + }, + ), + (constants::DW_OP_abs, Operation::Abs), + (constants::DW_OP_and, Operation::And), + (constants::DW_OP_div, Operation::Div), + (constants::DW_OP_minus, Operation::Minus), + (constants::DW_OP_mod, Operation::Mod), + (constants::DW_OP_mul, Operation::Mul), + (constants::DW_OP_neg, Operation::Neg), + (constants::DW_OP_not, Operation::Not), + (constants::DW_OP_or, Operation::Or), + (constants::DW_OP_plus, Operation::Plus), + (constants::DW_OP_shl, Operation::Shl), + (constants::DW_OP_shr, Operation::Shr), + (constants::DW_OP_shra, Operation::Shra), + (constants::DW_OP_xor, Operation::Xor), + (constants::DW_OP_eq, Operation::Eq), + (constants::DW_OP_ge, Operation::Ge), + (constants::DW_OP_gt, Operation::Gt), + (constants::DW_OP_le, Operation::Le), + (constants::DW_OP_lt, Operation::Lt), + (constants::DW_OP_ne, Operation::Ne), + (constants::DW_OP_lit0, Operation::UnsignedConstant { value: 0 }), + (constants::DW_OP_lit1, Operation::UnsignedConstant { value: 1 }), + (constants::DW_OP_lit2, Operation::UnsignedConstant { value: 2 }), + (constants::DW_OP_lit3, Operation::UnsignedConstant { value: 3 }), + (constants::DW_OP_lit4, Operation::UnsignedConstant { value: 4 }), + (constants::DW_OP_lit5, Operation::UnsignedConstant { value: 5 }), + (constants::DW_OP_lit6, Operation::UnsignedConstant { value: 6 }), + (constants::DW_OP_lit7, Operation::UnsignedConstant { value: 7 }), + (constants::DW_OP_lit8, Operation::UnsignedConstant { 
value: 8 }), + (constants::DW_OP_lit9, Operation::UnsignedConstant { value: 9 }), + (constants::DW_OP_lit10, Operation::UnsignedConstant { value: 10 }), + (constants::DW_OP_lit11, Operation::UnsignedConstant { value: 11 }), + (constants::DW_OP_lit12, Operation::UnsignedConstant { value: 12 }), + (constants::DW_OP_lit13, Operation::UnsignedConstant { value: 13 }), + (constants::DW_OP_lit14, Operation::UnsignedConstant { value: 14 }), + (constants::DW_OP_lit15, Operation::UnsignedConstant { value: 15 }), + (constants::DW_OP_lit16, Operation::UnsignedConstant { value: 16 }), + (constants::DW_OP_lit17, Operation::UnsignedConstant { value: 17 }), + (constants::DW_OP_lit18, Operation::UnsignedConstant { value: 18 }), + (constants::DW_OP_lit19, Operation::UnsignedConstant { value: 19 }), + (constants::DW_OP_lit20, Operation::UnsignedConstant { value: 20 }), + (constants::DW_OP_lit21, Operation::UnsignedConstant { value: 21 }), + (constants::DW_OP_lit22, Operation::UnsignedConstant { value: 22 }), + (constants::DW_OP_lit23, Operation::UnsignedConstant { value: 23 }), + (constants::DW_OP_lit24, Operation::UnsignedConstant { value: 24 }), + (constants::DW_OP_lit25, Operation::UnsignedConstant { value: 25 }), + (constants::DW_OP_lit26, Operation::UnsignedConstant { value: 26 }), + (constants::DW_OP_lit27, Operation::UnsignedConstant { value: 27 }), + (constants::DW_OP_lit28, Operation::UnsignedConstant { value: 28 }), + (constants::DW_OP_lit29, Operation::UnsignedConstant { value: 29 }), + (constants::DW_OP_lit30, Operation::UnsignedConstant { value: 30 }), + (constants::DW_OP_lit31, Operation::UnsignedConstant { value: 31 }), + (constants::DW_OP_reg0, Operation::Register { register: Register(0) }), + (constants::DW_OP_reg1, Operation::Register { register: Register(1) }), + (constants::DW_OP_reg2, Operation::Register { register: Register(2) }), + (constants::DW_OP_reg3, Operation::Register { register: Register(3) }), + (constants::DW_OP_reg4, Operation::Register { register: 
Register(4) }), + (constants::DW_OP_reg5, Operation::Register { register: Register(5) }), + (constants::DW_OP_reg6, Operation::Register { register: Register(6) }), + (constants::DW_OP_reg7, Operation::Register { register: Register(7) }), + (constants::DW_OP_reg8, Operation::Register { register: Register(8) }), + (constants::DW_OP_reg9, Operation::Register { register: Register(9) }), + (constants::DW_OP_reg10, Operation::Register { register: Register(10) }), + (constants::DW_OP_reg11, Operation::Register { register: Register(11) }), + (constants::DW_OP_reg12, Operation::Register { register: Register(12) }), + (constants::DW_OP_reg13, Operation::Register { register: Register(13) }), + (constants::DW_OP_reg14, Operation::Register { register: Register(14) }), + (constants::DW_OP_reg15, Operation::Register { register: Register(15) }), + (constants::DW_OP_reg16, Operation::Register { register: Register(16) }), + (constants::DW_OP_reg17, Operation::Register { register: Register(17) }), + (constants::DW_OP_reg18, Operation::Register { register: Register(18) }), + (constants::DW_OP_reg19, Operation::Register { register: Register(19) }), + (constants::DW_OP_reg20, Operation::Register { register: Register(20) }), + (constants::DW_OP_reg21, Operation::Register { register: Register(21) }), + (constants::DW_OP_reg22, Operation::Register { register: Register(22) }), + (constants::DW_OP_reg23, Operation::Register { register: Register(23) }), + (constants::DW_OP_reg24, Operation::Register { register: Register(24) }), + (constants::DW_OP_reg25, Operation::Register { register: Register(25) }), + (constants::DW_OP_reg26, Operation::Register { register: Register(26) }), + (constants::DW_OP_reg27, Operation::Register { register: Register(27) }), + (constants::DW_OP_reg28, Operation::Register { register: Register(28) }), + (constants::DW_OP_reg29, Operation::Register { register: Register(29) }), + (constants::DW_OP_reg30, Operation::Register { register: Register(30) }), + 
(constants::DW_OP_reg31, Operation::Register { register: Register(31) }), + (constants::DW_OP_nop, Operation::Nop), + (constants::DW_OP_push_object_address, Operation::PushObjectAddress), + (constants::DW_OP_form_tls_address, Operation::TLS), + (constants::DW_OP_GNU_push_tls_address, Operation::TLS), + (constants::DW_OP_call_frame_cfa, Operation::CallFrameCFA), + (constants::DW_OP_stack_value, Operation::StackValue), + ]; + + let input = []; + check_op_parse_eof(&input[..], encoding); + + for item in inputs.iter() { + let (opcode, ref result) = *item; + check_op_parse(|s| s.D8(opcode.0), result, encoding); + } + } + + #[test] + fn test_op_parse_twobyte() { + // Doesn't matter for this test. + let encoding = encoding4(); + + let inputs = [ + ( + constants::DW_OP_const1u, + 23, + Operation::UnsignedConstant { value: 23 }, + ), + ( + constants::DW_OP_const1s, + (-23i8) as u8, + Operation::SignedConstant { value: -23 }, + ), + (constants::DW_OP_pick, 7, Operation::Pick { index: 7 }), + ( + constants::DW_OP_deref_size, + 19, + Operation::Deref { + base_type: generic_type(), + size: 19, + space: false, + }, + ), + ( + constants::DW_OP_xderef_size, + 19, + Operation::Deref { + base_type: generic_type(), + size: 19, + space: true, + }, + ), + ]; + + for item in inputs.iter() { + let (opcode, arg, ref result) = *item; + check_op_parse(|s| s.D8(opcode.0).D8(arg), result, encoding); + } + } + + #[test] + fn test_op_parse_threebyte() { + // Doesn't matter for this test. + let encoding = encoding4(); + + // While bra and skip are 3-byte opcodes, they aren't tested here, + // but rather specially in their own function. 
+ let inputs = [ + ( + constants::DW_OP_const2u, + 23, + Operation::UnsignedConstant { value: 23 }, + ), + ( + constants::DW_OP_const2s, + (-23i16) as u16, + Operation::SignedConstant { value: -23 }, + ), + ( + constants::DW_OP_call2, + 1138, + Operation::Call { + offset: DieReference::UnitRef(UnitOffset(1138)), + }, + ), + ( + constants::DW_OP_bra, + (-23i16) as u16, + Operation::Bra { target: -23 }, + ), + ( + constants::DW_OP_skip, + (-23i16) as u16, + Operation::Skip { target: -23 }, + ), + ]; + + for item in inputs.iter() { + let (opcode, arg, ref result) = *item; + check_op_parse(|s| s.D8(opcode.0).L16(arg), result, encoding); + } + } + + #[test] + fn test_op_parse_fivebyte() { + // There are some tests here that depend on address size. + let encoding = encoding4(); + + let inputs = [ + ( + constants::DW_OP_addr, + 0x1234_5678, + Operation::Address { + address: 0x1234_5678, + }, + ), + ( + constants::DW_OP_const4u, + 0x1234_5678, + Operation::UnsignedConstant { value: 0x1234_5678 }, + ), + ( + constants::DW_OP_const4s, + (-23i32) as u32, + Operation::SignedConstant { value: -23 }, + ), + ( + constants::DW_OP_call4, + 0x1234_5678, + Operation::Call { + offset: DieReference::UnitRef(UnitOffset(0x1234_5678)), + }, + ), + ( + constants::DW_OP_call_ref, + 0x1234_5678, + Operation::Call { + offset: DieReference::DebugInfoRef(DebugInfoOffset(0x1234_5678)), + }, + ), + ]; + + for item in inputs.iter() { + let (op, arg, ref expect) = *item; + check_op_parse(|s| s.D8(op.0).L32(arg), expect, encoding); + } + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_op_parse_ninebyte() { + // There are some tests here that depend on address size. 
+ let encoding = encoding8(); + + let inputs = [ + ( + constants::DW_OP_addr, + 0x1234_5678_1234_5678, + Operation::Address { + address: 0x1234_5678_1234_5678, + }, + ), + ( + constants::DW_OP_const8u, + 0x1234_5678_1234_5678, + Operation::UnsignedConstant { + value: 0x1234_5678_1234_5678, + }, + ), + ( + constants::DW_OP_const8s, + (-23i64) as u64, + Operation::SignedConstant { value: -23 }, + ), + ( + constants::DW_OP_call_ref, + 0x1234_5678_1234_5678, + Operation::Call { + offset: DieReference::DebugInfoRef(DebugInfoOffset(0x1234_5678_1234_5678)), + }, + ), + ]; + + for item in inputs.iter() { + let (op, arg, ref expect) = *item; + check_op_parse(|s| s.D8(op.0).L64(arg), expect, encoding); + } + } + + #[test] + fn test_op_parse_sleb() { + // Doesn't matter for this test. + let encoding = encoding4(); + + let values = [ + -1i64, + 0, + 1, + 0x100, + 0x1eee_eeee, + 0x7fff_ffff_ffff_ffff, + -0x100, + -0x1eee_eeee, + -0x7fff_ffff_ffff_ffff, + ]; + for value in values.iter() { + let mut inputs = vec![ + ( + constants::DW_OP_consts.0, + Operation::SignedConstant { value: *value }, + ), + ( + constants::DW_OP_fbreg.0, + Operation::FrameOffset { offset: *value }, + ), + ]; + + for i in 0..32 { + inputs.push(( + constants::DW_OP_breg0.0 + i, + Operation::RegisterOffset { + register: Register(i.into()), + offset: *value, + base_type: UnitOffset(0), + }, + )); + } + + for item in inputs.iter() { + let (op, ref expect) = *item; + check_op_parse(|s| s.D8(op).sleb(*value), expect, encoding); + } + } + } + + #[test] + fn test_op_parse_uleb() { + // Doesn't matter for this test. 
+ let encoding = encoding4(); + + let values = [ + 0, + 1, + 0x100, + (!0u16).into(), + 0x1eee_eeee, + 0x7fff_ffff_ffff_ffff, + !0u64, + ]; + for value in values.iter() { + let mut inputs = vec![ + ( + constants::DW_OP_constu, + Operation::UnsignedConstant { value: *value }, + ), + ( + constants::DW_OP_plus_uconst, + Operation::PlusConstant { value: *value }, + ), + ]; + + if *value <= (!0u16).into() { + inputs.push(( + constants::DW_OP_regx, + Operation::Register { + register: Register::from_u64(*value).unwrap(), + }, + )); + } + + if *value <= (!0u32).into() { + inputs.extend(&[ + ( + constants::DW_OP_addrx, + Operation::AddressIndex { + index: DebugAddrIndex(*value as usize), + }, + ), + ( + constants::DW_OP_constx, + Operation::ConstantIndex { + index: DebugAddrIndex(*value as usize), + }, + ), + ]); + } + + // FIXME + if *value < !0u64 / 8 { + inputs.push(( + constants::DW_OP_piece, + Operation::Piece { + size_in_bits: 8 * value, + bit_offset: None, + }, + )); + } + + for item in inputs.iter() { + let (op, ref expect) = *item; + let input = Section::with_endian(Endian::Little) + .D8(op.0) + .uleb(*value) + .get_contents() + .unwrap(); + check_op_parse_simple(&input, expect, encoding); + } + } + } + + #[test] + fn test_op_parse_bregx() { + // Doesn't matter for this test. + let encoding = encoding4(); + + let uvalues = [0, 1, 0x100, !0u16]; + let svalues = [ + -1i64, + 0, + 1, + 0x100, + 0x1eee_eeee, + 0x7fff_ffff_ffff_ffff, + -0x100, + -0x1eee_eeee, + -0x7fff_ffff_ffff_ffff, + ]; + + for v1 in uvalues.iter() { + for v2 in svalues.iter() { + check_op_parse( + |s| s.D8(constants::DW_OP_bregx.0).uleb((*v1).into()).sleb(*v2), + &Operation::RegisterOffset { + register: Register(*v1), + offset: *v2, + base_type: UnitOffset(0), + }, + encoding, + ); + } + } + } + + #[test] + fn test_op_parse_bit_piece() { + // Doesn't matter for this test. 
+ let encoding = encoding4(); + + let values = [0, 1, 0x100, 0x1eee_eeee, 0x7fff_ffff_ffff_ffff, !0u64]; + + for v1 in values.iter() { + for v2 in values.iter() { + let input = Section::with_endian(Endian::Little) + .D8(constants::DW_OP_bit_piece.0) + .uleb(*v1) + .uleb(*v2) + .get_contents() + .unwrap(); + check_op_parse_simple( + &input, + &Operation::Piece { + size_in_bits: *v1, + bit_offset: Some(*v2), + }, + encoding, + ); + } + } + } + + #[test] + fn test_op_parse_implicit_value() { + // Doesn't matter for this test. + let encoding = encoding4(); + + let data = b"hello"; + + check_op_parse( + |s| { + s.D8(constants::DW_OP_implicit_value.0) + .uleb(data.len() as u64) + .append_bytes(&data[..]) + }, + &Operation::ImplicitValue { + data: EndianSlice::new(&data[..], LittleEndian), + }, + encoding, + ); + } + + #[test] + fn test_op_parse_const_type() { + // Doesn't matter for this test. + let encoding = encoding4(); + + let data = b"hello"; + + check_op_parse( + |s| { + s.D8(constants::DW_OP_const_type.0) + .uleb(100) + .D8(data.len() as u8) + .append_bytes(&data[..]) + }, + &Operation::TypedLiteral { + base_type: UnitOffset(100), + value: EndianSlice::new(&data[..], LittleEndian), + }, + encoding, + ); + check_op_parse( + |s| { + s.D8(constants::DW_OP_GNU_const_type.0) + .uleb(100) + .D8(data.len() as u8) + .append_bytes(&data[..]) + }, + &Operation::TypedLiteral { + base_type: UnitOffset(100), + value: EndianSlice::new(&data[..], LittleEndian), + }, + encoding, + ); + } + + #[test] + fn test_op_parse_regval_type() { + // Doesn't matter for this test. 
+ let encoding = encoding4(); + + check_op_parse( + |s| s.D8(constants::DW_OP_regval_type.0).uleb(1).uleb(100), + &Operation::RegisterOffset { + register: Register(1), + offset: 0, + base_type: UnitOffset(100), + }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_GNU_regval_type.0).uleb(1).uleb(100), + &Operation::RegisterOffset { + register: Register(1), + offset: 0, + base_type: UnitOffset(100), + }, + encoding, + ); + } + + #[test] + fn test_op_parse_deref_type() { + // Doesn't matter for this test. + let encoding = encoding4(); + + check_op_parse( + |s| s.D8(constants::DW_OP_deref_type.0).D8(8).uleb(100), + &Operation::Deref { + base_type: UnitOffset(100), + size: 8, + space: false, + }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_GNU_deref_type.0).D8(8).uleb(100), + &Operation::Deref { + base_type: UnitOffset(100), + size: 8, + space: false, + }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_xderef_type.0).D8(8).uleb(100), + &Operation::Deref { + base_type: UnitOffset(100), + size: 8, + space: true, + }, + encoding, + ); + } + + #[test] + fn test_op_convert() { + // Doesn't matter for this test. + let encoding = encoding4(); + + check_op_parse( + |s| s.D8(constants::DW_OP_convert.0).uleb(100), + &Operation::Convert { + base_type: UnitOffset(100), + }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_GNU_convert.0).uleb(100), + &Operation::Convert { + base_type: UnitOffset(100), + }, + encoding, + ); + } + + #[test] + fn test_op_reinterpret() { + // Doesn't matter for this test. 
+ let encoding = encoding4(); + + check_op_parse( + |s| s.D8(constants::DW_OP_reinterpret.0).uleb(100), + &Operation::Reinterpret { + base_type: UnitOffset(100), + }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_GNU_reinterpret.0).uleb(100), + &Operation::Reinterpret { + base_type: UnitOffset(100), + }, + encoding, + ); + } + + #[test] + fn test_op_parse_implicit_pointer() { + for op in &[ + constants::DW_OP_implicit_pointer, + constants::DW_OP_GNU_implicit_pointer, + ] { + check_op_parse( + |s| s.D8(op.0).D32(0x1234_5678).sleb(0x123), + &Operation::ImplicitPointer { + value: DebugInfoOffset(0x1234_5678), + byte_offset: 0x123, + }, + encoding4(), + ); + + check_op_parse( + |s| s.D8(op.0).D64(0x1234_5678).sleb(0x123), + &Operation::ImplicitPointer { + value: DebugInfoOffset(0x1234_5678), + byte_offset: 0x123, + }, + encoding8(), + ); + + check_op_parse( + |s| s.D8(op.0).D64(0x1234_5678).sleb(0x123), + &Operation::ImplicitPointer { + value: DebugInfoOffset(0x1234_5678), + byte_offset: 0x123, + }, + Encoding { + format: Format::Dwarf32, + version: 2, + address_size: 8, + }, + ) + } + } + + #[test] + fn test_op_parse_entry_value() { + for op in &[ + constants::DW_OP_entry_value, + constants::DW_OP_GNU_entry_value, + ] { + let data = b"hello"; + check_op_parse( + |s| s.D8(op.0).uleb(data.len() as u64).append_bytes(&data[..]), + &Operation::EntryValue { + expression: EndianSlice::new(&data[..], LittleEndian), + }, + encoding4(), + ); + } + } + + #[test] + fn test_op_parse_gnu_parameter_ref() { + check_op_parse( + |s| s.D8(constants::DW_OP_GNU_parameter_ref.0).D32(0x1234_5678), + &Operation::ParameterRef { + offset: UnitOffset(0x1234_5678), + }, + encoding4(), + ) + } + + #[test] + fn test_op_wasm() { + // Doesn't matter for this test. 
+ let encoding = encoding4(); + + check_op_parse( + |s| s.D8(constants::DW_OP_WASM_location.0).D8(0).uleb(1000), + &Operation::WasmLocal { index: 1000 }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_WASM_location.0).D8(1).uleb(1000), + &Operation::WasmGlobal { index: 1000 }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_WASM_location.0).D8(2).uleb(1000), + &Operation::WasmStack { index: 1000 }, + encoding, + ); + check_op_parse( + |s| s.D8(constants::DW_OP_WASM_location.0).D8(3).D32(1000), + &Operation::WasmGlobal { index: 1000 }, + encoding, + ); + } + + enum AssemblerEntry { + Op(constants::DwOp), + Mark(u8), + Branch(u8), + U8(u8), + U16(u16), + U32(u32), + U64(u64), + Uleb(u64), + Sleb(u64), + } + + fn assemble(entries: &[AssemblerEntry]) -> Vec { + let mut result = Vec::new(); + + struct Marker(Option, Vec); + + let mut markers = Vec::new(); + for _ in 0..256 { + markers.push(Marker(None, Vec::new())); + } + + fn write(stack: &mut [u8], index: usize, mut num: u64, nbytes: u8) { + for i in 0..nbytes as usize { + stack[index + i] = (num & 0xff) as u8; + num >>= 8; + } + } + + fn push(stack: &mut Vec, num: u64, nbytes: u8) { + let index = stack.len(); + for _ in 0..nbytes { + stack.push(0); + } + write(stack, index, num, nbytes); + } + + for item in entries { + match *item { + AssemblerEntry::Op(op) => result.push(op.0), + AssemblerEntry::Mark(num) => { + assert!(markers[num as usize].0.is_none()); + markers[num as usize].0 = Some(result.len()); + } + AssemblerEntry::Branch(num) => { + markers[num as usize].1.push(result.len()); + push(&mut result, 0, 2); + } + AssemblerEntry::U8(num) => result.push(num), + AssemblerEntry::U16(num) => push(&mut result, u64::from(num), 2), + AssemblerEntry::U32(num) => push(&mut result, u64::from(num), 4), + AssemblerEntry::U64(num) => push(&mut result, num, 8), + AssemblerEntry::Uleb(num) => { + leb128::write::unsigned(&mut result, num).unwrap(); + } + AssemblerEntry::Sleb(num) => { + 
leb128::write::signed(&mut result, num as i64).unwrap(); + } + } + } + + // Update all the branches. + for marker in markers { + if let Some(offset) = marker.0 { + for branch_offset in marker.1 { + let delta = offset.wrapping_sub(branch_offset + 2) as u64; + write(&mut result, branch_offset, delta, 2); + } + } + } + + result + } + + fn check_eval_with_args( + program: &[AssemblerEntry], + expect: Result<&[Piece>]>, + encoding: Encoding, + object_address: Option, + initial_value: Option, + max_iterations: Option, + f: F, + ) where + for<'a> F: Fn( + &mut Evaluation>, + EvaluationResult>, + ) -> Result>>, + { + let bytes = assemble(program); + let bytes = EndianSlice::new(&bytes, LittleEndian); + + let mut eval = Evaluation::new(bytes, encoding); + + if let Some(val) = object_address { + eval.set_object_address(val); + } + if let Some(val) = initial_value { + eval.set_initial_value(val); + } + if let Some(val) = max_iterations { + eval.set_max_iterations(val); + } + + let result = match eval.evaluate() { + Err(e) => Err(e), + Ok(r) => f(&mut eval, r), + }; + + match (result, expect) { + (Ok(EvaluationResult::Complete), Ok(pieces)) => { + let vec = eval.result(); + assert_eq!(vec.len(), pieces.len()); + for i in 0..pieces.len() { + assert_eq!(vec[i], pieces[i]); + } + } + (Err(f1), Err(f2)) => { + assert_eq!(f1, f2); + } + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + fn check_eval( + program: &[AssemblerEntry], + expect: Result<&[Piece>]>, + encoding: Encoding, + ) { + check_eval_with_args(program, expect, encoding, None, None, None, |_, result| { + Ok(result) + }); + } + + #[test] + fn test_eval_arith() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + // Indices of marks in the assembly. 
+ let done = 0; + let fail = 1; + + #[rustfmt::skip] + let program = [ + Op(DW_OP_const1u), U8(23), + Op(DW_OP_const1s), U8((-23i8) as u8), + Op(DW_OP_plus), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const2u), U16(23), + Op(DW_OP_const2s), U16((-23i16) as u16), + Op(DW_OP_plus), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const4u), U32(0x1111_2222), + Op(DW_OP_const4s), U32((-0x1111_2222i32) as u32), + Op(DW_OP_plus), + Op(DW_OP_bra), Branch(fail), + + // Plus should overflow. + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_const1u), U8(1), + Op(DW_OP_plus), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_plus_uconst), Uleb(1), + Op(DW_OP_bra), Branch(fail), + + // Minus should underflow. + Op(DW_OP_const1s), U8(0), + Op(DW_OP_const1u), U8(1), + Op(DW_OP_minus), + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_abs), + Op(DW_OP_const1u), U8(1), + Op(DW_OP_minus), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const4u), U32(0xf078_fffe), + Op(DW_OP_const4u), U32(0x0f87_0001), + Op(DW_OP_and), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const4u), U32(0xf078_fffe), + Op(DW_OP_const4u), U32(0xf000_00fe), + Op(DW_OP_and), + Op(DW_OP_const4u), U32(0xf000_00fe), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + // Division is signed. + Op(DW_OP_const1s), U8(0xfe), + Op(DW_OP_const1s), U8(2), + Op(DW_OP_div), + Op(DW_OP_plus_uconst), Uleb(1), + Op(DW_OP_bra), Branch(fail), + + // Mod is unsigned. + Op(DW_OP_const1s), U8(0xfd), + Op(DW_OP_const1s), U8(2), + Op(DW_OP_mod), + Op(DW_OP_neg), + Op(DW_OP_plus_uconst), Uleb(1), + Op(DW_OP_bra), Branch(fail), + + // Overflow is defined for multiplication. 
+ Op(DW_OP_const4u), U32(0x8000_0001), + Op(DW_OP_lit2), + Op(DW_OP_mul), + Op(DW_OP_lit2), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const4u), U32(0xf0f0_f0f0), + Op(DW_OP_const4u), U32(0xf0f0_f0f0), + Op(DW_OP_xor), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const4u), U32(0xf0f0_f0f0), + Op(DW_OP_const4u), U32(0x0f0f_0f0f), + Op(DW_OP_or), + Op(DW_OP_not), + Op(DW_OP_bra), Branch(fail), + + // In 32 bit mode, values are truncated. + Op(DW_OP_const8u), U64(0xffff_ffff_0000_0000), + Op(DW_OP_lit2), + Op(DW_OP_div), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1u), U8(0xff), + Op(DW_OP_lit1), + Op(DW_OP_shl), + Op(DW_OP_const2u), U16(0x1fe), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1u), U8(0xff), + Op(DW_OP_const1u), U8(50), + Op(DW_OP_shl), + Op(DW_OP_bra), Branch(fail), + + // Absurd shift. + Op(DW_OP_const1u), U8(0xff), + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_shl), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_lit1), + Op(DW_OP_shr), + Op(DW_OP_const4u), U32(0x7fff_ffff), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_const1u), U8(0xff), + Op(DW_OP_shr), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_lit1), + Op(DW_OP_shra), + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_const1u), U8(0xff), + Op(DW_OP_shra), + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + // Success. 
+ Op(DW_OP_lit0), + Op(DW_OP_nop), + Op(DW_OP_skip), Branch(done), + + Mark(fail), + Op(DW_OP_lit1), + + Mark(done), + Op(DW_OP_stack_value), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(0), + }, + }]; + + check_eval(&program, Ok(&result), encoding4()); + } + + #[test] + fn test_eval_arith64() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + // Indices of marks in the assembly. + let done = 0; + let fail = 1; + + #[rustfmt::skip] + let program = [ + Op(DW_OP_const8u), U64(0x1111_2222_3333_4444), + Op(DW_OP_const8s), U64((-0x1111_2222_3333_4444i64) as u64), + Op(DW_OP_plus), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_constu), Uleb(0x1111_2222_3333_4444), + Op(DW_OP_consts), Sleb((-0x1111_2222_3333_4444i64) as u64), + Op(DW_OP_plus), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_lit1), + Op(DW_OP_plus_uconst), Uleb(!0u64), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_lit1), + Op(DW_OP_neg), + Op(DW_OP_not), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const8u), U64(0x8000_0000_0000_0000), + Op(DW_OP_const1u), U8(63), + Op(DW_OP_shr), + Op(DW_OP_lit1), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const8u), U64(0x8000_0000_0000_0000), + Op(DW_OP_const1u), U8(62), + Op(DW_OP_shra), + Op(DW_OP_plus_uconst), Uleb(2), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_lit1), + Op(DW_OP_const1u), U8(63), + Op(DW_OP_shl), + Op(DW_OP_const8u), U64(0x8000_0000_0000_0000), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + // Success. 
+ Op(DW_OP_lit0), + Op(DW_OP_nop), + Op(DW_OP_skip), Branch(done), + + Mark(fail), + Op(DW_OP_lit1), + + Mark(done), + Op(DW_OP_stack_value), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(0), + }, + }]; + + check_eval(&program, Ok(&result), encoding8()); + } + + #[test] + fn test_eval_compare() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + // Indices of marks in the assembly. + let done = 0; + let fail = 1; + + #[rustfmt::skip] + let program = [ + // Comparisons are signed. + Op(DW_OP_const1s), U8(1), + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_lt), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_const1s), U8(1), + Op(DW_OP_gt), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(1), + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_le), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_const1s), U8(1), + Op(DW_OP_ge), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const1s), U8(0xff), + Op(DW_OP_const1s), U8(1), + Op(DW_OP_eq), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_const4s), U32(1), + Op(DW_OP_const1s), U8(1), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + // Success. + Op(DW_OP_lit0), + Op(DW_OP_nop), + Op(DW_OP_skip), Branch(done), + + Mark(fail), + Op(DW_OP_lit1), + + Mark(done), + Op(DW_OP_stack_value), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(0), + }, + }]; + + check_eval(&program, Ok(&result), encoding4()); + } + + #[test] + fn test_eval_stack() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. 
+ use self::AssemblerEntry::*; + use crate::constants::*; + + #[rustfmt::skip] + let program = [ + Op(DW_OP_lit17), // -- 17 + Op(DW_OP_dup), // -- 17 17 + Op(DW_OP_over), // -- 17 17 17 + Op(DW_OP_minus), // -- 17 0 + Op(DW_OP_swap), // -- 0 17 + Op(DW_OP_dup), // -- 0 17 17 + Op(DW_OP_plus_uconst), Uleb(1), // -- 0 17 18 + Op(DW_OP_rot), // -- 18 0 17 + Op(DW_OP_pick), U8(2), // -- 18 0 17 18 + Op(DW_OP_pick), U8(3), // -- 18 0 17 18 18 + Op(DW_OP_minus), // -- 18 0 17 0 + Op(DW_OP_drop), // -- 18 0 17 + Op(DW_OP_swap), // -- 18 17 0 + Op(DW_OP_drop), // -- 18 17 + Op(DW_OP_minus), // -- 1 + Op(DW_OP_stack_value), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(1), + }, + }]; + + check_eval(&program, Ok(&result), encoding4()); + } + + #[test] + fn test_eval_lit_and_reg() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + let mut program = Vec::new(); + program.push(Op(DW_OP_lit0)); + for i in 0..32 { + program.push(Op(DwOp(DW_OP_lit0.0 + i))); + program.push(Op(DwOp(DW_OP_breg0.0 + i))); + program.push(Sleb(u64::from(i))); + program.push(Op(DW_OP_plus)); + program.push(Op(DW_OP_plus)); + } + + program.push(Op(DW_OP_bregx)); + program.push(Uleb(0x1234)); + program.push(Sleb(0x1234)); + program.push(Op(DW_OP_plus)); + + program.push(Op(DW_OP_stack_value)); + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(496), + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding4(), + None, + None, + None, + |eval, mut result| { + while result != EvaluationResult::Complete { + result = eval.resume_with_register(match result { + EvaluationResult::RequiresRegister { + register, + base_type, + } => { + assert_eq!(base_type, UnitOffset(0)); + Value::Generic(u64::from(register.0).wrapping_neg()) + } + _ 
=> panic!(), + })?; + } + Ok(result) + }, + ); + } + + #[test] + fn test_eval_memory() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + // Indices of marks in the assembly. + let done = 0; + let fail = 1; + + #[rustfmt::skip] + let program = [ + Op(DW_OP_addr), U32(0x7fff_ffff), + Op(DW_OP_deref), + Op(DW_OP_const4u), U32(0xffff_fffc), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_addr), U32(0x7fff_ffff), + Op(DW_OP_deref_size), U8(2), + Op(DW_OP_const4u), U32(0xfffc), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_lit1), + Op(DW_OP_addr), U32(0x7fff_ffff), + Op(DW_OP_xderef), + Op(DW_OP_const4u), U32(0xffff_fffd), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_lit1), + Op(DW_OP_addr), U32(0x7fff_ffff), + Op(DW_OP_xderef_size), U8(2), + Op(DW_OP_const4u), U32(0xfffd), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_lit17), + Op(DW_OP_form_tls_address), + Op(DW_OP_constu), Uleb(!17), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_lit17), + Op(DW_OP_GNU_push_tls_address), + Op(DW_OP_constu), Uleb(!17), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_addrx), Uleb(0x10), + Op(DW_OP_deref), + Op(DW_OP_const4u), U32(0x4040), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + Op(DW_OP_constx), Uleb(17), + Op(DW_OP_form_tls_address), + Op(DW_OP_constu), Uleb(!27), + Op(DW_OP_ne), + Op(DW_OP_bra), Branch(fail), + + // Success. 
+ Op(DW_OP_lit0), + Op(DW_OP_nop), + Op(DW_OP_skip), Branch(done), + + Mark(fail), + Op(DW_OP_lit1), + + Mark(done), + Op(DW_OP_stack_value), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(0), + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding4(), + None, + None, + None, + |eval, mut result| { + while result != EvaluationResult::Complete { + result = match result { + EvaluationResult::RequiresMemory { + address, + size, + space, + base_type, + } => { + assert_eq!(base_type, UnitOffset(0)); + let mut v = address << 2; + if let Some(value) = space { + v += value; + } + v &= (1u64 << (8 * size)) - 1; + eval.resume_with_memory(Value::Generic(v))? + } + EvaluationResult::RequiresTls(slot) => eval.resume_with_tls(!slot)?, + EvaluationResult::RequiresRelocatedAddress(address) => { + eval.resume_with_relocated_address(address)? + } + EvaluationResult::RequiresIndexedAddress { index, relocate } => { + if relocate { + eval.resume_with_indexed_address(0x1000 + index.0 as u64)? + } else { + eval.resume_with_indexed_address(10 + index.0 as u64)? + } + } + _ => panic!(), + }; + } + + Ok(result) + }, + ); + } + + #[test] + fn test_eval_register() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + for i in 0..32 { + #[rustfmt::skip] + let program = [ + Op(DwOp(DW_OP_reg0.0 + i)), + // Included only in the "bad" run. 
+ Op(DW_OP_lit23), + ]; + let ok_result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Register { + register: Register(i.into()), + }, + }]; + + check_eval(&program[..1], Ok(&ok_result), encoding4()); + + check_eval( + &program, + Err(Error::InvalidExpressionTerminator(1)), + encoding4(), + ); + } + + #[rustfmt::skip] + let program = [ + Op(DW_OP_regx), Uleb(0x1234) + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Register { + register: Register(0x1234), + }, + }]; + + check_eval(&program, Ok(&result), encoding4()); + } + + #[test] + fn test_eval_context() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + // Test `frame_base` and `call_frame_cfa` callbacks. + #[rustfmt::skip] + let program = [ + Op(DW_OP_fbreg), Sleb((-8i8) as u64), + Op(DW_OP_call_frame_cfa), + Op(DW_OP_plus), + Op(DW_OP_neg), + Op(DW_OP_stack_value) + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(9), + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding8(), + None, + None, + None, + |eval, result| { + match result { + EvaluationResult::RequiresFrameBase => {} + _ => panic!(), + }; + match eval.resume_with_frame_base(0x0123_4567_89ab_cdef)? { + EvaluationResult::RequiresCallFrameCfa => {} + _ => panic!(), + }; + eval.resume_with_call_frame_cfa(0xfedc_ba98_7654_3210) + }, + ); + + // Test `evaluate_entry_value` callback. 
+ #[rustfmt::skip] + let program = [ + Op(DW_OP_entry_value), Uleb(8), U64(0x1234_5678), + Op(DW_OP_stack_value) + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(0x1234_5678), + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding8(), + None, + None, + None, + |eval, result| { + let entry_value = match result { + EvaluationResult::RequiresEntryValue(mut expression) => { + expression.0.read_u64()? + } + _ => panic!(), + }; + eval.resume_with_entry_value(Value::Generic(entry_value)) + }, + ); + + // Test missing `object_address` field. + #[rustfmt::skip] + let program = [ + Op(DW_OP_push_object_address), + ]; + + check_eval_with_args( + &program, + Err(Error::InvalidPushObjectAddress), + encoding4(), + None, + None, + None, + |_, _| panic!(), + ); + + // Test `object_address` field. + #[rustfmt::skip] + let program = [ + Op(DW_OP_push_object_address), + Op(DW_OP_stack_value), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(0xff), + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding8(), + Some(0xff), + None, + None, + |_, result| Ok(result), + ); + + // Test `initial_value` field. + #[rustfmt::skip] + let program = [ + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Address { + address: 0x1234_5678, + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding8(), + None, + Some(0x1234_5678), + None, + |_, result| Ok(result), + ); + } + + #[test] + fn test_eval_empty_stack() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. 
+ use self::AssemblerEntry::*; + use crate::constants::*; + + #[rustfmt::skip] + let program = [ + Op(DW_OP_stack_value) + ]; + + check_eval(&program, Err(Error::NotEnoughStackItems), encoding4()); + } + + #[test] + fn test_eval_call() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + #[rustfmt::skip] + let program = [ + Op(DW_OP_lit23), + Op(DW_OP_call2), U16(0x7755), + Op(DW_OP_call4), U32(0x7755_aaee), + Op(DW_OP_call_ref), U32(0x7755_aaee), + Op(DW_OP_stack_value) + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(23), + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding4(), + None, + None, + None, + |eval, result| { + let buf = EndianSlice::new(&[], LittleEndian); + match result { + EvaluationResult::RequiresAtLocation(_) => {} + _ => panic!(), + }; + + eval.resume_with_at_location(buf)?; + + match result { + EvaluationResult::RequiresAtLocation(_) => {} + _ => panic!(), + }; + + eval.resume_with_at_location(buf)?; + + match result { + EvaluationResult::RequiresAtLocation(_) => {} + _ => panic!(), + }; + + eval.resume_with_at_location(buf) + }, + ); + + // DW_OP_lit2 DW_OP_mul + const SUBR: &[u8] = &[0x32, 0x1e]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { + value: Value::Generic(184), + }, + }]; + + check_eval_with_args( + &program, + Ok(&result), + encoding4(), + None, + None, + None, + |eval, result| { + let buf = EndianSlice::new(SUBR, LittleEndian); + match result { + EvaluationResult::RequiresAtLocation(_) => {} + _ => panic!(), + }; + + eval.resume_with_at_location(buf)?; + + match result { + EvaluationResult::RequiresAtLocation(_) => {} + _ => panic!(), + }; + + eval.resume_with_at_location(buf)?; + + match result { + EvaluationResult::RequiresAtLocation(_) => {} + _ => panic!(), + }; + + 
eval.resume_with_at_location(buf) + }, + ); + } + + #[test] + fn test_eval_pieces() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. + use self::AssemblerEntry::*; + use crate::constants::*; + + // Example from DWARF 2.6.1.3. + #[rustfmt::skip] + let program = [ + Op(DW_OP_reg3), + Op(DW_OP_piece), Uleb(4), + Op(DW_OP_reg4), + Op(DW_OP_piece), Uleb(2), + ]; + + let result = [ + Piece { + size_in_bits: Some(32), + bit_offset: None, + location: Location::Register { + register: Register(3), + }, + }, + Piece { + size_in_bits: Some(16), + bit_offset: None, + location: Location::Register { + register: Register(4), + }, + }, + ]; + + check_eval(&program, Ok(&result), encoding4()); + + // Example from DWARF 2.6.1.3 (but hacked since dealing with fbreg + // in the tests is a pain). + #[rustfmt::skip] + let program = [ + Op(DW_OP_reg0), + Op(DW_OP_piece), Uleb(4), + Op(DW_OP_piece), Uleb(4), + Op(DW_OP_addr), U32(0x7fff_ffff), + Op(DW_OP_piece), Uleb(4), + ]; + + let result = [ + Piece { + size_in_bits: Some(32), + bit_offset: None, + location: Location::Register { + register: Register(0), + }, + }, + Piece { + size_in_bits: Some(32), + bit_offset: None, + location: Location::Empty, + }, + Piece { + size_in_bits: Some(32), + bit_offset: None, + location: Location::Address { + address: 0x7fff_ffff, + }, + }, + ]; + + check_eval_with_args( + &program, + Ok(&result), + encoding4(), + None, + None, + None, + |eval, mut result| { + while result != EvaluationResult::Complete { + result = match result { + EvaluationResult::RequiresRelocatedAddress(address) => { + eval.resume_with_relocated_address(address)? 
+ } + _ => panic!(), + }; + } + + Ok(result) + }, + ); + + #[rustfmt::skip] + let program = [ + Op(DW_OP_implicit_value), Uleb(5), + U8(23), U8(24), U8(25), U8(26), U8(0), + ]; + + const BYTES: &[u8] = &[23, 24, 25, 26, 0]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Bytes { + value: EndianSlice::new(BYTES, LittleEndian), + }, + }]; + + check_eval(&program, Ok(&result), encoding4()); + + #[rustfmt::skip] + let program = [ + Op(DW_OP_lit7), + Op(DW_OP_stack_value), + Op(DW_OP_bit_piece), Uleb(5), Uleb(0), + Op(DW_OP_bit_piece), Uleb(3), Uleb(0), + ]; + + let result = [ + Piece { + size_in_bits: Some(5), + bit_offset: Some(0), + location: Location::Value { + value: Value::Generic(7), + }, + }, + Piece { + size_in_bits: Some(3), + bit_offset: Some(0), + location: Location::Empty, + }, + ]; + + check_eval(&program, Ok(&result), encoding4()); + + #[rustfmt::skip] + let program = [ + Op(DW_OP_lit7), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Address { address: 7 }, + }]; + + check_eval(&program, Ok(&result), encoding4()); + + #[rustfmt::skip] + let program = [ + Op(DW_OP_implicit_pointer), U32(0x1234_5678), Sleb(0x123), + ]; + + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::ImplicitPointer { + value: DebugInfoOffset(0x1234_5678), + byte_offset: 0x123, + }, + }]; + + check_eval(&program, Ok(&result), encoding4()); + + #[rustfmt::skip] + let program = [ + Op(DW_OP_reg3), + Op(DW_OP_piece), Uleb(4), + Op(DW_OP_reg4), + ]; + + check_eval(&program, Err(Error::InvalidPiece), encoding4()); + + #[rustfmt::skip] + let program = [ + Op(DW_OP_reg3), + Op(DW_OP_piece), Uleb(4), + Op(DW_OP_lit0), + ]; + + check_eval(&program, Err(Error::InvalidPiece), encoding4()); + } + + #[test] + fn test_eval_max_iterations() { + // It's nice if an operation and its arguments can fit on a single + // line in the test program. 
+ use self::AssemblerEntry::*; + use crate::constants::*; + + #[rustfmt::skip] + let program = [ + Mark(1), + Op(DW_OP_skip), Branch(1), + ]; + + check_eval_with_args( + &program, + Err(Error::TooManyIterations), + encoding4(), + None, + None, + Some(150), + |_, _| panic!(), + ); + } + + #[test] + fn test_eval_typed_stack() { + use self::AssemblerEntry::*; + use crate::constants::*; + + let base_types = [ + ValueType::Generic, + ValueType::U16, + ValueType::U32, + ValueType::F32, + ]; + + // TODO: convert, reinterpret + #[rustfmt::skip] + let tests = [ + ( + &[ + Op(DW_OP_const_type), Uleb(1), U8(2), U16(0x1234), + Op(DW_OP_stack_value), + ][..], + Value::U16(0x1234), + ), + ( + &[ + Op(DW_OP_regval_type), Uleb(0x1234), Uleb(1), + Op(DW_OP_stack_value), + ][..], + Value::U16(0x2340), + ), + ( + &[ + Op(DW_OP_addr), U32(0x7fff_ffff), + Op(DW_OP_deref_type), U8(2), Uleb(1), + Op(DW_OP_stack_value), + ][..], + Value::U16(0xfff0), + ), + ( + &[ + Op(DW_OP_lit1), + Op(DW_OP_addr), U32(0x7fff_ffff), + Op(DW_OP_xderef_type), U8(2), Uleb(1), + Op(DW_OP_stack_value), + ][..], + Value::U16(0xfff1), + ), + ( + &[ + Op(DW_OP_const_type), Uleb(1), U8(2), U16(0x1234), + Op(DW_OP_convert), Uleb(2), + Op(DW_OP_stack_value), + ][..], + Value::U32(0x1234), + ), + ( + &[ + Op(DW_OP_const_type), Uleb(2), U8(4), U32(0x3f80_0000), + Op(DW_OP_reinterpret), Uleb(3), + Op(DW_OP_stack_value), + ][..], + Value::F32(1.0), + ), + ]; + for &(program, value) in &tests { + let result = [Piece { + size_in_bits: None, + bit_offset: None, + location: Location::Value { value }, + }]; + + check_eval_with_args( + program, + Ok(&result), + encoding4(), + None, + None, + None, + |eval, mut result| { + while result != EvaluationResult::Complete { + result = match result { + EvaluationResult::RequiresMemory { + address, + size, + space, + base_type, + } => { + let mut v = address << 4; + if let Some(value) = space { + v += value; + } + v &= (1u64 << (8 * size)) - 1; + let v = 
Value::from_u64(base_types[base_type.0], v)?; + eval.resume_with_memory(v)? + } + EvaluationResult::RequiresRegister { + register, + base_type, + } => { + let v = Value::from_u64( + base_types[base_type.0], + u64::from(register.0) << 4, + )?; + eval.resume_with_register(v)? + } + EvaluationResult::RequiresBaseType(offset) => { + eval.resume_with_base_type(base_types[offset.0])? + } + EvaluationResult::RequiresRelocatedAddress(address) => { + eval.resume_with_relocated_address(address)? + } + _ => panic!("Unexpected result {:?}", result), + } + } + Ok(result) + }, + ); + } + } +} diff --git a/third_party/rust/gimli/src/read/pubnames.rs b/third_party/rust/gimli/src/read/pubnames.rs new file mode 100644 index 000000000000..e8b7e5528633 --- /dev/null +++ b/third_party/rust/gimli/src/read/pubnames.rs @@ -0,0 +1,141 @@ +use crate::common::{DebugInfoOffset, SectionId}; +use crate::endianity::Endianity; +use crate::read::lookup::{DebugLookup, LookupEntryIter, PubStuffEntry, PubStuffParser}; +use crate::read::{EndianSlice, Reader, Result, Section, UnitOffset}; + +/// A single parsed pubname. +#[derive(Debug, Clone)] +pub struct PubNamesEntry { + unit_header_offset: DebugInfoOffset, + die_offset: UnitOffset, + name: R, +} + +impl PubNamesEntry { + /// Returns the name this entry refers to. + pub fn name(&self) -> &R { + &self.name + } + + /// Returns the offset into the .debug_info section for the header of the compilation unit + /// which contains this name. + pub fn unit_header_offset(&self) -> DebugInfoOffset { + self.unit_header_offset + } + + /// Returns the offset into the compilation unit for the debugging information entry which + /// has this name. 
+ pub fn die_offset(&self) -> UnitOffset { + self.die_offset + } +} + +impl PubStuffEntry for PubNamesEntry { + fn new( + die_offset: UnitOffset, + name: R, + unit_header_offset: DebugInfoOffset, + ) -> Self { + PubNamesEntry { + unit_header_offset, + die_offset, + name, + } + } +} + +/// The `DebugPubNames` struct represents the DWARF public names information +/// found in the `.debug_pubnames` section. +#[derive(Debug, Clone)] +pub struct DebugPubNames(DebugLookup>>); + +impl<'input, Endian> DebugPubNames> +where + Endian: Endianity, +{ + /// Construct a new `DebugPubNames` instance from the data in the `.debug_pubnames` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_pubnames` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugPubNames, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_pubnames_section_somehow = || &buf; + /// let debug_pubnames = + /// DebugPubNames::new(read_debug_pubnames_section_somehow(), LittleEndian); + /// ``` + pub fn new(debug_pubnames_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_pubnames_section, endian)) + } +} + +impl DebugPubNames { + /// Iterate the pubnames in the `.debug_pubnames` section. 
+ /// + /// ``` + /// use gimli::{DebugPubNames, EndianSlice, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_pubnames_section_somehow = || &buf; + /// let debug_pubnames = + /// DebugPubNames::new(read_debug_pubnames_section_somehow(), LittleEndian); + /// + /// let mut iter = debug_pubnames.items(); + /// while let Some(pubname) = iter.next().unwrap() { + /// println!("pubname {} found!", pubname.name().to_string_lossy()); + /// } + /// ``` + pub fn items(&self) -> PubNamesEntryIter { + PubNamesEntryIter(self.0.items()) + } +} + +impl Section for DebugPubNames { + fn id() -> SectionId { + SectionId::DebugPubNames + } + + fn reader(&self) -> &R { + self.0.reader() + } +} + +impl From for DebugPubNames { + fn from(debug_pubnames_section: R) -> Self { + DebugPubNames(DebugLookup::from(debug_pubnames_section)) + } +} + +/// An iterator over the pubnames from a `.debug_pubnames` section. +/// +/// Can be [used with +/// `FallibleIterator`](./index.html#using-with-fallibleiterator). +#[derive(Debug, Clone)] +pub struct PubNamesEntryIter(LookupEntryIter>>); + +impl PubNamesEntryIter { + /// Advance the iterator and return the next pubname. + /// + /// Returns the newly parsed pubname as `Ok(Some(pubname))`. Returns + /// `Ok(None)` when iteration is complete and all pubnames have already been + /// parsed and yielded. If an error occurs while parsing the next pubname, + /// then this error is returned as `Err(e)`, and all subsequent calls return + /// `Ok(None)`. 
+ pub fn next(&mut self) -> Result>> { + self.0.next() + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for PubNamesEntryIter { + type Item = PubNamesEntry; + type Error = crate::read::Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + self.0.next() + } +} diff --git a/third_party/rust/gimli/src/read/pubtypes.rs b/third_party/rust/gimli/src/read/pubtypes.rs new file mode 100644 index 000000000000..6723b4222812 --- /dev/null +++ b/third_party/rust/gimli/src/read/pubtypes.rs @@ -0,0 +1,141 @@ +use crate::common::{DebugInfoOffset, SectionId}; +use crate::endianity::Endianity; +use crate::read::lookup::{DebugLookup, LookupEntryIter, PubStuffEntry, PubStuffParser}; +use crate::read::{EndianSlice, Reader, Result, Section, UnitOffset}; + +/// A single parsed pubtype. +#[derive(Debug, Clone)] +pub struct PubTypesEntry { + unit_header_offset: DebugInfoOffset, + die_offset: UnitOffset, + name: R, +} + +impl PubTypesEntry { + /// Returns the name of the type this entry refers to. + pub fn name(&self) -> &R { + &self.name + } + + /// Returns the offset into the .debug_info section for the header of the compilation unit + /// which contains the type with this name. + pub fn unit_header_offset(&self) -> DebugInfoOffset { + self.unit_header_offset + } + + /// Returns the offset into the compilation unit for the debugging information entry which + /// is the type with this name. + pub fn die_offset(&self) -> UnitOffset { + self.die_offset + } +} + +impl PubStuffEntry for PubTypesEntry { + fn new( + die_offset: UnitOffset, + name: R, + unit_header_offset: DebugInfoOffset, + ) -> Self { + PubTypesEntry { + unit_header_offset, + die_offset, + name, + } + } +} + +/// The `DebugPubTypes` struct represents the DWARF public types information +/// found in the `.debug_pubtypes` section.
+#[derive(Debug, Clone)] +pub struct DebugPubTypes(DebugLookup>>); + +impl<'input, Endian> DebugPubTypes> +where + Endian: Endianity, +{ + /// Construct a new `DebugPubTypes` instance from the data in the `.debug_pubtypes` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_pubtypes` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugPubTypes, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_pubtypes_somehow = || &buf; + /// let debug_pubtypes = + /// DebugPubTypes::new(read_debug_pubtypes_somehow(), LittleEndian); + /// ``` + pub fn new(debug_pubtypes_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_pubtypes_section, endian)) + } +} + +impl DebugPubTypes { + /// Iterate the pubtypes in the `.debug_pubtypes` section. + /// + /// ``` + /// use gimli::{DebugPubTypes, EndianSlice, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_pubtypes_section_somehow = || &buf; + /// let debug_pubtypes = + /// DebugPubTypes::new(read_debug_pubtypes_section_somehow(), LittleEndian); + /// + /// let mut iter = debug_pubtypes.items(); + /// while let Some(pubtype) = iter.next().unwrap() { + /// println!("pubtype {} found!", pubtype.name().to_string_lossy()); + /// } + /// ``` + pub fn items(&self) -> PubTypesEntryIter { + PubTypesEntryIter(self.0.items()) + } +} + +impl Section for DebugPubTypes { + fn id() -> SectionId { + SectionId::DebugPubTypes + } + + fn reader(&self) -> &R { + self.0.reader() + } +} + +impl From for DebugPubTypes { + fn from(debug_pubtypes_section: R) -> Self { + DebugPubTypes(DebugLookup::from(debug_pubtypes_section)) + } +} + +/// An iterator over the pubtypes from a `.debug_pubtypes` section. +/// +/// Can be [used with +/// `FallibleIterator`](./index.html#using-with-fallibleiterator). 
+#[derive(Debug, Clone)] +pub struct PubTypesEntryIter(LookupEntryIter>>); + +impl PubTypesEntryIter { + /// Advance the iterator and return the next pubtype. + /// + /// Returns the newly parsed pubtype as `Ok(Some(pubtype))`. Returns + /// `Ok(None)` when iteration is complete and all pubtypes have already been + /// parsed and yielded. If an error occurs while parsing the next pubtype, + /// then this error is returned as `Err(e)`, and all subsequent calls return + /// `Ok(None)`. + pub fn next(&mut self) -> Result>> { + self.0.next() + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for PubTypesEntryIter { + type Item = PubTypesEntry; + type Error = crate::read::Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + self.0.next() + } +} diff --git a/third_party/rust/gimli/src/read/reader.rs b/third_party/rust/gimli/src/read/reader.rs new file mode 100644 index 000000000000..2d55934fa903 --- /dev/null +++ b/third_party/rust/gimli/src/read/reader.rs @@ -0,0 +1,502 @@ +#[cfg(feature = "read")] +use alloc::borrow::Cow; +use core::convert::TryInto; +use core::fmt::Debug; +use core::hash::Hash; +use core::ops::{Add, AddAssign, Sub}; + +use crate::common::Format; +use crate::endianity::Endianity; +use crate::leb128; +use crate::read::{Error, Result}; + +/// An identifier for an offset within a section reader. +/// +/// This is used for error reporting. The meaning of this value is specific to +/// each reader implementation. The values should be chosen to be unique amongst +/// all readers. If values are not unique then errors may point to the wrong reader. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ReaderOffsetId(pub u64); + +/// A trait for offsets within a DWARF section. +/// +/// This allows consumers to choose a size that is appropriate for their address space. +pub trait ReaderOffset: + Debug + Copy + Eq + Ord + Hash + Add + AddAssign + Sub +{ + /// Convert a u8 to an offset.
+ fn from_u8(offset: u8) -> Self; + + /// Convert a u16 to an offset. + fn from_u16(offset: u16) -> Self; + + /// Convert an i16 to an offset. + fn from_i16(offset: i16) -> Self; + + /// Convert a u32 to an offset. + fn from_u32(offset: u32) -> Self; + + /// Convert a u64 to an offset. + /// + /// Returns `Error::UnsupportedOffset` if the value is too large. + fn from_u64(offset: u64) -> Result; + + /// Convert an offset to a u64. + fn into_u64(self) -> u64; + + /// Wrapping (modular) addition. Computes `self + other`. + fn wrapping_add(self, other: Self) -> Self; + + /// Checked subtraction. Computes `self - other`. + fn checked_sub(self, other: Self) -> Option; +} + +impl ReaderOffset for u64 { + #[inline] + fn from_u8(offset: u8) -> Self { + u64::from(offset) + } + + #[inline] + fn from_u16(offset: u16) -> Self { + u64::from(offset) + } + + #[inline] + fn from_i16(offset: i16) -> Self { + offset as u64 + } + + #[inline] + fn from_u32(offset: u32) -> Self { + u64::from(offset) + } + + #[inline] + fn from_u64(offset: u64) -> Result { + Ok(offset) + } + + #[inline] + fn into_u64(self) -> u64 { + self + } + + #[inline] + fn wrapping_add(self, other: Self) -> Self { + self.wrapping_add(other) + } + + #[inline] + fn checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } +} + +impl ReaderOffset for u32 { + #[inline] + fn from_u8(offset: u8) -> Self { + u32::from(offset) + } + + #[inline] + fn from_u16(offset: u16) -> Self { + u32::from(offset) + } + + #[inline] + fn from_i16(offset: i16) -> Self { + offset as u32 + } + + #[inline] + fn from_u32(offset: u32) -> Self { + offset + } + + #[inline] + fn from_u64(offset64: u64) -> Result { + let offset = offset64 as u32; + if u64::from(offset) == offset64 { + Ok(offset) + } else { + Err(Error::UnsupportedOffset) + } + } + + #[inline] + fn into_u64(self) -> u64 { + u64::from(self) + } + + #[inline] + fn wrapping_add(self, other: Self) -> Self { + self.wrapping_add(other) + } + + #[inline] + fn 
checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } +} + +impl ReaderOffset for usize { + #[inline] + fn from_u8(offset: u8) -> Self { + offset as usize + } + + #[inline] + fn from_u16(offset: u16) -> Self { + offset as usize + } + + #[inline] + fn from_i16(offset: i16) -> Self { + offset as usize + } + + #[inline] + fn from_u32(offset: u32) -> Self { + offset as usize + } + + #[inline] + fn from_u64(offset64: u64) -> Result { + let offset = offset64 as usize; + if offset as u64 == offset64 { + Ok(offset) + } else { + Err(Error::UnsupportedOffset) + } + } + + #[inline] + fn into_u64(self) -> u64 { + self as u64 + } + + #[inline] + fn wrapping_add(self, other: Self) -> Self { + self.wrapping_add(other) + } + + #[inline] + fn checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } +} + +#[cfg(not(feature = "read"))] +pub(crate) mod seal_if_no_alloc { + #[derive(Debug)] + pub struct Sealed; +} + +/// A trait for reading the data from a DWARF section. +/// +/// All read operations advance the section offset of the reader +/// unless specified otherwise. +/// +/// ## Choosing a `Reader` Implementation +/// +/// `gimli` comes with a few different `Reader` implementations and lets you +/// choose the one that is right for your use case. A `Reader` is essentially a +/// view into the raw bytes that make up some DWARF, but this view might borrow +/// the underlying data or use reference counting ownership, and it might be +/// thread safe or not. +/// +/// | Implementation | Ownership | Thread Safe | Notes | +/// |:------------------|:------------------|:------------|:------| +/// | [`EndianSlice`](./struct.EndianSlice.html) | Borrowed | Yes | Fastest, but requires that all of your code work with borrows. 
| +/// | [`EndianRcSlice`](./struct.EndianRcSlice.html) | Reference counted | No | Shared ownership via reference counting, which alleviates the borrow restrictions of `EndianSlice` but imposes reference counting increments and decrements. Cannot be sent across threads, because the reference count is not atomic. | +/// | [`EndianArcSlice`](./struct.EndianArcSlice.html) | Reference counted | Yes | The same as `EndianRcSlice`, but uses atomic reference counting, and therefore reference counting operations are slower but `EndianArcSlice`s may be sent across threads. | +/// | [`EndianReader`](./struct.EndianReader.html) | Same as `T` | Same as `T` | Escape hatch for easily defining your own type of `Reader`. | +pub trait Reader: Debug + Clone { + /// The endianity of bytes that are read. + type Endian: Endianity; + + /// The type used for offsets and lengths. + type Offset: ReaderOffset; + + /// Return the endianity of bytes that are read. + fn endian(&self) -> Self::Endian; + + /// Return the number of bytes remaining. + fn len(&self) -> Self::Offset; + + /// Set the number of bytes remaining to zero. + fn empty(&mut self); + + /// Set the number of bytes remaining to the specified length. + fn truncate(&mut self, len: Self::Offset) -> Result<()>; + + /// Return the offset of this reader's data relative to the start of + /// the given base reader's data. + /// + /// May panic if this reader's data is not contained within the given + /// base reader's data. + fn offset_from(&self, base: &Self) -> Self::Offset; + + /// Return an identifier for the current reader offset. + fn offset_id(&self) -> ReaderOffsetId; + + /// Return the offset corresponding to the given `id` if + /// it is associated with this reader. + fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option; + + /// Find the index of the first occurrence of the given byte. + /// The offset of the reader is not changed. + fn find(&self, byte: u8) -> Result; + + /// Discard the specified number of bytes. 
+ fn skip(&mut self, len: Self::Offset) -> Result<()>; + + /// Split a reader in two. + /// + /// A new reader is returned that can be used to read the next + /// `len` bytes, and `self` is advanced so that it reads the remainder. + fn split(&mut self, len: Self::Offset) -> Result; + + /// This trait cannot be implemented if "read" feature is not enabled. + /// + /// `Reader` trait has a few methods that depend on `alloc` crate. + /// Disallowing `Reader` trait implementation prevents a crate that only depends on + /// "read-core" from being broken if another crate depending on `gimli` enables + /// "read" feature. + #[cfg(not(feature = "read"))] + fn cannot_implement() -> seal_if_no_alloc::Sealed; + + /// Return all remaining data as a clone-on-write slice. + /// + /// The slice will be borrowed where possible, but some readers may + /// always return an owned vector. + /// + /// Does not advance the reader. + #[cfg(feature = "read")] + fn to_slice(&self) -> Result>; + + /// Convert all remaining data to a clone-on-write string. + /// + /// The string will be borrowed where possible, but some readers may + /// always return an owned string. + /// + /// Does not advance the reader. + /// + /// Returns an error if the data contains invalid characters. + #[cfg(feature = "read")] + fn to_string(&self) -> Result>; + + /// Convert all remaining data to a clone-on-write string, including invalid characters. + /// + /// The string will be borrowed where possible, but some readers may + /// always return an owned string. + /// + /// Does not advance the reader. + #[cfg(feature = "read")] + fn to_string_lossy(&self) -> Result>; + + /// Read exactly `buf.len()` bytes into `buf`. + fn read_slice(&mut self, buf: &mut [u8]) -> Result<()>; + + /// Read a u8 array. 
+ #[inline] + fn read_u8_array(&mut self) -> Result + where + A: Sized + Default + AsMut<[u8]>, + { + let mut val = Default::default(); + self.read_slice(>::as_mut(&mut val))?; + Ok(val) + } + + /// Return true if the number of bytes remaining is zero. + #[inline] + fn is_empty(&self) -> bool { + self.len() == Self::Offset::from_u8(0) + } + + /// Read a u8. + #[inline] + fn read_u8(&mut self) -> Result { + let a: [u8; 1] = self.read_u8_array()?; + Ok(a[0]) + } + + /// Read an i8. + #[inline] + fn read_i8(&mut self) -> Result { + let a: [u8; 1] = self.read_u8_array()?; + Ok(a[0] as i8) + } + + /// Read a u16. + #[inline] + fn read_u16(&mut self) -> Result { + let a: [u8; 2] = self.read_u8_array()?; + Ok(self.endian().read_u16(&a)) + } + + /// Read an i16. + #[inline] + fn read_i16(&mut self) -> Result { + let a: [u8; 2] = self.read_u8_array()?; + Ok(self.endian().read_i16(&a)) + } + + /// Read a u32. + #[inline] + fn read_u32(&mut self) -> Result { + let a: [u8; 4] = self.read_u8_array()?; + Ok(self.endian().read_u32(&a)) + } + + /// Read an i32. + #[inline] + fn read_i32(&mut self) -> Result { + let a: [u8; 4] = self.read_u8_array()?; + Ok(self.endian().read_i32(&a)) + } + + /// Read a u64. + #[inline] + fn read_u64(&mut self) -> Result { + let a: [u8; 8] = self.read_u8_array()?; + Ok(self.endian().read_u64(&a)) + } + + /// Read an i64. + #[inline] + fn read_i64(&mut self) -> Result { + let a: [u8; 8] = self.read_u8_array()?; + Ok(self.endian().read_i64(&a)) + } + + /// Read a f32. + #[inline] + fn read_f32(&mut self) -> Result { + let a: [u8; 4] = self.read_u8_array()?; + Ok(self.endian().read_f32(&a)) + } + + /// Read a f64. + #[inline] + fn read_f64(&mut self) -> Result { + let a: [u8; 8] = self.read_u8_array()?; + Ok(self.endian().read_f64(&a)) + } + + /// Read an unsigned n-bytes integer u64. 
+ /// + /// # Panics + /// + /// Panics when `n` < 1 or `n` > 8 + #[inline] + fn read_uint(&mut self, n: usize) -> Result { + let mut buf = [0; 8]; + self.read_slice(&mut buf[..n])?; + Ok(self.endian().read_uint(&buf[..n])) + } + + /// Read a null-terminated slice, and return it (excluding the null). + fn read_null_terminated_slice(&mut self) -> Result { + let idx = self.find(0)?; + let val = self.split(idx)?; + self.skip(Self::Offset::from_u8(1))?; + Ok(val) + } + + /// Skip a LEB128 encoded integer. + fn skip_leb128(&mut self) -> Result<()> { + leb128::read::skip(self) + } + + /// Read an unsigned LEB128 encoded integer. + fn read_uleb128(&mut self) -> Result { + leb128::read::unsigned(self) + } + + /// Read an unsigned LEB128 encoded u32. + fn read_uleb128_u32(&mut self) -> Result { + leb128::read::unsigned(self)? + .try_into() + .map_err(|_| Error::BadUnsignedLeb128) + } + + /// Read an unsigned LEB128 encoded u16. + fn read_uleb128_u16(&mut self) -> Result { + leb128::read::u16(self) + } + + /// Read a signed LEB128 encoded integer. + fn read_sleb128(&mut self) -> Result { + leb128::read::signed(self) + } + + /// Read an initial length field. + /// + /// This field is encoded as either a 32-bit length or + /// a 64-bit length, and the returned `Format` indicates which. + fn read_initial_length(&mut self) -> Result<(Self::Offset, Format)> { + const MAX_DWARF_32_UNIT_LENGTH: u32 = 0xffff_fff0; + const DWARF_64_INITIAL_UNIT_LENGTH: u32 = 0xffff_ffff; + + let val = self.read_u32()?; + if val < MAX_DWARF_32_UNIT_LENGTH { + Ok((Self::Offset::from_u32(val), Format::Dwarf32)) + } else if val == DWARF_64_INITIAL_UNIT_LENGTH { + let val = self.read_u64().and_then(Self::Offset::from_u64)?; + Ok((val, Format::Dwarf64)) + } else { + Err(Error::UnknownReservedLength) + } + } + + /// Read an address-sized integer, and return it as a `u64`.
+ fn read_address(&mut self, address_size: u8) -> Result { + match address_size { + 1 => self.read_u8().map(u64::from), + 2 => self.read_u16().map(u64::from), + 4 => self.read_u32().map(u64::from), + 8 => self.read_u64(), + otherwise => Err(Error::UnsupportedAddressSize(otherwise)), + } + } + + /// Parse a word-sized integer according to the DWARF format. + /// + /// These are always used to encode section offsets or lengths, + /// and so have a type of `Self::Offset`. + fn read_word(&mut self, format: Format) -> Result { + match format { + Format::Dwarf32 => self.read_u32().map(Self::Offset::from_u32), + Format::Dwarf64 => self.read_u64().and_then(Self::Offset::from_u64), + } + } + + /// Parse a word-sized section length according to the DWARF format. + #[inline] + fn read_length(&mut self, format: Format) -> Result { + self.read_word(format) + } + + /// Parse a word-sized section offset according to the DWARF format. + #[inline] + fn read_offset(&mut self, format: Format) -> Result { + self.read_word(format) + } + + /// Parse a section offset of the given size. + /// + /// This is used for `DW_FORM_ref_addr` values in DWARF version 2. + fn read_sized_offset(&mut self, size: u8) -> Result { + match size { + 1 => self.read_u8().map(u64::from), + 2 => self.read_u16().map(u64::from), + 4 => self.read_u32().map(u64::from), + 8 => self.read_u64(), + otherwise => Err(Error::UnsupportedOffsetSize(otherwise)), + } + .and_then(Self::Offset::from_u64) + } +} diff --git a/third_party/rust/gimli/src/read/relocate.rs b/third_party/rust/gimli/src/read/relocate.rs new file mode 100644 index 000000000000..d26c2bf8cbdd --- /dev/null +++ b/third_party/rust/gimli/src/read/relocate.rs @@ -0,0 +1,153 @@ +#[cfg(feature = "read")] +use alloc::borrow::Cow; +use core::fmt::Debug; + +use crate::common::Format; +use crate::read::{Reader, ReaderOffset, ReaderOffsetId, Result}; + +/// Trait for relocating addresses and offsets while reading a section. 
+pub trait Relocate { + /// Relocate an address which was read from the given section offset. + fn relocate_address(&self, offset: T, value: u64) -> Result; + + /// Relocate a value which was read from the given section offset. + fn relocate_offset(&self, offset: T, value: T) -> Result; +} + +/// A `Reader` which applies relocations to addresses and offsets. +/// +/// This is useful for reading sections which contain relocations, +/// such as those in a relocatable object file. +/// It is generally not used for reading sections in an executable file. +#[derive(Debug, Clone)] +pub struct RelocateReader, T: Relocate> { + section: R, + reader: R, + relocate: T, +} + +impl RelocateReader +where + R: Reader, + T: Relocate, +{ + /// Create a new `RelocateReader` which applies relocations to the given section reader. + pub fn new(section: R, relocate: T) -> Self { + let reader = section.clone(); + Self { + section, + reader, + relocate, + } + } +} + +impl Reader for RelocateReader +where + R: Reader, + T: Relocate + Debug + Clone, +{ + type Endian = R::Endian; + type Offset = R::Offset; + + fn read_address(&mut self, address_size: u8) -> Result { + let offset = self.reader.offset_from(&self.section); + let value = self.reader.read_address(address_size)?; + self.relocate.relocate_address(offset, value) + } + + fn read_offset(&mut self, format: Format) -> Result { + let offset = self.reader.offset_from(&self.section); + let value = self.reader.read_offset(format)?; + self.relocate.relocate_offset(offset, value) + } + + fn read_sized_offset(&mut self, size: u8) -> Result { + let offset = self.reader.offset_from(&self.section); + let value = self.reader.read_sized_offset(size)?; + self.relocate.relocate_offset(offset, value) + } + + #[inline] + fn split(&mut self, len: Self::Offset) -> Result { + let mut other = self.clone(); + other.reader.truncate(len)?; + self.reader.skip(len)?; + Ok(other) + } + + // All remaining methods simply delegate to `self.reader`. 
+ + #[inline] + fn endian(&self) -> Self::Endian { + self.reader.endian() + } + + #[inline] + fn len(&self) -> Self::Offset { + self.reader.len() + } + + #[inline] + fn empty(&mut self) { + self.reader.empty() + } + + #[inline] + fn truncate(&mut self, len: Self::Offset) -> Result<()> { + self.reader.truncate(len) + } + + #[inline] + fn offset_from(&self, base: &Self) -> Self::Offset { + self.reader.offset_from(&base.reader) + } + + #[inline] + fn offset_id(&self) -> ReaderOffsetId { + self.reader.offset_id() + } + + #[inline] + fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option { + self.reader.lookup_offset_id(id) + } + + #[inline] + fn find(&self, byte: u8) -> Result { + self.reader.find(byte) + } + + #[inline] + fn skip(&mut self, len: Self::Offset) -> Result<()> { + self.reader.skip(len) + } + + #[cfg(not(feature = "read"))] + fn cannot_implement() -> super::reader::seal_if_no_alloc::Sealed { + super::reader::seal_if_no_alloc::Sealed + } + + #[cfg(feature = "read")] + #[inline] + fn to_slice(&self) -> Result> { + self.reader.to_slice() + } + + #[cfg(feature = "read")] + #[inline] + fn to_string(&self) -> Result> { + self.reader.to_string() + } + + #[cfg(feature = "read")] + #[inline] + fn to_string_lossy(&self) -> Result> { + self.reader.to_string_lossy() + } + + #[inline] + fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> { + self.reader.read_slice(buf) + } +} diff --git a/third_party/rust/gimli/src/read/rnglists.rs b/third_party/rust/gimli/src/read/rnglists.rs new file mode 100644 index 000000000000..f3f713d72c98 --- /dev/null +++ b/third_party/rust/gimli/src/read/rnglists.rs @@ -0,0 +1,1471 @@ +use crate::common::{ + DebugAddrBase, DebugAddrIndex, DebugRngListsBase, DebugRngListsIndex, DwarfFileType, Encoding, + RangeListsOffset, SectionId, +}; +use crate::constants; +use crate::endianity::Endianity; +use crate::read::{ + lists::ListsHeader, DebugAddr, EndianSlice, Error, Reader, ReaderOffset, ReaderOffsetId, + Result, Section, +}; + +/// The raw 
contents of the `.debug_ranges` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugRanges { + pub(crate) section: R, +} + +impl<'input, Endian> DebugRanges> +where + Endian: Endianity, +{ + /// Construct a new `DebugRanges` instance from the data in the `.debug_ranges` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_ranges` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugRanges, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_ranges_section_somehow = || &buf; + /// let debug_ranges = DebugRanges::new(read_debug_ranges_section_somehow(), LittleEndian); + /// ``` + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl DebugRanges { + /// Create a `DebugRanges` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub(crate) fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugRanges + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugRanges { + fn id() -> SectionId { + SectionId::DebugRanges + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugRanges { + fn from(section: R) -> Self { + DebugRanges { section } + } +} + +/// The `DebugRngLists` struct represents the contents of the +/// `.debug_rnglists` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugRngLists { + section: R, +} + +impl<'input, Endian> DebugRngLists> +where + Endian: Endianity, +{ + /// Construct a new `DebugRngLists` instance from the data in the + /// `.debug_rnglists` section. + /// + /// It is the caller's responsibility to read the `.debug_rnglists` + /// section and present it as a `&[u8]` slice. 
That means using some ELF + /// loader on Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugRngLists, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_rnglists_section_somehow = || &buf; + /// let debug_rnglists = + /// DebugRngLists::new(read_debug_rnglists_section_somehow(), LittleEndian); + /// ``` + pub fn new(section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(section, endian)) + } +} + +impl DebugRngLists { + /// Create a `DebugRngLists` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub(crate) fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugRngLists + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugRngLists { + fn id() -> SectionId { + SectionId::DebugRngLists + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugRngLists { + fn from(section: R) -> Self { + DebugRngLists { section } + } +} + +#[allow(unused)] +pub(crate) type RngListsHeader = ListsHeader; + +impl DebugRngListsBase +where + Offset: ReaderOffset, +{ + /// Returns a `DebugRngListsBase` with the default value of DW_AT_rnglists_base + /// for the given `Encoding` and `DwarfFileType`. + pub fn default_for_encoding_and_file( + encoding: Encoding, + file_type: DwarfFileType, + ) -> DebugRngListsBase { + if encoding.version >= 5 && file_type == DwarfFileType::Dwo { + // In .dwo files, the compiler omits the DW_AT_rnglists_base attribute (because there is + // only a single unit in the file) but we must skip past the header, which the attribute + // would normally do for us. + DebugRngListsBase(Offset::from_u8(RngListsHeader::size_for_encoding(encoding))) + } else { + DebugRngListsBase(Offset::from_u8(0)) + } + } +} + +/// The DWARF data found in `.debug_ranges` and `.debug_rnglists` sections. 
+#[derive(Debug, Default, Clone, Copy)] +pub struct RangeLists { + debug_ranges: DebugRanges, + debug_rnglists: DebugRngLists, +} + +impl RangeLists { + /// Construct a new `RangeLists` instance from the data in the `.debug_ranges` and + /// `.debug_rnglists` sections. + pub fn new(debug_ranges: DebugRanges, debug_rnglists: DebugRngLists) -> RangeLists { + RangeLists { + debug_ranges, + debug_rnglists, + } + } + + /// Return the `.debug_ranges` section. + pub fn debug_ranges(&self) -> &DebugRanges { + &self.debug_ranges + } + + /// Replace the `.debug_ranges` section. + /// + /// This is useful for `.dwo` files when using the GNU split-dwarf extension to DWARF 4. + pub fn set_debug_ranges(&mut self, debug_ranges: DebugRanges) { + self.debug_ranges = debug_ranges; + } + + /// Return the `.debug_rnglists` section. + pub fn debug_rnglists(&self) -> &DebugRngLists { + &self.debug_rnglists + } +} + +impl RangeLists { + /// Create a `RangeLists` that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `Dwarf::borrow`. + pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> RangeLists + where + F: FnMut(&'a T) -> R, + { + RangeLists { + debug_ranges: borrow(&self.debug_ranges.section).into(), + debug_rnglists: borrow(&self.debug_rnglists.section).into(), + } + } +} + +impl RangeLists { + /// Iterate over the `Range` list entries starting at the given offset. + /// + /// The `unit_encoding` must match the compilation unit that the + /// offset was contained in. + /// + /// The `base_address` should be obtained from the `DW_AT_low_pc` attribute in the + /// `DW_TAG_compile_unit` entry for the compilation unit that contains this range list. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator).
+ pub fn ranges( + &self, + offset: RangeListsOffset, + unit_encoding: Encoding, + base_address: u64, + debug_addr: &DebugAddr, + debug_addr_base: DebugAddrBase, + ) -> Result> { + Ok(RngListIter::new( + self.raw_ranges(offset, unit_encoding)?, + base_address, + debug_addr.clone(), + debug_addr_base, + )) + } + + /// Iterate over the `RawRngListEntry`ies starting at the given offset. + /// + /// The `unit_encoding` must match the compilation unit that the + /// offset was contained in. + /// + /// This iterator does not perform any processing of the range entries, + /// such as handling base addresses. + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn raw_ranges( + &self, + offset: RangeListsOffset, + unit_encoding: Encoding, + ) -> Result> { + let (mut input, format) = if unit_encoding.version <= 4 { + (self.debug_ranges.section.clone(), RangeListsFormat::Bare) + } else { + (self.debug_rnglists.section.clone(), RangeListsFormat::Rle) + }; + input.skip(offset.0)?; + Ok(RawRngListIter::new(input, unit_encoding, format)) + } + + /// Returns the `.debug_rnglists` offset at the given `base` and `index`. + /// + /// The `base` must be the `DW_AT_rnglists_base` value from the compilation unit DIE. + /// This is an offset that points to the first entry following the header. + /// + /// The `index` is the value of a `DW_FORM_rnglistx` attribute. + /// + /// The `unit_encoding` must match the compilation unit that the + /// index was contained in. 
+ pub fn get_offset( + &self, + unit_encoding: Encoding, + base: DebugRngListsBase, + index: DebugRngListsIndex, + ) -> Result> { + let format = unit_encoding.format; + let input = &mut self.debug_rnglists.section.clone(); + input.skip(base.0)?; + input.skip(R::Offset::from_u64( + index.0.into_u64() * u64::from(format.word_size()), + )?)?; + input + .read_offset(format) + .map(|x| RangeListsOffset(base.0 + x)) + } + + /// Call `Reader::lookup_offset_id` for each section, and return the first match. + pub fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<(SectionId, R::Offset)> { + self.debug_ranges + .lookup_offset_id(id) + .or_else(|| self.debug_rnglists.lookup_offset_id(id)) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RangeListsFormat { + /// The bare range list format used before DWARF 5. + Bare, + /// The DW_RLE encoded range list format used in DWARF 5. + Rle, +} + +/// A raw iterator over an address range list. +/// +/// This iterator does not perform any processing of the range entries, +/// such as handling base addresses. +#[derive(Debug)] +pub struct RawRngListIter { + input: R, + encoding: Encoding, + format: RangeListsFormat, +} + +/// A raw entry in .debug_rnglists +#[derive(Clone, Debug)] +pub enum RawRngListEntry { + /// A range from DWARF version <= 4. + AddressOrOffsetPair { + /// Start of range. May be an address or an offset. + begin: u64, + /// End of range. May be an address or an offset. 
+ end: u64, + }, + /// DW_RLE_base_address + BaseAddress { + /// base address + addr: u64, + }, + /// DW_RLE_base_addressx + BaseAddressx { + /// base address + addr: DebugAddrIndex, + }, + /// DW_RLE_startx_endx + StartxEndx { + /// start of range + begin: DebugAddrIndex, + /// end of range + end: DebugAddrIndex, + }, + /// DW_RLE_startx_length + StartxLength { + /// start of range + begin: DebugAddrIndex, + /// length of range + length: u64, + }, + /// DW_RLE_offset_pair + OffsetPair { + /// start of range + begin: u64, + /// end of range + end: u64, + }, + /// DW_RLE_start_end + StartEnd { + /// start of range + begin: u64, + /// end of range + end: u64, + }, + /// DW_RLE_start_length + StartLength { + /// start of range + begin: u64, + /// length of range + length: u64, + }, +} + +impl RawRngListEntry { + /// Parse a range entry from `.debug_rnglists` + fn parse>( + input: &mut R, + encoding: Encoding, + format: RangeListsFormat, + ) -> Result> { + Ok(match format { + RangeListsFormat::Bare => { + let range = RawRange::parse(input, encoding.address_size)?; + if range.is_end() { + None + } else if range.is_base_address(encoding.address_size) { + Some(RawRngListEntry::BaseAddress { addr: range.end }) + } else { + Some(RawRngListEntry::AddressOrOffsetPair { + begin: range.begin, + end: range.end, + }) + } + } + RangeListsFormat::Rle => match constants::DwRle(input.read_u8()?) 
{ + constants::DW_RLE_end_of_list => None, + constants::DW_RLE_base_addressx => Some(RawRngListEntry::BaseAddressx { + addr: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + }), + constants::DW_RLE_startx_endx => Some(RawRngListEntry::StartxEndx { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + end: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + }), + constants::DW_RLE_startx_length => Some(RawRngListEntry::StartxLength { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + length: input.read_uleb128()?, + }), + constants::DW_RLE_offset_pair => Some(RawRngListEntry::OffsetPair { + begin: input.read_uleb128()?, + end: input.read_uleb128()?, + }), + constants::DW_RLE_base_address => Some(RawRngListEntry::BaseAddress { + addr: input.read_address(encoding.address_size)?, + }), + constants::DW_RLE_start_end => Some(RawRngListEntry::StartEnd { + begin: input.read_address(encoding.address_size)?, + end: input.read_address(encoding.address_size)?, + }), + constants::DW_RLE_start_length => Some(RawRngListEntry::StartLength { + begin: input.read_address(encoding.address_size)?, + length: input.read_uleb128()?, + }), + entry => { + return Err(Error::UnknownRangeListsEntry(entry)); + } + }, + }) + } +} + +impl RawRngListIter { + /// Construct a `RawRngListIter`. + fn new(input: R, encoding: Encoding, format: RangeListsFormat) -> RawRngListIter { + RawRngListIter { + input, + encoding, + format, + } + } + + /// Advance the iterator to the next range. 
+ pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + + match RawRngListEntry::parse(&mut self.input, self.encoding, self.format) { + Ok(range) => { + if range.is_none() { + self.input.empty(); + } + Ok(range) + } + Err(e) => { + self.input.empty(); + Err(e) + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for RawRngListIter { + type Item = RawRngListEntry; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + RawRngListIter::next(self) + } +} + +/// An iterator over an address range list. +/// +/// This iterator internally handles processing of base addresses and different +/// entry types. Thus, it only returns range entries that are valid +/// and already adjusted for the base address. +#[derive(Debug)] +pub struct RngListIter { + raw: RawRngListIter, + base_address: u64, + debug_addr: DebugAddr, + debug_addr_base: DebugAddrBase, +} + +impl RngListIter { + /// Construct a `RngListIter`. + fn new( + raw: RawRngListIter, + base_address: u64, + debug_addr: DebugAddr, + debug_addr_base: DebugAddrBase, + ) -> RngListIter { + RngListIter { + raw, + base_address, + debug_addr, + debug_addr_base, + } + } + + #[inline] + fn get_address(&self, index: DebugAddrIndex) -> Result { + self.debug_addr + .get_address(self.raw.encoding.address_size, self.debug_addr_base, index) + } + + /// Advance the iterator to the next range. + pub fn next(&mut self) -> Result> { + loop { + let raw_range = match self.raw.next()? { + Some(range) => range, + None => return Ok(None), + }; + + let range = self.convert_raw(raw_range)?; + if range.is_some() { + return Ok(range); + } + } + } + + /// Return the next raw range. + /// + /// The raw range should be passed to `convert_range`. + #[doc(hidden)] + pub fn next_raw(&mut self) -> Result>> { + self.raw.next() + } + + /// Convert a raw range into a range, and update the state of the iterator. 
+ /// + /// The raw range should have been obtained from `next_raw`. + #[doc(hidden)] + pub fn convert_raw(&mut self, raw_range: RawRngListEntry) -> Result> { + let mask = !0 >> (64 - self.raw.encoding.address_size * 8); + let tombstone = if self.raw.encoding.version <= 4 { + mask - 1 + } else { + mask + }; + + let range = match raw_range { + RawRngListEntry::BaseAddress { addr } => { + self.base_address = addr; + return Ok(None); + } + RawRngListEntry::BaseAddressx { addr } => { + self.base_address = self.get_address(addr)?; + return Ok(None); + } + RawRngListEntry::StartxEndx { begin, end } => { + let begin = self.get_address(begin)?; + let end = self.get_address(end)?; + Range { begin, end } + } + RawRngListEntry::StartxLength { begin, length } => { + let begin = self.get_address(begin)?; + let end = begin.wrapping_add(length) & mask; + Range { begin, end } + } + RawRngListEntry::AddressOrOffsetPair { begin, end } + | RawRngListEntry::OffsetPair { begin, end } => { + if self.base_address == tombstone { + return Ok(None); + } + let mut range = Range { begin, end }; + range.add_base_address(self.base_address, self.raw.encoding.address_size); + range + } + RawRngListEntry::StartEnd { begin, end } => Range { begin, end }, + RawRngListEntry::StartLength { begin, length } => { + let end = begin.wrapping_add(length) & mask; + Range { begin, end } + } + }; + + if range.begin == tombstone || range.begin > range.end { + return Ok(None); + } + + Ok(Some(range)) + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for RngListIter { + type Item = Range; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + RngListIter::next(self) + } +} + +/// A raw address range from the `.debug_ranges` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct RawRange { + /// The beginning address of the range. + pub begin: u64, + + /// The first address past the end of the range. 
+ pub end: u64, +} + +impl RawRange { + /// Check if this is a range end entry. + #[inline] + pub fn is_end(&self) -> bool { + self.begin == 0 && self.end == 0 + } + + /// Check if this is a base address selection entry. + /// + /// A base address selection entry changes the base address that subsequent + /// range entries are relative to. + #[inline] + pub fn is_base_address(&self, address_size: u8) -> bool { + self.begin == !0 >> (64 - address_size * 8) + } + + /// Parse an address range entry from `.debug_ranges` or `.debug_loc`. + #[inline] + pub fn parse(input: &mut R, address_size: u8) -> Result { + let begin = input.read_address(address_size)?; + let end = input.read_address(address_size)?; + let range = RawRange { begin, end }; + Ok(range) + } +} + +/// An address range from the `.debug_ranges`, `.debug_rnglists`, or `.debug_aranges` sections. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Range { + /// The beginning address of the range. + pub begin: u64, + + /// The first address past the end of the range. + pub end: u64, +} + +impl Range { + /// Add a base address to this range. 
+ #[inline] + pub(crate) fn add_base_address(&mut self, base_address: u64, address_size: u8) { + let mask = !0 >> (64 - address_size * 8); + self.begin = base_address.wrapping_add(self.begin) & mask; + self.end = base_address.wrapping_add(self.end) & mask; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::common::Format; + use crate::endianity::LittleEndian; + use crate::test_util::GimliSectionMethods; + use test_assembler::{Endian, Label, LabelMaker, Section}; + + #[test] + fn test_rnglists_32() { + let tombstone = !0u32; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 4, + }; + let section = Section::with_endian(Endian::Little) + .L32(0x0300_0000) + .L32(0x0301_0300) + .L32(0x0301_0400) + .L32(0x0301_0500) + .L32(tombstone) + .L32(0x0301_0600); + let buf = section.get_contents().unwrap(); + let debug_addr = &DebugAddr::from(EndianSlice::new(&buf, LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + + let start = Label::new(); + let first = Label::new(); + let size = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // Header + .mark(&start) + .L32(&size) + .L16(encoding.version) + .L8(encoding.address_size) + .L8(0) + .L32(0) + .mark(&first) + // An OffsetPair using the unit base address. + .L8(4).uleb(0x10200).uleb(0x10300) + // A base address selection followed by an OffsetPair. + .L8(5).L32(0x0200_0000) + .L8(4).uleb(0x10400).uleb(0x10500) + // An empty OffsetPair followed by a normal OffsetPair. + .L8(4).uleb(0x10600).uleb(0x10600) + .L8(4).uleb(0x10800).uleb(0x10900) + // A StartEnd + .L8(6).L32(0x201_0a00).L32(0x201_0b00) + // A StartLength + .L8(7).L32(0x201_0c00).uleb(0x100) + // An OffsetPair that starts at 0. + .L8(4).uleb(0).uleb(1) + // An OffsetPair that starts and ends at 0. + .L8(4).uleb(0).uleb(0) + // An OffsetPair that ends at -1. 
+ .L8(5).L32(0) + .L8(4).uleb(0).uleb(0xffff_ffff) + // A BaseAddressx + OffsetPair + .L8(1).uleb(0) + .L8(4).uleb(0x10100).uleb(0x10200) + // A StartxEndx + .L8(2).uleb(1).uleb(2) + // A StartxLength + .L8(3).uleb(3).uleb(0x100) + + // Tombstone entries, all of which should be ignored. + // A BaseAddressx that is a tombstone. + .L8(1).uleb(4) + .L8(4).uleb(0x11100).uleb(0x11200) + // A BaseAddress that is a tombstone. + .L8(5).L32(tombstone) + .L8(4).uleb(0x11300).uleb(0x11400) + // A StartxEndx that is a tombstone. + .L8(2).uleb(4).uleb(5) + // A StartxLength that is a tombstone. + .L8(3).uleb(4).uleb(0x100) + // A StartEnd that is a tombstone. + .L8(6).L32(tombstone).L32(0x201_1500) + // A StartLength that is a tombstone. + .L8(7).L32(tombstone).uleb(0x100) + // A StartEnd (not ignored) + .L8(6).L32(0x201_1600).L32(0x201_1700) + + // A range end. + .L8(0) + // Some extra data. + .L32(0xffff_ffff); + size.set_const((§ion.here() - &start - 4) as u64); + + let buf = section.get_contents().unwrap(); + let debug_ranges = DebugRanges::new(&[], LittleEndian); + let debug_rnglists = DebugRngLists::new(&buf, LittleEndian); + let rnglists = RangeLists::new(debug_ranges, debug_rnglists); + let offset = RangeListsOffset((&first - &start) as usize); + let mut ranges = rnglists + .ranges(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0101_0200, + end: 0x0101_0300, + })) + ); + + // A base address selection followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0400, + end: 0x0201_0500, + })) + ); + + // An empty range followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0600, + end: 0x0201_0600, + })) + ); + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0800, + end: 0x0201_0900, + })) + ); + + // A normal range. 
+ assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0a00, + end: 0x0201_0b00, + })) + ); + + // A normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0c00, + end: 0x0201_0d00, + })) + ); + + // A range that starts at 0. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0200_0000, + end: 0x0200_0001, + })) + ); + + // A range that starts and ends at 0. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0200_0000, + end: 0x0200_0000, + })) + ); + + // A range that ends at -1. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0000_0000, + end: 0xffff_ffff, + })) + ); + + // A BaseAddressx + OffsetPair + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0301_0100, + end: 0x0301_0200, + })) + ); + + // A StartxEndx + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0301_0300, + end: 0x0301_0400, + })) + ); + + // A StartxLength + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0301_0500, + end: 0x0301_0600, + })) + ); + + // A StartEnd range following the tombstones + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_1600, + end: 0x0201_1700, + })) + ); + + // A range end. + assert_eq!(ranges.next(), Ok(None)); + + // An offset at the end of buf. 
+ let mut ranges = rnglists + .ranges( + RangeListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(ranges.next(), Ok(None)); + } + + #[test] + fn test_rnglists_64() { + let tombstone = !0u64; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + let section = Section::with_endian(Endian::Little) + .L64(0x0300_0000) + .L64(0x0301_0300) + .L64(0x0301_0400) + .L64(0x0301_0500) + .L64(tombstone) + .L64(0x0301_0600); + let buf = section.get_contents().unwrap(); + let debug_addr = &DebugAddr::from(EndianSlice::new(&buf, LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + + let start = Label::new(); + let first = Label::new(); + let size = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // Header + .mark(&start) + .L32(0xffff_ffff) + .L64(&size) + .L16(encoding.version) + .L8(encoding.address_size) + .L8(0) + .L32(0) + .mark(&first) + // An OffsetPair using the unit base address. + .L8(4).uleb(0x10200).uleb(0x10300) + // A base address selection followed by an OffsetPair. + .L8(5).L64(0x0200_0000) + .L8(4).uleb(0x10400).uleb(0x10500) + // An empty OffsetPair followed by a normal OffsetPair. + .L8(4).uleb(0x10600).uleb(0x10600) + .L8(4).uleb(0x10800).uleb(0x10900) + // A StartEnd + .L8(6).L64(0x201_0a00).L64(0x201_0b00) + // A StartLength + .L8(7).L64(0x201_0c00).uleb(0x100) + // An OffsetPair that starts at 0. + .L8(4).uleb(0).uleb(1) + // An OffsetPair that starts and ends at 0. + .L8(4).uleb(0).uleb(0) + // An OffsetPair that ends at -1. + .L8(5).L64(0) + .L8(4).uleb(0).uleb(0xffff_ffff) + // A BaseAddressx + OffsetPair + .L8(1).uleb(0) + .L8(4).uleb(0x10100).uleb(0x10200) + // A StartxEndx + .L8(2).uleb(1).uleb(2) + // A StartxLength + .L8(3).uleb(3).uleb(0x100) + + // Tombstone entries, all of which should be ignored. + // A BaseAddressx that is a tombstone. 
+ .L8(1).uleb(4) + .L8(4).uleb(0x11100).uleb(0x11200) + // A BaseAddress that is a tombstone. + .L8(5).L64(tombstone) + .L8(4).uleb(0x11300).uleb(0x11400) + // A StartxEndx that is a tombstone. + .L8(2).uleb(4).uleb(5) + // A StartxLength that is a tombstone. + .L8(3).uleb(4).uleb(0x100) + // A StartEnd that is a tombstone. + .L8(6).L64(tombstone).L64(0x201_1500) + // A StartLength that is a tombstone. + .L8(7).L64(tombstone).uleb(0x100) + // A StartEnd (not ignored) + .L8(6).L64(0x201_1600).L64(0x201_1700) + + // A range end. + .L8(0) + // Some extra data. + .L32(0xffff_ffff); + size.set_const((§ion.here() - &start - 12) as u64); + + let buf = section.get_contents().unwrap(); + let debug_ranges = DebugRanges::new(&[], LittleEndian); + let debug_rnglists = DebugRngLists::new(&buf, LittleEndian); + let rnglists = RangeLists::new(debug_ranges, debug_rnglists); + let offset = RangeListsOffset((&first - &start) as usize); + let mut ranges = rnglists + .ranges(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0101_0200, + end: 0x0101_0300, + })) + ); + + // A base address selection followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0400, + end: 0x0201_0500, + })) + ); + + // An empty range followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0600, + end: 0x0201_0600, + })) + ); + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0800, + end: 0x0201_0900, + })) + ); + + // A normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0a00, + end: 0x0201_0b00, + })) + ); + + // A normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0c00, + end: 0x0201_0d00, + })) + ); + + // A range that starts at 0. 
+ assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0200_0000, + end: 0x0200_0001, + })) + ); + + // A range that starts and ends at 0. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0200_0000, + end: 0x0200_0000, + })) + ); + + // A range that ends at -1. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0000_0000, + end: 0xffff_ffff, + })) + ); + + // A BaseAddressx + OffsetPair + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0301_0100, + end: 0x0301_0200, + })) + ); + + // A StartxEndx + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0301_0300, + end: 0x0301_0400, + })) + ); + + // A StartxLength + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0301_0500, + end: 0x0301_0600, + })) + ); + + // A StartEnd range following the tombstones + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_1600, + end: 0x0201_1700, + })) + ); + + // A range end. + assert_eq!(ranges.next(), Ok(None)); + + // An offset at the end of buf. 
+ let mut ranges = rnglists + .ranges( + RangeListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(ranges.next(), Ok(None)); + } + + #[test] + fn test_raw_range() { + let range = RawRange { + begin: 0, + end: 0xffff_ffff, + }; + assert!(!range.is_end()); + assert!(!range.is_base_address(4)); + assert!(!range.is_base_address(8)); + + let range = RawRange { begin: 0, end: 0 }; + assert!(range.is_end()); + assert!(!range.is_base_address(4)); + assert!(!range.is_base_address(8)); + + let range = RawRange { + begin: 0xffff_ffff, + end: 0, + }; + assert!(!range.is_end()); + assert!(range.is_base_address(4)); + assert!(!range.is_base_address(8)); + + let range = RawRange { + begin: 0xffff_ffff_ffff_ffff, + end: 0, + }; + assert!(!range.is_end()); + assert!(!range.is_base_address(4)); + assert!(range.is_base_address(8)); + } + + #[test] + fn test_ranges_32() { + let tombstone = !0u32 - 1; + let start = Label::new(); + let first = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // A range before the offset. + .mark(&start) + .L32(0x10000).L32(0x10100) + .mark(&first) + // A normal range. + .L32(0x10200).L32(0x10300) + // A base address selection followed by a normal range. + .L32(0xffff_ffff).L32(0x0200_0000) + .L32(0x10400).L32(0x10500) + // An empty range followed by a normal range. + .L32(0x10600).L32(0x10600) + .L32(0x10800).L32(0x10900) + // A range that starts at 0. + .L32(0).L32(1) + // A range that ends at -1. + .L32(0xffff_ffff).L32(0x0000_0000) + .L32(0).L32(0xffff_ffff) + // A normal range with tombstone. + .L32(tombstone).L32(tombstone) + // A base address selection with tombstone followed by a normal range. + .L32(0xffff_ffff).L32(tombstone) + .L32(0x10a00).L32(0x10b00) + // A range end. + .L32(0).L32(0) + // Some extra data. 
+ .L32(0); + + let buf = section.get_contents().unwrap(); + let debug_ranges = DebugRanges::new(&buf, LittleEndian); + let debug_rnglists = DebugRngLists::new(&[], LittleEndian); + let rnglists = RangeLists::new(debug_ranges, debug_rnglists); + let offset = RangeListsOffset((&first - &start) as usize); + let debug_addr = &DebugAddr::from(EndianSlice::new(&[], LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut ranges = rnglists + .ranges(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0101_0200, + end: 0x0101_0300, + })) + ); + + // A base address selection followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0400, + end: 0x0201_0500, + })) + ); + + // An empty range followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0600, + end: 0x0201_0600, + })) + ); + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0800, + end: 0x0201_0900, + })) + ); + + // A range that starts at 0. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0200_0000, + end: 0x0200_0001, + })) + ); + + // A range that ends at -1. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0000_0000, + end: 0xffff_ffff, + })) + ); + + // A range end. + assert_eq!(ranges.next(), Ok(None)); + + // An offset at the end of buf. + let mut ranges = rnglists + .ranges( + RangeListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(ranges.next(), Ok(None)); + } + + #[test] + fn test_ranges_64() { + let tombstone = !0u64 - 1; + let start = Label::new(); + let first = Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // A range before the offset. 
+ .mark(&start) + .L64(0x10000).L64(0x10100) + .mark(&first) + // A normal range. + .L64(0x10200).L64(0x10300) + // A base address selection followed by a normal range. + .L64(0xffff_ffff_ffff_ffff).L64(0x0200_0000) + .L64(0x10400).L64(0x10500) + // An empty range followed by a normal range. + .L64(0x10600).L64(0x10600) + .L64(0x10800).L64(0x10900) + // A range that starts at 0. + .L64(0).L64(1) + // A range that ends at -1. + .L64(0xffff_ffff_ffff_ffff).L64(0x0000_0000) + .L64(0).L64(0xffff_ffff_ffff_ffff) + // A normal range with tombstone. + .L64(tombstone).L64(tombstone) + // A base address selection with tombstone followed by a normal range. + .L64(0xffff_ffff_ffff_ffff).L64(tombstone) + .L64(0x10a00).L64(0x10b00) + // A range end. + .L64(0).L64(0) + // Some extra data. + .L64(0); + + let buf = section.get_contents().unwrap(); + let debug_ranges = DebugRanges::new(&buf, LittleEndian); + let debug_rnglists = DebugRngLists::new(&[], LittleEndian); + let rnglists = RangeLists::new(debug_ranges, debug_rnglists); + let offset = RangeListsOffset((&first - &start) as usize); + let debug_addr = &DebugAddr::from(EndianSlice::new(&[], LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + let encoding = Encoding { + format: Format::Dwarf64, + version: 4, + address_size: 8, + }; + let mut ranges = rnglists + .ranges(offset, encoding, 0x0100_0000, debug_addr, debug_addr_base) + .unwrap(); + + // A normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0101_0200, + end: 0x0101_0300, + })) + ); + + // A base address selection followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0400, + end: 0x0201_0500, + })) + ); + + // An empty range followed by a normal range. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0600, + end: 0x0201_0600, + })) + ); + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0201_0800, + end: 0x0201_0900, + })) + ); + + // A range that starts at 0. 
+ assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0200_0000, + end: 0x0200_0001, + })) + ); + + // A range that ends at -1. + assert_eq!( + ranges.next(), + Ok(Some(Range { + begin: 0x0, + end: 0xffff_ffff_ffff_ffff, + })) + ); + + // A range end. + assert_eq!(ranges.next(), Ok(None)); + + // An offset at the end of buf. + let mut ranges = rnglists + .ranges( + RangeListsOffset(buf.len()), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(ranges.next(), Ok(None)); + } + + #[test] + fn test_ranges_invalid() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + // An invalid range. + .L32(0x20000).L32(0x10000) + // An invalid range after wrapping. + .L32(0x20000).L32(0xff01_0000); + + let buf = section.get_contents().unwrap(); + let debug_ranges = DebugRanges::new(&buf, LittleEndian); + let debug_rnglists = DebugRngLists::new(&[], LittleEndian); + let rnglists = RangeLists::new(debug_ranges, debug_rnglists); + let debug_addr = &DebugAddr::from(EndianSlice::new(&[], LittleEndian)); + let debug_addr_base = DebugAddrBase(0); + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + + // An invalid range. + let mut ranges = rnglists + .ranges( + RangeListsOffset(0x0), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(ranges.next(), Ok(None)); + + // An invalid range after wrapping. + let mut ranges = rnglists + .ranges( + RangeListsOffset(0x8), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) + .unwrap(); + assert_eq!(ranges.next(), Ok(None)); + + // An invalid offset. 
+ match rnglists.ranges( + RangeListsOffset(buf.len() + 1), + encoding, + 0x0100_0000, + debug_addr, + debug_addr_base, + ) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + #[test] + fn test_get_offset() { + for format in [Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version: 5, + address_size: 4, + }; + + let zero = Label::new(); + let length = Label::new(); + let start = Label::new(); + let first = Label::new(); + let end = Label::new(); + let mut section = Section::with_endian(Endian::Little) + .mark(&zero) + .initial_length(format, &length, &start) + .D16(encoding.version) + .D8(encoding.address_size) + .D8(0) + .D32(20) + .mark(&first); + for i in 0..20 { + section = section.word(format.word_size(), 1000 + i); + } + section = section.mark(&end); + length.set_const((&end - &start) as u64); + let section = section.get_contents().unwrap(); + + let debug_ranges = DebugRanges::from(EndianSlice::new(&[], LittleEndian)); + let debug_rnglists = DebugRngLists::from(EndianSlice::new(§ion, LittleEndian)); + let ranges = RangeLists::new(debug_ranges, debug_rnglists); + + let base = DebugRngListsBase((&first - &zero) as usize); + assert_eq!( + ranges.get_offset(encoding, base, DebugRngListsIndex(0)), + Ok(RangeListsOffset(base.0 + 1000)) + ); + assert_eq!( + ranges.get_offset(encoding, base, DebugRngListsIndex(19)), + Ok(RangeListsOffset(base.0 + 1019)) + ); + } + } +} diff --git a/third_party/rust/gimli/src/read/str.rs b/third_party/rust/gimli/src/read/str.rs new file mode 100644 index 000000000000..df7ab1ccd223 --- /dev/null +++ b/third_party/rust/gimli/src/read/str.rs @@ -0,0 +1,291 @@ +use crate::common::{ + DebugLineStrOffset, DebugStrOffset, DebugStrOffsetsBase, DebugStrOffsetsIndex, DwarfFileType, + Encoding, SectionId, +}; +use crate::endianity::Endianity; +use crate::read::{EndianSlice, Reader, ReaderOffset, Result, Section}; +use crate::Format; + +/// The `DebugStr` 
struct represents the DWARF strings +/// found in the `.debug_str` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugStr { + debug_str_section: R, +} + +impl<'input, Endian> DebugStr> +where + Endian: Endianity, +{ + /// Construct a new `DebugStr` instance from the data in the `.debug_str` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_str` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugStr, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_str_section_somehow = || &buf; + /// let debug_str = DebugStr::new(read_debug_str_section_somehow(), LittleEndian); + /// ``` + pub fn new(debug_str_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_str_section, endian)) + } +} + +impl DebugStr { + /// Lookup a string from the `.debug_str` section by DebugStrOffset. + /// + /// ``` + /// use gimli::{DebugStr, DebugStrOffset, LittleEndian}; + /// + /// # let buf = [0x01, 0x02, 0x00]; + /// # let offset = DebugStrOffset(0); + /// # let read_debug_str_section_somehow = || &buf; + /// # let debug_str_offset_somehow = || offset; + /// let debug_str = DebugStr::new(read_debug_str_section_somehow(), LittleEndian); + /// println!("Found string {:?}", debug_str.get_str(debug_str_offset_somehow())); + /// ``` + pub fn get_str(&self, offset: DebugStrOffset) -> Result { + let input = &mut self.debug_str_section.clone(); + input.skip(offset.0)?; + input.read_null_terminated_slice() + } +} + +impl DebugStr { + /// Create a `DebugStr` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. 
+ pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugStr + where + F: FnMut(&'a T) -> R, + { + borrow(&self.debug_str_section).into() + } +} + +impl Section for DebugStr { + fn id() -> SectionId { + SectionId::DebugStr + } + + fn reader(&self) -> &R { + &self.debug_str_section + } +} + +impl From for DebugStr { + fn from(debug_str_section: R) -> Self { + DebugStr { debug_str_section } + } +} + +/// The raw contents of the `.debug_str_offsets` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugStrOffsets { + section: R, +} + +impl DebugStrOffsets { + // TODO: add an iterator over the sets of entries in the section. + // This is not needed for common usage of the section though. + + /// Returns the `.debug_str` offset at the given `base` and `index`. + /// + /// A set of entries in the `.debug_str_offsets` section consists of a header + /// followed by a series of string table offsets. + /// + /// The `base` must be the `DW_AT_str_offsets_base` value from the compilation unit DIE. + /// This is an offset that points to the first entry following the header. + /// + /// The `index` is the value of a `DW_FORM_strx` attribute. + /// + /// The `format` must be the DWARF format of the compilation unit. This format must + /// match the header. However, note that we do not parse the header to validate this, + /// since locating the header is unreliable, and the GNU extensions do not emit it. + pub fn get_str_offset( + &self, + format: Format, + base: DebugStrOffsetsBase, + index: DebugStrOffsetsIndex, + ) -> Result> { + let input = &mut self.section.clone(); + input.skip(base.0)?; + input.skip(R::Offset::from_u64( + index.0.into_u64() * u64::from(format.word_size()), + )?)?; + input.read_offset(format).map(DebugStrOffset) + } +} + +impl DebugStrOffsets { + /// Create a `DebugStrOffsets` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. 
+ pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugStrOffsets + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugStrOffsets { + fn id() -> SectionId { + SectionId::DebugStrOffsets + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugStrOffsets { + fn from(section: R) -> Self { + DebugStrOffsets { section } + } +} + +impl DebugStrOffsetsBase +where + Offset: ReaderOffset, +{ + /// Returns a `DebugStrOffsetsBase` with the default value of DW_AT_str_offsets_base + /// for the given `Encoding` and `DwarfFileType`. + pub fn default_for_encoding_and_file( + encoding: Encoding, + file_type: DwarfFileType, + ) -> DebugStrOffsetsBase { + if encoding.version >= 5 && file_type == DwarfFileType::Dwo { + // In .dwo files, the compiler omits the DW_AT_str_offsets_base attribute (because there is + // only a single unit in the file) but we must skip past the header, which the attribute + // would normally do for us. + // initial_length_size + version + 2 bytes of padding. + DebugStrOffsetsBase(Offset::from_u8( + encoding.format.initial_length_size() + 2 + 2, + )) + } else { + DebugStrOffsetsBase(Offset::from_u8(0)) + } + } +} + +/// The `DebugLineStr` struct represents the DWARF strings +/// found in the `.debug_line_str` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugLineStr { + section: R, +} + +impl<'input, Endian> DebugLineStr> +where + Endian: Endianity, +{ + /// Construct a new `DebugLineStr` instance from the data in the `.debug_line_str` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_line_str` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. 
+ /// + /// ``` + /// use gimli::{DebugLineStr, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_line_str_section_somehow = || &buf; + /// let debug_str = DebugLineStr::new(read_debug_line_str_section_somehow(), LittleEndian); + /// ``` + pub fn new(debug_line_str_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_line_str_section, endian)) + } +} + +impl DebugLineStr { + /// Lookup a string from the `.debug_line_str` section by DebugLineStrOffset. + pub fn get_str(&self, offset: DebugLineStrOffset) -> Result { + let input = &mut self.section.clone(); + input.skip(offset.0)?; + input.read_null_terminated_slice() + } +} + +impl DebugLineStr { + /// Create a `DebugLineStr` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugLineStr + where + F: FnMut(&'a T) -> R, + { + borrow(&self.section).into() + } +} + +impl Section for DebugLineStr { + fn id() -> SectionId { + SectionId::DebugLineStr + } + + fn reader(&self) -> &R { + &self.section + } +} + +impl From for DebugLineStr { + fn from(section: R) -> Self { + DebugLineStr { section } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_util::GimliSectionMethods; + use crate::LittleEndian; + use test_assembler::{Endian, Label, LabelMaker, Section}; + + #[test] + fn test_get_str_offset() { + for format in [Format::Dwarf32, Format::Dwarf64] { + let zero = Label::new(); + let length = Label::new(); + let start = Label::new(); + let first = Label::new(); + let end = Label::new(); + let mut section = Section::with_endian(Endian::Little) + .mark(&zero) + .initial_length(format, &length, &start) + .D16(5) + .D16(0) + .mark(&first); + for i in 0..20 { + section = section.word(format.word_size(), 1000 + i); + } + section = section.mark(&end); + length.set_const((&end - 
&start) as u64); + + let section = section.get_contents().unwrap(); + let debug_str_offsets = DebugStrOffsets::from(EndianSlice::new(§ion, LittleEndian)); + let base = DebugStrOffsetsBase((&first - &zero) as usize); + + assert_eq!( + debug_str_offsets.get_str_offset(format, base, DebugStrOffsetsIndex(0)), + Ok(DebugStrOffset(1000)) + ); + assert_eq!( + debug_str_offsets.get_str_offset(format, base, DebugStrOffsetsIndex(19)), + Ok(DebugStrOffset(1019)) + ); + } + } +} diff --git a/third_party/rust/gimli/src/read/unit.rs b/third_party/rust/gimli/src/read/unit.rs new file mode 100644 index 000000000000..bd62aa90069c --- /dev/null +++ b/third_party/rust/gimli/src/read/unit.rs @@ -0,0 +1,6133 @@ +//! Functions for parsing DWARF `.debug_info` and `.debug_types` sections. + +use core::cell::Cell; +use core::ops::{Range, RangeFrom, RangeTo}; +use core::{u16, u8}; + +use crate::common::{ + DebugAbbrevOffset, DebugAddrBase, DebugAddrIndex, DebugInfoOffset, DebugLineOffset, + DebugLineStrOffset, DebugLocListsBase, DebugLocListsIndex, DebugMacinfoOffset, + DebugMacroOffset, DebugRngListsBase, DebugRngListsIndex, DebugStrOffset, DebugStrOffsetsBase, + DebugStrOffsetsIndex, DebugTypeSignature, DebugTypesOffset, DwoId, Encoding, Format, + LocationListsOffset, RawRangeListsOffset, SectionId, UnitSectionOffset, +}; +use crate::constants; +use crate::endianity::Endianity; +use crate::read::abbrev::get_attribute_size; +use crate::read::{ + Abbreviation, Abbreviations, AttributeSpecification, DebugAbbrev, DebugStr, EndianSlice, Error, + Expression, Reader, ReaderOffset, Result, Section, UnitOffset, +}; + +impl DebugTypesOffset { + /// Convert an offset to be relative to the start of the given unit, + /// instead of relative to the start of the .debug_types section. + /// Returns `None` if the offset is not within the unit entries. 
+ pub fn to_unit_offset(&self, unit: &UnitHeader) -> Option> + where + R: Reader, + { + let unit_offset = unit.offset().as_debug_types_offset()?; + let offset = UnitOffset(self.0.checked_sub(unit_offset.0)?); + if !unit.is_valid_offset(offset) { + return None; + } + Some(offset) + } +} + +impl DebugInfoOffset { + /// Convert an offset to be relative to the start of the given unit, + /// instead of relative to the start of the .debug_info section. + /// Returns `None` if the offset is not within this unit entries. + pub fn to_unit_offset(&self, unit: &UnitHeader) -> Option> + where + R: Reader, + { + let unit_offset = unit.offset().as_debug_info_offset()?; + let offset = UnitOffset(self.0.checked_sub(unit_offset.0)?); + if !unit.is_valid_offset(offset) { + return None; + } + Some(offset) + } +} + +impl UnitOffset { + /// Convert an offset to be relative to the start of the .debug_info section, + /// instead of relative to the start of the given unit. Returns None if the + /// provided unit lives in the .debug_types section. + pub fn to_debug_info_offset(&self, unit: &UnitHeader) -> Option> + where + R: Reader, + { + let unit_offset = unit.offset().as_debug_info_offset()?; + Some(DebugInfoOffset(unit_offset.0 + self.0)) + } + + /// Convert an offset to be relative to the start of the .debug_types section, + /// instead of relative to the start of the given unit. Returns None if the + /// provided unit lives in the .debug_info section. + pub fn to_debug_types_offset(&self, unit: &UnitHeader) -> Option> + where + R: Reader, + { + let unit_offset = unit.offset().as_debug_types_offset()?; + Some(DebugTypesOffset(unit_offset.0 + self.0)) + } +} + +/// The `DebugInfo` struct represents the DWARF debugging information found in +/// the `.debug_info` section. 
+#[derive(Debug, Default, Clone, Copy)] +pub struct DebugInfo { + debug_info_section: R, +} + +impl<'input, Endian> DebugInfo> +where + Endian: Endianity, +{ + /// Construct a new `DebugInfo` instance from the data in the `.debug_info` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_info` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugInfo, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_info_section_somehow = || &buf; + /// let debug_info = DebugInfo::new(read_debug_info_section_somehow(), LittleEndian); + /// ``` + pub fn new(debug_info_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_info_section, endian)) + } +} + +impl DebugInfo { + /// Iterate the units in this `.debug_info` section. + /// + /// ``` + /// use gimli::{DebugInfo, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_info_section_somehow = || &buf; + /// let debug_info = DebugInfo::new(read_debug_info_section_somehow(), LittleEndian); + /// + /// let mut iter = debug_info.units(); + /// while let Some(unit) = iter.next().unwrap() { + /// println!("unit's length is {}", unit.unit_length()); + /// } + /// ``` + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn units(&self) -> DebugInfoUnitHeadersIter { + DebugInfoUnitHeadersIter { + input: self.debug_info_section.clone(), + offset: DebugInfoOffset(R::Offset::from_u8(0)), + } + } + + /// Get the UnitHeader located at offset from this .debug_info section. + /// + /// + pub fn header_from_offset(&self, offset: DebugInfoOffset) -> Result> { + let input = &mut self.debug_info_section.clone(); + input.skip(offset.0)?; + parse_unit_header(input, offset.into()) + } +} + +impl DebugInfo { + /// Create a `DebugInfo` section that references the data in `self`. 
+ /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. + pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugInfo + where + F: FnMut(&'a T) -> R, + { + borrow(&self.debug_info_section).into() + } +} + +impl Section for DebugInfo { + fn id() -> SectionId { + SectionId::DebugInfo + } + + fn reader(&self) -> &R { + &self.debug_info_section + } +} + +impl From for DebugInfo { + fn from(debug_info_section: R) -> Self { + DebugInfo { debug_info_section } + } +} + +/// An iterator over the units of a .debug_info section. +/// +/// See the [documentation on +/// `DebugInfo::units`](./struct.DebugInfo.html#method.units) for more detail. +#[derive(Clone, Debug)] +pub struct DebugInfoUnitHeadersIter { + input: R, + offset: DebugInfoOffset, +} + +impl DebugInfoUnitHeadersIter { + /// Advance the iterator to the next unit header. + pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + Ok(None) + } else { + let len = self.input.len(); + match parse_unit_header(&mut self.input, self.offset.into()) { + Ok(header) => { + self.offset.0 += len - self.input.len(); + Ok(Some(header)) + } + Err(e) => { + self.input.empty(); + Err(e) + } + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for DebugInfoUnitHeadersIter { + type Item = UnitHeader; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + DebugInfoUnitHeadersIter::next(self) + } +} + +/// Parse the unit type from the unit header. +fn parse_unit_type(input: &mut R) -> Result { + let val = input.read_u8()?; + Ok(constants::DwUt(val)) +} + +/// Parse the `debug_abbrev_offset` in the compilation unit header. +fn parse_debug_abbrev_offset( + input: &mut R, + format: Format, +) -> Result> { + input.read_offset(format).map(DebugAbbrevOffset) +} + +/// Parse the `debug_info_offset` in the arange header. 
+pub(crate) fn parse_debug_info_offset( + input: &mut R, + format: Format, +) -> Result> { + input.read_offset(format).map(DebugInfoOffset) +} + +/// This enum specifies the type of the unit and any type +/// specific data carried in the header (e.g. the type +/// signature/type offset of a type unit). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UnitType +where + Offset: ReaderOffset, +{ + /// In DWARF5, a unit with type `DW_UT_compile`. In previous DWARF versions, + /// any unit appearing in the .debug_info section. + Compilation, + /// In DWARF5, a unit with type `DW_UT_type`. In DWARF4, any unit appearing + /// in the .debug_types section. + Type { + /// The unique type signature for this type unit. + type_signature: DebugTypeSignature, + /// The offset within this type unit where the type is defined. + type_offset: UnitOffset, + }, + /// A unit with type `DW_UT_partial`. The root DIE of this unit should be a + /// `DW_TAG_partial_unit`. + Partial, + /// A unit with type `DW_UT_skeleton`. The enclosed dwo_id can be used to + /// link this with the corresponding `SplitCompilation` unit in a dwo file. + /// NB: The non-standard GNU split DWARF extension to DWARF 4 will instead + /// be a `Compilation` unit with the dwo_id present as an attribute on the + /// root DIE. + Skeleton(DwoId), + /// A unit with type `DW_UT_split_compile`. The enclosed dwo_id can be used to + /// link this with the corresponding `Skeleton` unit in the original binary. + /// NB: The non-standard GNU split DWARF extension to DWARF 4 will instead + /// be a `Compilation` unit with the dwo_id present as an attribute on the + /// root DIE. + SplitCompilation(DwoId), + /// A unit with type `DW_UT_split_type`. A split type unit is identical to a + /// conventional type unit except for the section in which it appears. + SplitType { + /// The unique type signature for this type unit. + type_signature: DebugTypeSignature, + /// The offset within this type unit where the type is defined. 
+ type_offset: UnitOffset, + }, +} + +impl UnitType +where + Offset: ReaderOffset, +{ + // TODO: This will be used by the DWARF writing code once it + // supports unit types other than simple compilation units. + #[allow(unused)] + pub(crate) fn dw_ut(&self) -> constants::DwUt { + match self { + UnitType::Compilation => constants::DW_UT_compile, + UnitType::Type { .. } => constants::DW_UT_type, + UnitType::Partial => constants::DW_UT_partial, + UnitType::Skeleton(_) => constants::DW_UT_skeleton, + UnitType::SplitCompilation(_) => constants::DW_UT_split_compile, + UnitType::SplitType { .. } => constants::DW_UT_split_type, + } + } +} + +/// The common fields for the headers of compilation units and +/// type units. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct UnitHeader::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + encoding: Encoding, + unit_length: Offset, + unit_type: UnitType, + debug_abbrev_offset: DebugAbbrevOffset, + unit_offset: UnitSectionOffset, + entries_buf: R, +} + +/// Static methods. +impl UnitHeader +where + R: Reader, + Offset: ReaderOffset, +{ + /// Construct a new `UnitHeader`. + pub fn new( + encoding: Encoding, + unit_length: Offset, + unit_type: UnitType, + debug_abbrev_offset: DebugAbbrevOffset, + unit_offset: UnitSectionOffset, + entries_buf: R, + ) -> Self { + UnitHeader { + encoding, + unit_length, + unit_type, + debug_abbrev_offset, + unit_offset, + entries_buf, + } + } +} + +/// Instance methods. +impl UnitHeader +where + R: Reader, + Offset: ReaderOffset, +{ + /// Get the offset of this unit within its section. + pub fn offset(&self) -> UnitSectionOffset { + self.unit_offset + } + + /// Return the serialized size of the common unit header for the given + /// DWARF format. 
+ pub fn size_of_header(&self) -> usize { + let unit_length_size = self.encoding.format.initial_length_size() as usize; + let version_size = 2; + let debug_abbrev_offset_size = self.encoding.format.word_size() as usize; + let address_size_size = 1; + let unit_type_size = if self.encoding.version == 5 { 1 } else { 0 }; + let type_specific_size = match self.unit_type { + UnitType::Compilation | UnitType::Partial => 0, + UnitType::Type { .. } | UnitType::SplitType { .. } => { + let type_signature_size = 8; + let type_offset_size = self.encoding.format.word_size() as usize; + type_signature_size + type_offset_size + } + UnitType::Skeleton(_) | UnitType::SplitCompilation(_) => 8, + }; + + unit_length_size + + version_size + + debug_abbrev_offset_size + + address_size_size + + unit_type_size + + type_specific_size + } + + /// Get the length of the debugging info for this compilation unit, not + /// including the byte length of the encoded length itself. + pub fn unit_length(&self) -> Offset { + self.unit_length + } + + /// Get the length of the debugging info for this compilation unit, + /// including the byte length of the encoded length itself. + pub fn length_including_self(&self) -> Offset { + Offset::from_u8(self.format().initial_length_size()) + self.unit_length + } + + /// Return the encoding parameters for this unit. + pub fn encoding(&self) -> Encoding { + self.encoding + } + + /// Get the DWARF version of the debugging info for this compilation unit. + pub fn version(&self) -> u16 { + self.encoding.version + } + + /// Get the UnitType of this unit. + pub fn type_(&self) -> UnitType { + self.unit_type + } + + /// The offset into the `.debug_abbrev` section for this compilation unit's + /// debugging information entries' abbreviations. + pub fn debug_abbrev_offset(&self) -> DebugAbbrevOffset { + self.debug_abbrev_offset + } + + /// The size of addresses (in bytes) in this compilation unit. 
+ pub fn address_size(&self) -> u8 { + self.encoding.address_size + } + + /// Whether this compilation unit is encoded in 64- or 32-bit DWARF. + pub fn format(&self) -> Format { + self.encoding.format + } + + /// The serialized size of the header for this compilation unit. + pub fn header_size(&self) -> Offset { + self.length_including_self() - self.entries_buf.len() + } + + pub(crate) fn is_valid_offset(&self, offset: UnitOffset) -> bool { + let size_of_header = self.header_size(); + if offset.0 < size_of_header { + return false; + } + + let relative_to_entries_buf = offset.0 - size_of_header; + relative_to_entries_buf < self.entries_buf.len() + } + + /// Get the underlying bytes for the supplied range. + pub fn range(&self, idx: Range>) -> Result { + if !self.is_valid_offset(idx.start) { + return Err(Error::OffsetOutOfBounds); + } + if !self.is_valid_offset(idx.end) { + return Err(Error::OffsetOutOfBounds); + } + assert!(idx.start <= idx.end); + let size_of_header = self.header_size(); + let start = idx.start.0 - size_of_header; + let end = idx.end.0 - size_of_header; + let mut input = self.entries_buf.clone(); + input.skip(start)?; + input.truncate(end - start)?; + Ok(input) + } + + /// Get the underlying bytes for the supplied range. + pub fn range_from(&self, idx: RangeFrom>) -> Result { + if !self.is_valid_offset(idx.start) { + return Err(Error::OffsetOutOfBounds); + } + let start = idx.start.0 - self.header_size(); + let mut input = self.entries_buf.clone(); + input.skip(start)?; + Ok(input) + } + + /// Get the underlying bytes for the supplied range. + pub fn range_to(&self, idx: RangeTo>) -> Result { + if !self.is_valid_offset(idx.end) { + return Err(Error::OffsetOutOfBounds); + } + let end = idx.end.0 - self.header_size(); + let mut input = self.entries_buf.clone(); + input.truncate(end)?; + Ok(input) + } + + /// Read the `DebuggingInformationEntry` at the given offset. 
+ pub fn entry<'me, 'abbrev>( + &'me self, + abbreviations: &'abbrev Abbreviations, + offset: UnitOffset, + ) -> Result> { + let mut input = self.range_from(offset..)?; + let entry = DebuggingInformationEntry::parse(&mut input, self, abbreviations)?; + entry.ok_or(Error::NoEntryAtGivenOffset) + } + + /// Navigate this unit's `DebuggingInformationEntry`s. + pub fn entries<'me, 'abbrev>( + &'me self, + abbreviations: &'abbrev Abbreviations, + ) -> EntriesCursor<'abbrev, 'me, R> { + EntriesCursor { + unit: self, + input: self.entries_buf.clone(), + abbreviations, + cached_current: None, + delta_depth: 0, + } + } + + /// Navigate this compilation unit's `DebuggingInformationEntry`s + /// starting at the given offset. + pub fn entries_at_offset<'me, 'abbrev>( + &'me self, + abbreviations: &'abbrev Abbreviations, + offset: UnitOffset, + ) -> Result> { + let input = self.range_from(offset..)?; + Ok(EntriesCursor { + unit: self, + input, + abbreviations, + cached_current: None, + delta_depth: 0, + }) + } + + /// Navigate this unit's `DebuggingInformationEntry`s as a tree + /// starting at the given offset. + pub fn entries_tree<'me, 'abbrev>( + &'me self, + abbreviations: &'abbrev Abbreviations, + offset: Option>, + ) -> Result> { + let input = match offset { + Some(offset) => self.range_from(offset..)?, + None => self.entries_buf.clone(), + }; + Ok(EntriesTree::new(input, self, abbreviations)) + } + + /// Read the raw data that defines the Debugging Information Entries. + pub fn entries_raw<'me, 'abbrev>( + &'me self, + abbreviations: &'abbrev Abbreviations, + offset: Option>, + ) -> Result> { + let input = match offset { + Some(offset) => self.range_from(offset..)?, + None => self.entries_buf.clone(), + }; + Ok(EntriesRaw { + input, + unit: self, + abbreviations, + depth: 0, + }) + } + + /// Parse this unit's abbreviations. 
+ pub fn abbreviations(&self, debug_abbrev: &DebugAbbrev) -> Result { + debug_abbrev.abbreviations(self.debug_abbrev_offset()) + } +} + +/// Parse a unit header. +fn parse_unit_header( + input: &mut R, + unit_offset: UnitSectionOffset, +) -> Result> +where + R: Reader, + Offset: ReaderOffset, +{ + let (unit_length, format) = input.read_initial_length()?; + let mut rest = input.split(unit_length)?; + + let version = rest.read_u16()?; + let abbrev_offset; + let address_size; + let unit_type; + // DWARF 1 was very different, and is obsolete, so isn't supported by this + // reader. + if 2 <= version && version <= 4 { + abbrev_offset = parse_debug_abbrev_offset(&mut rest, format)?; + address_size = rest.read_u8()?; + // Before DWARF5, all units in the .debug_info section are compilation + // units, and all units in the .debug_types section are type units. + unit_type = match unit_offset { + UnitSectionOffset::DebugInfoOffset(_) => constants::DW_UT_compile, + UnitSectionOffset::DebugTypesOffset(_) => constants::DW_UT_type, + }; + } else if version == 5 { + unit_type = parse_unit_type(&mut rest)?; + address_size = rest.read_u8()?; + abbrev_offset = parse_debug_abbrev_offset(&mut rest, format)?; + } else { + return Err(Error::UnknownVersion(u64::from(version))); + } + let encoding = Encoding { + format, + version, + address_size, + }; + + // Parse any data specific to this type of unit. 
+ let unit_type = match unit_type { + constants::DW_UT_compile => UnitType::Compilation, + constants::DW_UT_type => { + let type_signature = parse_type_signature(&mut rest)?; + let type_offset = parse_type_offset(&mut rest, format)?; + UnitType::Type { + type_signature, + type_offset, + } + } + constants::DW_UT_partial => UnitType::Partial, + constants::DW_UT_skeleton => { + let dwo_id = parse_dwo_id(&mut rest)?; + UnitType::Skeleton(dwo_id) + } + constants::DW_UT_split_compile => { + let dwo_id = parse_dwo_id(&mut rest)?; + UnitType::SplitCompilation(dwo_id) + } + constants::DW_UT_split_type => { + let type_signature = parse_type_signature(&mut rest)?; + let type_offset = parse_type_offset(&mut rest, format)?; + UnitType::SplitType { + type_signature, + type_offset, + } + } + _ => return Err(Error::UnsupportedUnitType), + }; + + Ok(UnitHeader::new( + encoding, + unit_length, + unit_type, + abbrev_offset, + unit_offset, + rest, + )) +} + +/// Parse a dwo_id from a header +fn parse_dwo_id(input: &mut R) -> Result { + Ok(DwoId(input.read_u64()?)) +} + +/// A Debugging Information Entry (DIE). +/// +/// DIEs have a set of attributes and optionally have children DIEs as well. +#[derive(Clone, Debug)] +pub struct DebuggingInformationEntry<'abbrev, 'unit, R, Offset = ::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + offset: UnitOffset, + attrs_slice: R, + attrs_len: Cell>, + abbrev: &'abbrev Abbreviation, + unit: &'unit UnitHeader, +} + +impl<'abbrev, 'unit, R, Offset> DebuggingInformationEntry<'abbrev, 'unit, R, Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// Construct a new `DebuggingInformationEntry`. + pub fn new( + offset: UnitOffset, + attrs_slice: R, + abbrev: &'abbrev Abbreviation, + unit: &'unit UnitHeader, + ) -> Self { + DebuggingInformationEntry { + offset, + attrs_slice, + attrs_len: Cell::new(None), + abbrev, + unit, + } + } + + /// Get this entry's code. 
+ pub fn code(&self) -> u64 { + self.abbrev.code() + } + + /// Get this entry's offset. + pub fn offset(&self) -> UnitOffset { + self.offset + } + + /// Get this entry's `DW_TAG_whatever` tag. + /// + /// ``` + /// # use gimli::{DebugAbbrev, DebugInfo, LittleEndian}; + /// # let info_buf = [ + /// # // Comilation unit header + /// # + /// # // 32-bit unit length = 12 + /// # 0x0c, 0x00, 0x00, 0x00, + /// # // Version 4 + /// # 0x04, 0x00, + /// # // debug_abbrev_offset + /// # 0x00, 0x00, 0x00, 0x00, + /// # // Address size + /// # 0x04, + /// # + /// # // DIEs + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # ]; + /// # let debug_info = DebugInfo::new(&info_buf, LittleEndian); + /// # let abbrev_buf = [ + /// # // Code + /// # 0x01, + /// # // DW_TAG_subprogram + /// # 0x2e, + /// # // DW_CHILDREN_no + /// # 0x00, + /// # // Begin attributes + /// # // Attribute name = DW_AT_name + /// # 0x03, + /// # // Attribute form = DW_FORM_string + /// # 0x08, + /// # // End attributes + /// # 0x00, + /// # 0x00, + /// # // Null terminator + /// # 0x00 + /// # ]; + /// # let debug_abbrev = DebugAbbrev::new(&abbrev_buf, LittleEndian); + /// # let unit = debug_info.units().next().unwrap().unwrap(); + /// # let abbrevs = unit.abbreviations(&debug_abbrev).unwrap(); + /// # let mut cursor = unit.entries(&abbrevs); + /// # let (_, entry) = cursor.next_dfs().unwrap().unwrap(); + /// # let mut get_some_entry = || entry; + /// let entry = get_some_entry(); + /// + /// match entry.tag() { + /// gimli::DW_TAG_subprogram => + /// println!("this entry contains debug info about a function"), + /// gimli::DW_TAG_inlined_subroutine => + /// println!("this entry contains debug info about a particular instance of inlining"), + /// gimli::DW_TAG_variable => + /// println!("this entry contains debug info about a local variable"), + /// gimli::DW_TAG_formal_parameter => + /// println!("this entry 
contains debug info about a function parameter"), + /// otherwise => + /// println!("this entry is some other kind of data: {:?}", otherwise), + /// }; + /// ``` + pub fn tag(&self) -> constants::DwTag { + self.abbrev.tag() + } + + /// Return true if this entry's type can have children, false otherwise. + pub fn has_children(&self) -> bool { + self.abbrev.has_children() + } + + /// Iterate over this entry's set of attributes. + /// + /// ``` + /// use gimli::{DebugAbbrev, DebugInfo, LittleEndian}; + /// + /// // Read the `.debug_info` section. + /// + /// # let info_buf = [ + /// # // Comilation unit header + /// # + /// # // 32-bit unit length = 12 + /// # 0x0c, 0x00, 0x00, 0x00, + /// # // Version 4 + /// # 0x04, 0x00, + /// # // debug_abbrev_offset + /// # 0x00, 0x00, 0x00, 0x00, + /// # // Address size + /// # 0x04, + /// # + /// # // DIEs + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # ]; + /// # let read_debug_info_section_somehow = || &info_buf; + /// let debug_info = DebugInfo::new(read_debug_info_section_somehow(), LittleEndian); + /// + /// // Get the data about the first compilation unit out of the `.debug_info`. + /// + /// let unit = debug_info.units().next() + /// .expect("Should have at least one compilation unit") + /// .expect("and it should parse ok"); + /// + /// // Read the `.debug_abbrev` section and parse the + /// // abbreviations for our compilation unit. 
+ /// + /// # let abbrev_buf = [ + /// # // Code + /// # 0x01, + /// # // DW_TAG_subprogram + /// # 0x2e, + /// # // DW_CHILDREN_no + /// # 0x00, + /// # // Begin attributes + /// # // Attribute name = DW_AT_name + /// # 0x03, + /// # // Attribute form = DW_FORM_string + /// # 0x08, + /// # // End attributes + /// # 0x00, + /// # 0x00, + /// # // Null terminator + /// # 0x00 + /// # ]; + /// # let read_debug_abbrev_section_somehow = || &abbrev_buf; + /// let debug_abbrev = DebugAbbrev::new(read_debug_abbrev_section_somehow(), LittleEndian); + /// let abbrevs = unit.abbreviations(&debug_abbrev).unwrap(); + /// + /// // Get the first entry from that compilation unit. + /// + /// let mut cursor = unit.entries(&abbrevs); + /// let (_, entry) = cursor.next_dfs() + /// .expect("Should parse next entry") + /// .expect("Should have at least one entry"); + /// + /// // Finally, print the first entry's attributes. + /// + /// let mut attrs = entry.attrs(); + /// while let Some(attr) = attrs.next().unwrap() { + /// println!("Attribute name = {:?}", attr.name()); + /// println!("Attribute value = {:?}", attr.value()); + /// } + /// ``` + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn attrs<'me>(&'me self) -> AttrsIter<'abbrev, 'me, 'unit, R> { + AttrsIter { + input: self.attrs_slice.clone(), + attributes: self.abbrev.attributes(), + entry: self, + } + } + + /// Find the first attribute in this entry which has the given name, + /// and return it. Returns `Ok(None)` if no attribute is found. + pub fn attr(&self, name: constants::DwAt) -> Result>> { + let mut attrs = self.attrs(); + while let Some(attr) = attrs.next()? { + if attr.name() == name { + return Ok(Some(attr)); + } + } + Ok(None) + } + + /// Find the first attribute in this entry which has the given name, + /// and return its raw value. Returns `Ok(None)` if no attribute is found. 
+ pub fn attr_value_raw(&self, name: constants::DwAt) -> Result>> { + self.attr(name) + .map(|attr| attr.map(|attr| attr.raw_value())) + } + + /// Find the first attribute in this entry which has the given name, + /// and return its normalized value. Returns `Ok(None)` if no + /// attribute is found. + pub fn attr_value(&self, name: constants::DwAt) -> Result>> { + self.attr(name).map(|attr| attr.map(|attr| attr.value())) + } + + /// Return the input buffer after the last attribute. + #[inline(always)] + fn after_attrs(&self) -> Result { + if let Some(attrs_len) = self.attrs_len.get() { + let mut input = self.attrs_slice.clone(); + input.skip(attrs_len)?; + Ok(input) + } else { + let mut attrs = self.attrs(); + while attrs.next()?.is_some() {} + Ok(attrs.input) + } + } + + /// Use the `DW_AT_sibling` attribute to find the input buffer for the + /// next sibling. Returns `None` if the attribute is missing or invalid. + fn sibling(&self) -> Option { + let attr = self.attr_value(constants::DW_AT_sibling); + if let Ok(Some(AttributeValue::UnitRef(offset))) = attr { + if offset.0 > self.offset.0 { + if let Ok(input) = self.unit.range_from(offset..) { + return Some(input); + } + } + } + None + } + + /// Parse an entry. Returns `Ok(None)` for null entries. + #[inline(always)] + fn parse( + input: &mut R, + unit: &'unit UnitHeader, + abbreviations: &'abbrev Abbreviations, + ) -> Result> { + let offset = unit.header_size() + input.offset_from(&unit.entries_buf); + let code = input.read_uleb128()?; + if code == 0 { + return Ok(None); + }; + let abbrev = abbreviations + .get(code) + .ok_or(Error::UnknownAbbreviation(code))?; + Ok(Some(DebuggingInformationEntry { + offset: UnitOffset(offset), + attrs_slice: input.clone(), + attrs_len: Cell::new(None), + abbrev, + unit, + })) + } +} + +/// The value of an attribute in a `DebuggingInformationEntry`. +// +// Set the discriminant size so that all variants use the same alignment +// for their data. 
This gives better code generation in `parse_attribute`. +#[repr(u64)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum AttributeValue::Offset> +where + R: Reader, + Offset: ReaderOffset, +{ + /// "Refers to some location in the address space of the described program." + Addr(u64), + + /// A slice of an arbitrary number of bytes. + Block(R), + + /// A one byte constant data value. How to interpret the byte depends on context. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." + Data1(u8), + + /// A two byte constant data value. How to interpret the bytes depends on context. + /// + /// These bytes have been converted from `R::Endian`. This may need to be reversed + /// if this was not required. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." + Data2(u16), + + /// A four byte constant data value. How to interpret the bytes depends on context. + /// + /// These bytes have been converted from `R::Endian`. This may need to be reversed + /// if this was not required. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." + Data4(u32), + + /// An eight byte constant data value. How to interpret the bytes depends on context. + /// + /// These bytes have been converted from `R::Endian`. This may need to be reversed + /// if this was not required. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." + Data8(u64), + + /// A signed integer constant. + Sdata(i64), + + /// An unsigned integer constant. 
+ Udata(u64), + + /// "The information bytes contain a DWARF expression (see Section 2.5) or + /// location description (see Section 2.6)." + Exprloc(Expression), + + /// A boolean that indicates presence or absence of the attribute. + Flag(bool), + + /// An offset into another section. Which section this is an offset into + /// depends on context. + SecOffset(Offset), + + /// An offset to a set of addresses in the `.debug_addr` section. + DebugAddrBase(DebugAddrBase), + + /// An index into a set of addresses in the `.debug_addr` section. + DebugAddrIndex(DebugAddrIndex), + + /// An offset into the current compilation unit. + UnitRef(UnitOffset), + + /// An offset into the current `.debug_info` section, but possibly a + /// different compilation unit from the current one. + DebugInfoRef(DebugInfoOffset), + + /// An offset into the `.debug_info` section of the supplementary object file. + DebugInfoRefSup(DebugInfoOffset), + + /// An offset into the `.debug_line` section. + DebugLineRef(DebugLineOffset), + + /// An offset into either the `.debug_loc` section or the `.debug_loclists` section. + LocationListsRef(LocationListsOffset), + + /// An offset to a set of offsets in the `.debug_loclists` section. + DebugLocListsBase(DebugLocListsBase), + + /// An index into a set of offsets in the `.debug_loclists` section. + DebugLocListsIndex(DebugLocListsIndex), + + /// An offset into the `.debug_macinfo` section. + DebugMacinfoRef(DebugMacinfoOffset), + + /// An offset into the `.debug_macro` section. + DebugMacroRef(DebugMacroOffset), + + /// An offset into the `.debug_ranges` section. + RangeListsRef(RawRangeListsOffset), + + /// An offset to a set of offsets in the `.debug_rnglists` section. + DebugRngListsBase(DebugRngListsBase), + + /// An index into a set of offsets in the `.debug_rnglists` section. + DebugRngListsIndex(DebugRngListsIndex), + + /// A type signature. + DebugTypesRef(DebugTypeSignature), + + /// An offset into the `.debug_str` section. 
+ DebugStrRef(DebugStrOffset), + + /// An offset into the `.debug_str` section of the supplementary object file. + DebugStrRefSup(DebugStrOffset), + + /// An offset to a set of entries in the `.debug_str_offsets` section. + DebugStrOffsetsBase(DebugStrOffsetsBase), + + /// An index into a set of entries in the `.debug_str_offsets` section. + DebugStrOffsetsIndex(DebugStrOffsetsIndex), + + /// An offset into the `.debug_line_str` section. + DebugLineStrRef(DebugLineStrOffset), + + /// A slice of bytes representing a string. Does not include a final null byte. + /// Not guaranteed to be UTF-8 or anything like that. + String(R), + + /// The value of a `DW_AT_encoding` attribute. + Encoding(constants::DwAte), + + /// The value of a `DW_AT_decimal_sign` attribute. + DecimalSign(constants::DwDs), + + /// The value of a `DW_AT_endianity` attribute. + Endianity(constants::DwEnd), + + /// The value of a `DW_AT_accessibility` attribute. + Accessibility(constants::DwAccess), + + /// The value of a `DW_AT_visibility` attribute. + Visibility(constants::DwVis), + + /// The value of a `DW_AT_virtuality` attribute. + Virtuality(constants::DwVirtuality), + + /// The value of a `DW_AT_language` attribute. + Language(constants::DwLang), + + /// The value of a `DW_AT_address_class` attribute. + AddressClass(constants::DwAddr), + + /// The value of a `DW_AT_identifier_case` attribute. + IdentifierCase(constants::DwId), + + /// The value of a `DW_AT_calling_convention` attribute. + CallingConvention(constants::DwCc), + + /// The value of a `DW_AT_inline` attribute. + Inline(constants::DwInl), + + /// The value of a `DW_AT_ordering` attribute. + Ordering(constants::DwOrd), + + /// An index into the filename entries from the line number information + /// table for the compilation unit containing this value. + FileIndex(u64), + + /// An implementation-defined identifier uniquely identifying a compilation + /// unit. 
+ DwoId(DwoId), +} + +/// An attribute in a `DebuggingInformationEntry`, consisting of a name and +/// associated value. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct Attribute { + name: constants::DwAt, + value: AttributeValue, +} + +impl Attribute { + /// Get this attribute's name. + pub fn name(&self) -> constants::DwAt { + self.name + } + + /// Get this attribute's raw value. + pub fn raw_value(&self) -> AttributeValue { + self.value.clone() + } + + /// Get this attribute's normalized value. + /// + /// Attribute values can potentially be encoded in multiple equivalent forms, + /// and may have special meaning depending on the attribute name. This method + /// converts the attribute value to a normalized form based on the attribute + /// name. + /// + /// See "Table 7.5: Attribute encodings" and "Table 7.6: Attribute form encodings". + pub fn value(&self) -> AttributeValue { + // Table 7.5 shows the possible attribute classes for each name. + // Table 7.6 shows the possible attribute classes for each form. + // For each attribute name, we need to match on the form, and + // convert it to one of the classes that is allowed for both + // the name and the form. + // + // The individual class conversions rarely vary for each name, + // so for each class conversion we define a macro that matches + // on the allowed forms for that class. + // + // For some classes, we don't need to do any conversion, so their + // macro is empty. In the future we may want to fill them in to + // provide strict checking of the forms for each class. For now, + // they simply provide a way to document the allowed classes for + // each name. + + // DW_FORM_addr + // DW_FORM_addrx + // DW_FORM_addrx1 + // DW_FORM_addrx2 + // DW_FORM_addrx3 + // DW_FORM_addrx4 + macro_rules! address { + () => {}; + } + // DW_FORM_sec_offset + macro_rules! 
addrptr { + () => { + if let Some(offset) = self.offset_value() { + return AttributeValue::DebugAddrBase(DebugAddrBase(offset)); + } + }; + } + // DW_FORM_block + // DW_FORM_block1 + // DW_FORM_block2 + // DW_FORM_block4 + macro_rules! block { + () => {}; + } + // DW_FORM_sdata + // DW_FORM_udata + // DW_FORM_data1 + // DW_FORM_data2 + // DW_FORM_data4 + // DW_FORM_data8 + // DW_FORM_data16 + // DW_FORM_implicit_const + macro_rules! constant { + ($value:ident, $variant:ident) => { + if let Some(value) = self.$value() { + return AttributeValue::$variant(value); + } + }; + ($value:ident, $variant:ident, $constant:ident) => { + if let Some(value) = self.$value() { + return AttributeValue::$variant(constants::$constant(value)); + } + }; + } + // DW_FORM_exprloc + macro_rules! exprloc { + () => { + if let Some(value) = self.exprloc_value() { + return AttributeValue::Exprloc(value); + } + }; + } + // DW_FORM_flag + // DW_FORM_flag_present + macro_rules! flag { + () => {}; + } + // DW_FORM_sec_offset + macro_rules! lineptr { + () => { + if let Some(offset) = self.offset_value() { + return AttributeValue::DebugLineRef(DebugLineOffset(offset)); + } + }; + } + // This also covers `loclist` in DWARF version 5. + // DW_FORM_sec_offset + // DW_FORM_loclistx + macro_rules! loclistptr { + () => { + // DebugLocListsIndex is also an allowed form in DWARF version 5. + if let Some(offset) = self.offset_value() { + return AttributeValue::LocationListsRef(LocationListsOffset(offset)); + } + }; + } + // DW_FORM_sec_offset + macro_rules! loclistsptr { + () => { + if let Some(offset) = self.offset_value() { + return AttributeValue::DebugLocListsBase(DebugLocListsBase(offset)); + } + }; + } + // DWARF version <= 4. + // DW_FORM_sec_offset + macro_rules! macinfoptr { + () => { + if let Some(offset) = self.offset_value() { + return AttributeValue::DebugMacinfoRef(DebugMacinfoOffset(offset)); + } + }; + } + // DWARF version >= 5. + // DW_FORM_sec_offset + macro_rules! 
macroptr { + () => { + if let Some(offset) = self.offset_value() { + return AttributeValue::DebugMacroRef(DebugMacroOffset(offset)); + } + }; + } + // DW_FORM_ref_addr + // DW_FORM_ref1 + // DW_FORM_ref2 + // DW_FORM_ref4 + // DW_FORM_ref8 + // DW_FORM_ref_udata + // DW_FORM_ref_sig8 + // DW_FORM_ref_sup4 + // DW_FORM_ref_sup8 + macro_rules! reference { + () => {}; + } + // This also covers `rnglist` in DWARF version 5. + // DW_FORM_sec_offset + // DW_FORM_rnglistx + macro_rules! rangelistptr { + () => { + // DebugRngListsIndex is also an allowed form in DWARF version 5. + if let Some(offset) = self.offset_value() { + return AttributeValue::RangeListsRef(RawRangeListsOffset(offset)); + } + }; + } + // DW_FORM_sec_offset + macro_rules! rnglistsptr { + () => { + if let Some(offset) = self.offset_value() { + return AttributeValue::DebugRngListsBase(DebugRngListsBase(offset)); + } + }; + } + // DW_FORM_string + // DW_FORM_strp + // DW_FORM_strx + // DW_FORM_strx1 + // DW_FORM_strx2 + // DW_FORM_strx3 + // DW_FORM_strx4 + // DW_FORM_strp_sup + // DW_FORM_line_strp + macro_rules! string { + () => {}; + } + // DW_FORM_sec_offset + macro_rules! stroffsetsptr { + () => { + if let Some(offset) = self.offset_value() { + return AttributeValue::DebugStrOffsetsBase(DebugStrOffsetsBase(offset)); + } + }; + } + // This isn't a separate form but it's useful to distinguish it from a generic udata. + macro_rules! dwoid { + () => { + if let Some(value) = self.udata_value() { + return AttributeValue::DwoId(DwoId(value)); + } + }; + } + + // Perform the allowed class conversions for each attribute name. 
+ match self.name { + constants::DW_AT_sibling => { + reference!(); + } + constants::DW_AT_location => { + exprloc!(); + loclistptr!(); + } + constants::DW_AT_name => { + string!(); + } + constants::DW_AT_ordering => { + constant!(u8_value, Ordering, DwOrd); + } + constants::DW_AT_byte_size + | constants::DW_AT_bit_offset + | constants::DW_AT_bit_size => { + constant!(udata_value, Udata); + exprloc!(); + reference!(); + } + constants::DW_AT_stmt_list => { + lineptr!(); + } + constants::DW_AT_low_pc => { + address!(); + } + constants::DW_AT_high_pc => { + address!(); + constant!(udata_value, Udata); + } + constants::DW_AT_language => { + constant!(u16_value, Language, DwLang); + } + constants::DW_AT_discr => { + reference!(); + } + constants::DW_AT_discr_value => { + // constant: depends on type of DW_TAG_variant_part, + // so caller must normalize. + } + constants::DW_AT_visibility => { + constant!(u8_value, Visibility, DwVis); + } + constants::DW_AT_import => { + reference!(); + } + constants::DW_AT_string_length => { + exprloc!(); + loclistptr!(); + reference!(); + } + constants::DW_AT_common_reference => { + reference!(); + } + constants::DW_AT_comp_dir => { + string!(); + } + constants::DW_AT_const_value => { + // TODO: constant: sign depends on DW_AT_type. + block!(); + string!(); + } + constants::DW_AT_containing_type => { + reference!(); + } + constants::DW_AT_default_value => { + // TODO: constant: sign depends on DW_AT_type. + reference!(); + flag!(); + } + constants::DW_AT_inline => { + constant!(u8_value, Inline, DwInl); + } + constants::DW_AT_is_optional => { + flag!(); + } + constants::DW_AT_lower_bound => { + // TODO: constant: sign depends on DW_AT_type. 
+ exprloc!(); + reference!(); + } + constants::DW_AT_producer => { + string!(); + } + constants::DW_AT_prototyped => { + flag!(); + } + constants::DW_AT_return_addr => { + exprloc!(); + loclistptr!(); + } + constants::DW_AT_start_scope => { + // TODO: constant + rangelistptr!(); + } + constants::DW_AT_bit_stride => { + constant!(udata_value, Udata); + exprloc!(); + reference!(); + } + constants::DW_AT_upper_bound => { + // TODO: constant: sign depends on DW_AT_type. + exprloc!(); + reference!(); + } + constants::DW_AT_abstract_origin => { + reference!(); + } + constants::DW_AT_accessibility => { + constant!(u8_value, Accessibility, DwAccess); + } + constants::DW_AT_address_class => { + constant!(udata_value, AddressClass, DwAddr); + } + constants::DW_AT_artificial => { + flag!(); + } + constants::DW_AT_base_types => { + reference!(); + } + constants::DW_AT_calling_convention => { + constant!(u8_value, CallingConvention, DwCc); + } + constants::DW_AT_count => { + // TODO: constant + exprloc!(); + reference!(); + } + constants::DW_AT_data_member_location => { + // Constants must be handled before loclistptr so that DW_FORM_data4/8 + // are correctly interpreted for DWARF version 4+. 
+ constant!(udata_value, Udata); + exprloc!(); + loclistptr!(); + } + constants::DW_AT_decl_column => { + constant!(udata_value, Udata); + } + constants::DW_AT_decl_file => { + constant!(udata_value, FileIndex); + } + constants::DW_AT_decl_line => { + constant!(udata_value, Udata); + } + constants::DW_AT_declaration => { + flag!(); + } + constants::DW_AT_discr_list => { + block!(); + } + constants::DW_AT_encoding => { + constant!(u8_value, Encoding, DwAte); + } + constants::DW_AT_external => { + flag!(); + } + constants::DW_AT_frame_base => { + exprloc!(); + loclistptr!(); + } + constants::DW_AT_friend => { + reference!(); + } + constants::DW_AT_identifier_case => { + constant!(u8_value, IdentifierCase, DwId); + } + constants::DW_AT_macro_info => { + macinfoptr!(); + } + constants::DW_AT_namelist_item => { + reference!(); + } + constants::DW_AT_priority => { + reference!(); + } + constants::DW_AT_segment => { + exprloc!(); + loclistptr!(); + } + constants::DW_AT_specification => { + reference!(); + } + constants::DW_AT_static_link => { + exprloc!(); + loclistptr!(); + } + constants::DW_AT_type => { + reference!(); + } + constants::DW_AT_use_location => { + exprloc!(); + loclistptr!(); + } + constants::DW_AT_variable_parameter => { + flag!(); + } + constants::DW_AT_virtuality => { + constant!(u8_value, Virtuality, DwVirtuality); + } + constants::DW_AT_vtable_elem_location => { + exprloc!(); + loclistptr!(); + } + constants::DW_AT_allocated => { + // TODO: constant + exprloc!(); + reference!(); + } + constants::DW_AT_associated => { + // TODO: constant + exprloc!(); + reference!(); + } + constants::DW_AT_data_location => { + exprloc!(); + } + constants::DW_AT_byte_stride => { + constant!(udata_value, Udata); + exprloc!(); + reference!(); + } + constants::DW_AT_entry_pc => { + // TODO: constant + address!(); + } + constants::DW_AT_use_UTF8 => { + flag!(); + } + constants::DW_AT_extension => { + reference!(); + } + constants::DW_AT_ranges => { + rangelistptr!(); + } + 
constants::DW_AT_trampoline => { + address!(); + flag!(); + reference!(); + string!(); + } + constants::DW_AT_call_column => { + constant!(udata_value, Udata); + } + constants::DW_AT_call_file => { + constant!(udata_value, FileIndex); + } + constants::DW_AT_call_line => { + constant!(udata_value, Udata); + } + constants::DW_AT_description => { + string!(); + } + constants::DW_AT_binary_scale => { + // TODO: constant + } + constants::DW_AT_decimal_scale => { + // TODO: constant + } + constants::DW_AT_small => { + reference!(); + } + constants::DW_AT_decimal_sign => { + constant!(u8_value, DecimalSign, DwDs); + } + constants::DW_AT_digit_count => { + // TODO: constant + } + constants::DW_AT_picture_string => { + string!(); + } + constants::DW_AT_mutable => { + flag!(); + } + constants::DW_AT_threads_scaled => { + flag!(); + } + constants::DW_AT_explicit => { + flag!(); + } + constants::DW_AT_object_pointer => { + reference!(); + } + constants::DW_AT_endianity => { + constant!(u8_value, Endianity, DwEnd); + } + constants::DW_AT_elemental => { + flag!(); + } + constants::DW_AT_pure => { + flag!(); + } + constants::DW_AT_recursive => { + flag!(); + } + constants::DW_AT_signature => { + reference!(); + } + constants::DW_AT_main_subprogram => { + flag!(); + } + constants::DW_AT_data_bit_offset => { + // TODO: constant + } + constants::DW_AT_const_expr => { + flag!(); + } + constants::DW_AT_enum_class => { + flag!(); + } + constants::DW_AT_linkage_name => { + string!(); + } + constants::DW_AT_string_length_bit_size => { + // TODO: constant + } + constants::DW_AT_string_length_byte_size => { + // TODO: constant + } + constants::DW_AT_rank => { + // TODO: constant + exprloc!(); + } + constants::DW_AT_str_offsets_base => { + stroffsetsptr!(); + } + constants::DW_AT_addr_base | constants::DW_AT_GNU_addr_base => { + addrptr!(); + } + constants::DW_AT_rnglists_base | constants::DW_AT_GNU_ranges_base => { + rnglistsptr!(); + } + constants::DW_AT_dwo_name => { + string!(); + } + 
constants::DW_AT_reference => { + flag!(); + } + constants::DW_AT_rvalue_reference => { + flag!(); + } + constants::DW_AT_macros => { + macroptr!(); + } + constants::DW_AT_call_all_calls => { + flag!(); + } + constants::DW_AT_call_all_source_calls => { + flag!(); + } + constants::DW_AT_call_all_tail_calls => { + flag!(); + } + constants::DW_AT_call_return_pc => { + address!(); + } + constants::DW_AT_call_value => { + exprloc!(); + } + constants::DW_AT_call_origin => { + exprloc!(); + } + constants::DW_AT_call_parameter => { + reference!(); + } + constants::DW_AT_call_pc => { + address!(); + } + constants::DW_AT_call_tail_call => { + flag!(); + } + constants::DW_AT_call_target => { + exprloc!(); + } + constants::DW_AT_call_target_clobbered => { + exprloc!(); + } + constants::DW_AT_call_data_location => { + exprloc!(); + } + constants::DW_AT_call_data_value => { + exprloc!(); + } + constants::DW_AT_noreturn => { + flag!(); + } + constants::DW_AT_alignment => { + // TODO: constant + } + constants::DW_AT_export_symbols => { + flag!(); + } + constants::DW_AT_deleted => { + flag!(); + } + constants::DW_AT_defaulted => { + // TODO: constant + } + constants::DW_AT_loclists_base => { + loclistsptr!(); + } + constants::DW_AT_GNU_dwo_id => { + dwoid!(); + } + _ => {} + } + self.value.clone() + } + + /// Try to convert this attribute's value to a u8. + #[inline] + pub fn u8_value(&self) -> Option { + self.value.u8_value() + } + + /// Try to convert this attribute's value to a u16. + #[inline] + pub fn u16_value(&self) -> Option { + self.value.u16_value() + } + + /// Try to convert this attribute's value to an unsigned integer. + #[inline] + pub fn udata_value(&self) -> Option { + self.value.udata_value() + } + + /// Try to convert this attribute's value to a signed integer. + #[inline] + pub fn sdata_value(&self) -> Option { + self.value.sdata_value() + } + + /// Try to convert this attribute's value to an offset. 
+ #[inline] + pub fn offset_value(&self) -> Option { + self.value.offset_value() + } + + /// Try to convert this attribute's value to an expression or location buffer. + /// + /// Expressions and locations may be `DW_FORM_block*` or `DW_FORM_exprloc`. + /// The standard doesn't mention `DW_FORM_block*` as a possible form, but + /// it is encountered in practice. + #[inline] + pub fn exprloc_value(&self) -> Option> { + self.value.exprloc_value() + } + + /// Try to return this attribute's value as a string slice. + /// + /// If this attribute's value is either an inline `DW_FORM_string` string, + /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` + /// section, return the attribute's string value as `Some`. Other attribute + /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. + #[inline] + pub fn string_value(&self, debug_str: &DebugStr) -> Option { + self.value.string_value(debug_str) + } + + /// Try to return this attribute's value as a string slice. + /// + /// If this attribute's value is either an inline `DW_FORM_string` string, + /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` + /// section, or a `DW_FORM_strp_sup` reference to an offset into a supplementary + /// object file, return the attribute's string value as `Some`. Other attribute + /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. + #[inline] + pub fn string_value_sup( + &self, + debug_str: &DebugStr, + debug_str_sup: Option<&DebugStr>, + ) -> Option { + self.value.string_value_sup(debug_str, debug_str_sup) + } +} + +impl AttributeValue +where + R: Reader, + Offset: ReaderOffset, +{ + /// Try to convert this attribute's value to a u8. 
+ pub fn u8_value(&self) -> Option { + if let Some(value) = self.udata_value() { + if value <= u64::from(u8::MAX) { + return Some(value as u8); + } + } + None + } + + /// Try to convert this attribute's value to a u16. + pub fn u16_value(&self) -> Option { + if let Some(value) = self.udata_value() { + if value <= u64::from(u16::MAX) { + return Some(value as u16); + } + } + None + } + + /// Try to convert this attribute's value to an unsigned integer. + pub fn udata_value(&self) -> Option { + Some(match *self { + AttributeValue::Data1(data) => u64::from(data), + AttributeValue::Data2(data) => u64::from(data), + AttributeValue::Data4(data) => u64::from(data), + AttributeValue::Data8(data) => data, + AttributeValue::Udata(data) => data, + AttributeValue::Sdata(data) => { + if data < 0 { + // Maybe we should emit a warning here + return None; + } + data as u64 + } + _ => return None, + }) + } + + /// Try to convert this attribute's value to a signed integer. + pub fn sdata_value(&self) -> Option { + Some(match *self { + AttributeValue::Data1(data) => i64::from(data as i8), + AttributeValue::Data2(data) => i64::from(data as i16), + AttributeValue::Data4(data) => i64::from(data as i32), + AttributeValue::Data8(data) => data as i64, + AttributeValue::Sdata(data) => data, + AttributeValue::Udata(data) => { + if data > i64::max_value() as u64 { + // Maybe we should emit a warning here + return None; + } + data as i64 + } + _ => return None, + }) + } + + /// Try to convert this attribute's value to an offset. + pub fn offset_value(&self) -> Option { + // While offsets will be DW_FORM_data4/8 in DWARF version 2/3, + // these have already been converted to `SecOffset. + if let AttributeValue::SecOffset(offset) = *self { + Some(offset) + } else { + None + } + } + + /// Try to convert this attribute's value to an expression or location buffer. + /// + /// Expressions and locations may be `DW_FORM_block*` or `DW_FORM_exprloc`. 
+ /// The standard doesn't mention `DW_FORM_block*` as a possible form, but + /// it is encountered in practice. + pub fn exprloc_value(&self) -> Option> { + Some(match *self { + AttributeValue::Block(ref data) => Expression(data.clone()), + AttributeValue::Exprloc(ref data) => data.clone(), + _ => return None, + }) + } + + /// Try to return this attribute's value as a string slice. + /// + /// If this attribute's value is either an inline `DW_FORM_string` string, + /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` + /// section, return the attribute's string value as `Some`. Other attribute + /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. + pub fn string_value(&self, debug_str: &DebugStr) -> Option { + match *self { + AttributeValue::String(ref string) => Some(string.clone()), + AttributeValue::DebugStrRef(offset) => debug_str.get_str(offset).ok(), + _ => None, + } + } + + /// Try to return this attribute's value as a string slice. + /// + /// If this attribute's value is either an inline `DW_FORM_string` string, + /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` + /// section, or a `DW_FORM_strp_sup` reference to an offset into a supplementary + /// object file, return the attribute's string value as `Some`. Other attribute + /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. 
+ pub fn string_value_sup( + &self, + debug_str: &DebugStr, + debug_str_sup: Option<&DebugStr>, + ) -> Option { + match *self { + AttributeValue::String(ref string) => Some(string.clone()), + AttributeValue::DebugStrRef(offset) => debug_str.get_str(offset).ok(), + AttributeValue::DebugStrRefSup(offset) => { + debug_str_sup.and_then(|s| s.get_str(offset).ok()) + } + _ => None, + } + } +} + +fn length_u8_value(input: &mut R) -> Result { + let len = input.read_u8().map(R::Offset::from_u8)?; + input.split(len) +} + +fn length_u16_value(input: &mut R) -> Result { + let len = input.read_u16().map(R::Offset::from_u16)?; + input.split(len) +} + +fn length_u32_value(input: &mut R) -> Result { + let len = input.read_u32().map(R::Offset::from_u32)?; + input.split(len) +} + +fn length_uleb128_value(input: &mut R) -> Result { + let len = input.read_uleb128().and_then(R::Offset::from_u64)?; + input.split(len) +} + +// Return true if the given `name` can be a section offset in DWARF version 2/3. +// This is required to correctly handle relocations. 
+fn allow_section_offset(name: constants::DwAt, version: u16) -> bool { + match name { + constants::DW_AT_location + | constants::DW_AT_stmt_list + | constants::DW_AT_string_length + | constants::DW_AT_return_addr + | constants::DW_AT_start_scope + | constants::DW_AT_frame_base + | constants::DW_AT_macro_info + | constants::DW_AT_macros + | constants::DW_AT_segment + | constants::DW_AT_static_link + | constants::DW_AT_use_location + | constants::DW_AT_vtable_elem_location + | constants::DW_AT_ranges => true, + constants::DW_AT_data_member_location => version == 2 || version == 3, + _ => false, + } +} + +pub(crate) fn parse_attribute( + input: &mut R, + encoding: Encoding, + spec: AttributeSpecification, +) -> Result> { + let mut form = spec.form(); + loop { + let value = match form { + constants::DW_FORM_indirect => { + let dynamic_form = input.read_uleb128_u16()?; + form = constants::DwForm(dynamic_form); + continue; + } + constants::DW_FORM_addr => { + let addr = input.read_address(encoding.address_size)?; + AttributeValue::Addr(addr) + } + constants::DW_FORM_block1 => { + let block = length_u8_value(input)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block2 => { + let block = length_u16_value(input)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block4 => { + let block = length_u32_value(input)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block => { + let block = length_uleb128_value(input)?; + AttributeValue::Block(block) + } + constants::DW_FORM_data1 => { + let data = input.read_u8()?; + AttributeValue::Data1(data) + } + constants::DW_FORM_data2 => { + let data = input.read_u16()?; + AttributeValue::Data2(data) + } + constants::DW_FORM_data4 => { + // DWARF version 2/3 may use DW_FORM_data4/8 for section offsets. + // Ensure we handle relocations here. 
+ if encoding.format == Format::Dwarf32 + && allow_section_offset(spec.name(), encoding.version) + { + let offset = input.read_offset(Format::Dwarf32)?; + AttributeValue::SecOffset(offset) + } else { + let data = input.read_u32()?; + AttributeValue::Data4(data) + } + } + constants::DW_FORM_data8 => { + // DWARF version 2/3 may use DW_FORM_data4/8 for section offsets. + // Ensure we handle relocations here. + if encoding.format == Format::Dwarf64 + && allow_section_offset(spec.name(), encoding.version) + { + let offset = input.read_offset(Format::Dwarf64)?; + AttributeValue::SecOffset(offset) + } else { + let data = input.read_u64()?; + AttributeValue::Data8(data) + } + } + constants::DW_FORM_data16 => { + let block = input.split(R::Offset::from_u8(16))?; + AttributeValue::Block(block) + } + constants::DW_FORM_udata => { + let data = input.read_uleb128()?; + AttributeValue::Udata(data) + } + constants::DW_FORM_sdata => { + let data = input.read_sleb128()?; + AttributeValue::Sdata(data) + } + constants::DW_FORM_exprloc => { + let block = length_uleb128_value(input)?; + AttributeValue::Exprloc(Expression(block)) + } + constants::DW_FORM_flag => { + let present = input.read_u8()?; + AttributeValue::Flag(present != 0) + } + constants::DW_FORM_flag_present => { + // FlagPresent is this weird compile time always true thing that + // isn't actually present in the serialized DIEs, only in the abbreviation. 
+ AttributeValue::Flag(true) + } + constants::DW_FORM_sec_offset => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::SecOffset(offset) + } + constants::DW_FORM_ref1 => { + let reference = input.read_u8().map(R::Offset::from_u8)?; + AttributeValue::UnitRef(UnitOffset(reference)) + } + constants::DW_FORM_ref2 => { + let reference = input.read_u16().map(R::Offset::from_u16)?; + AttributeValue::UnitRef(UnitOffset(reference)) + } + constants::DW_FORM_ref4 => { + let reference = input.read_u32().map(R::Offset::from_u32)?; + AttributeValue::UnitRef(UnitOffset(reference)) + } + constants::DW_FORM_ref8 => { + let reference = input.read_u64().and_then(R::Offset::from_u64)?; + AttributeValue::UnitRef(UnitOffset(reference)) + } + constants::DW_FORM_ref_udata => { + let reference = input.read_uleb128().and_then(R::Offset::from_u64)?; + AttributeValue::UnitRef(UnitOffset(reference)) + } + constants::DW_FORM_ref_addr => { + // This is an offset, but DWARF version 2 specifies that DW_FORM_ref_addr + // has the same size as an address on the target system. This was changed + // in DWARF version 3. + let offset = if encoding.version == 2 { + input.read_sized_offset(encoding.address_size)? + } else { + input.read_offset(encoding.format)? 
+ }; + AttributeValue::DebugInfoRef(DebugInfoOffset(offset)) + } + constants::DW_FORM_ref_sig8 => { + let signature = input.read_u64()?; + AttributeValue::DebugTypesRef(DebugTypeSignature(signature)) + } + constants::DW_FORM_ref_sup4 => { + let offset = input.read_u32().map(R::Offset::from_u32)?; + AttributeValue::DebugInfoRefSup(DebugInfoOffset(offset)) + } + constants::DW_FORM_ref_sup8 => { + let offset = input.read_u64().and_then(R::Offset::from_u64)?; + AttributeValue::DebugInfoRefSup(DebugInfoOffset(offset)) + } + constants::DW_FORM_GNU_ref_alt => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugInfoRefSup(DebugInfoOffset(offset)) + } + constants::DW_FORM_string => { + let string = input.read_null_terminated_slice()?; + AttributeValue::String(string) + } + constants::DW_FORM_strp => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugStrRef(DebugStrOffset(offset)) + } + constants::DW_FORM_strp_sup | constants::DW_FORM_GNU_strp_alt => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugStrRefSup(DebugStrOffset(offset)) + } + constants::DW_FORM_line_strp => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugLineStrRef(DebugLineStrOffset(offset)) + } + constants::DW_FORM_implicit_const => { + let data = spec + .implicit_const_value() + .ok_or(Error::InvalidImplicitConst)?; + AttributeValue::Sdata(data) + } + constants::DW_FORM_strx | constants::DW_FORM_GNU_str_index => { + let index = input.read_uleb128().and_then(R::Offset::from_u64)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx1 => { + let index = input.read_u8().map(R::Offset::from_u8)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx2 => { + let index = input.read_u16().map(R::Offset::from_u16)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx3 => { + let 
index = input.read_uint(3).and_then(R::Offset::from_u64)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx4 => { + let index = input.read_u32().map(R::Offset::from_u32)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_addrx | constants::DW_FORM_GNU_addr_index => { + let index = input.read_uleb128().and_then(R::Offset::from_u64)?; + AttributeValue::DebugAddrIndex(DebugAddrIndex(index)) + } + constants::DW_FORM_addrx1 => { + let index = input.read_u8().map(R::Offset::from_u8)?; + AttributeValue::DebugAddrIndex(DebugAddrIndex(index)) + } + constants::DW_FORM_addrx2 => { + let index = input.read_u16().map(R::Offset::from_u16)?; + AttributeValue::DebugAddrIndex(DebugAddrIndex(index)) + } + constants::DW_FORM_addrx3 => { + let index = input.read_uint(3).and_then(R::Offset::from_u64)?; + AttributeValue::DebugAddrIndex(DebugAddrIndex(index)) + } + constants::DW_FORM_addrx4 => { + let index = input.read_u32().map(R::Offset::from_u32)?; + AttributeValue::DebugAddrIndex(DebugAddrIndex(index)) + } + constants::DW_FORM_loclistx => { + let index = input.read_uleb128().and_then(R::Offset::from_u64)?; + AttributeValue::DebugLocListsIndex(DebugLocListsIndex(index)) + } + constants::DW_FORM_rnglistx => { + let index = input.read_uleb128().and_then(R::Offset::from_u64)?; + AttributeValue::DebugRngListsIndex(DebugRngListsIndex(index)) + } + _ => { + return Err(Error::UnknownForm(form)); + } + }; + let attr = Attribute { + name: spec.name(), + value, + }; + return Ok(attr); + } +} + +pub(crate) fn skip_attributes( + input: &mut R, + encoding: Encoding, + specs: &[AttributeSpecification], +) -> Result<()> { + let mut skip_bytes = R::Offset::from_u8(0); + for spec in specs { + let mut form = spec.form(); + loop { + if let Some(len) = get_attribute_size(form, encoding) { + // We know the length of this attribute. Accumulate that length. 
+ skip_bytes += R::Offset::from_u8(len); + break; + } + + // We have encountered a variable-length attribute. + if skip_bytes != R::Offset::from_u8(0) { + // Skip the accumulated skip bytes and then read the attribute normally. + input.skip(skip_bytes)?; + skip_bytes = R::Offset::from_u8(0); + } + + match form { + constants::DW_FORM_indirect => { + let dynamic_form = input.read_uleb128_u16()?; + form = constants::DwForm(dynamic_form); + continue; + } + constants::DW_FORM_block1 => { + skip_bytes = input.read_u8().map(R::Offset::from_u8)?; + } + constants::DW_FORM_block2 => { + skip_bytes = input.read_u16().map(R::Offset::from_u16)?; + } + constants::DW_FORM_block4 => { + skip_bytes = input.read_u32().map(R::Offset::from_u32)?; + } + constants::DW_FORM_block | constants::DW_FORM_exprloc => { + skip_bytes = input.read_uleb128().and_then(R::Offset::from_u64)?; + } + constants::DW_FORM_string => { + let _ = input.read_null_terminated_slice()?; + } + constants::DW_FORM_udata + | constants::DW_FORM_sdata + | constants::DW_FORM_ref_udata + | constants::DW_FORM_strx + | constants::DW_FORM_GNU_str_index + | constants::DW_FORM_addrx + | constants::DW_FORM_GNU_addr_index + | constants::DW_FORM_loclistx + | constants::DW_FORM_rnglistx => { + input.skip_leb128()?; + } + _ => { + return Err(Error::UnknownForm(form)); + } + }; + break; + } + } + if skip_bytes != R::Offset::from_u8(0) { + // Skip the remaining accumulated skip bytes. + input.skip(skip_bytes)?; + } + Ok(()) +} + +/// An iterator over a particular entry's attributes. +/// +/// See [the documentation for +/// `DebuggingInformationEntry::attrs()`](./struct.DebuggingInformationEntry.html#method.attrs) +/// for details. +/// +/// Can be [used with +/// `FallibleIterator`](./index.html#using-with-fallibleiterator). 
+#[derive(Clone, Copy, Debug)] +pub struct AttrsIter<'abbrev, 'entry, 'unit, R: Reader> { + input: R, + attributes: &'abbrev [AttributeSpecification], + entry: &'entry DebuggingInformationEntry<'abbrev, 'unit, R>, +} + +impl<'abbrev, 'entry, 'unit, R: Reader> AttrsIter<'abbrev, 'entry, 'unit, R> { + /// Advance the iterator and return the next attribute. + /// + /// Returns `None` when iteration is finished. If an error + /// occurs while parsing the next attribute, then this error + /// is returned, and all subsequent calls return `None`. + #[inline(always)] + pub fn next(&mut self) -> Result>> { + if self.attributes.is_empty() { + // Now that we have parsed all of the attributes, we know where + // either (1) this entry's children start, if the abbreviation says + // this entry has children; or (2) where this entry's siblings + // begin. + if let Some(end) = self.entry.attrs_len.get() { + debug_assert_eq!(end, self.input.offset_from(&self.entry.attrs_slice)); + } else { + self.entry + .attrs_len + .set(Some(self.input.offset_from(&self.entry.attrs_slice))); + } + + return Ok(None); + } + + let spec = self.attributes[0]; + let rest_spec = &self.attributes[1..]; + match parse_attribute(&mut self.input, self.entry.unit.encoding(), spec) { + Ok(attr) => { + self.attributes = rest_spec; + Ok(Some(attr)) + } + Err(e) => { + self.input.empty(); + Err(e) + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl<'abbrev, 'entry, 'unit, R: Reader> fallible_iterator::FallibleIterator + for AttrsIter<'abbrev, 'entry, 'unit, R> +{ + type Item = Attribute; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + AttrsIter::next(self) + } +} + +/// A raw reader of the data that defines the Debugging Information Entries. +/// +/// `EntriesRaw` provides primitives to read the components of Debugging Information +/// Entries (DIEs). 
A DIE consists of an abbreviation code (read with `read_abbreviation`) +/// followed by a number of attributes (read with `read_attribute`). +/// The user must provide the control flow to read these correctly. +/// In particular, all attributes must always be read before reading another +/// abbreviation code. +/// +/// `EntriesRaw` lacks some features of `EntriesCursor`, such as the ability to skip +/// to the next sibling DIE. However, this also allows it to optimize better, since it +/// does not need to perform the extra bookkeeping required to support these features, +/// and thus it is suitable for cases where performance is important. +/// +/// ## Example Usage +/// ```rust,no_run +/// # fn example() -> Result<(), gimli::Error> { +/// # let debug_info = gimli::DebugInfo::new(&[], gimli::LittleEndian); +/// # let get_some_unit = || debug_info.units().next().unwrap().unwrap(); +/// let unit = get_some_unit(); +/// # let debug_abbrev = gimli::DebugAbbrev::new(&[], gimli::LittleEndian); +/// # let get_abbrevs_for_unit = |_| unit.abbreviations(&debug_abbrev).unwrap(); +/// let abbrevs = get_abbrevs_for_unit(&unit); +/// +/// let mut entries = unit.entries_raw(&abbrevs, None)?; +/// while !entries.is_empty() { +/// let abbrev = if let Some(abbrev) = entries.read_abbreviation()? { +/// abbrev +/// } else { +/// // Null entry with no attributes. +/// continue +/// }; +/// match abbrev.tag() { +/// gimli::DW_TAG_subprogram => { +/// // Loop over attributes for DIEs we care about. +/// for spec in abbrev.attributes() { +/// let attr = entries.read_attribute(*spec)?; +/// match attr.name() { +/// // Handle attributes. +/// _ => {} +/// } +/// } +/// } +/// _ => { +/// // Skip attributes for DIEs we don't care about. 
+/// entries.skip_attributes(abbrev.attributes()); +/// } +/// } +/// } +/// # unreachable!() +/// # } +/// ``` +#[derive(Clone, Debug)] +pub struct EntriesRaw<'abbrev, 'unit, R> +where + R: Reader, +{ + input: R, + unit: &'unit UnitHeader, + abbreviations: &'abbrev Abbreviations, + depth: isize, +} + +impl<'abbrev, 'unit, R: Reader> EntriesRaw<'abbrev, 'unit, R> { + /// Return true if there is no more input. + #[inline] + pub fn is_empty(&self) -> bool { + self.input.is_empty() + } + + /// Return the unit offset at which the reader will read next. + /// + /// If you want the offset of the next entry, then this must be called prior to reading + /// the next entry. + pub fn next_offset(&self) -> UnitOffset { + UnitOffset(self.unit.header_size() + self.input.offset_from(&self.unit.entries_buf)) + } + + /// Return the depth of the next entry. + /// + /// This depth is updated when `read_abbreviation` is called, and is updated + /// based on null entries and the `has_children` field in the abbreviation. + #[inline] + pub fn next_depth(&self) -> isize { + self.depth + } + + /// Read an abbreviation code and lookup the corresponding `Abbreviation`. + /// + /// Returns `Ok(None)` for null entries. + #[inline] + pub fn read_abbreviation(&mut self) -> Result> { + let code = self.input.read_uleb128()?; + if code == 0 { + self.depth -= 1; + return Ok(None); + }; + let abbrev = self + .abbreviations + .get(code) + .ok_or(Error::UnknownAbbreviation(code))?; + if abbrev.has_children() { + self.depth += 1; + } + Ok(Some(abbrev)) + } + + /// Read an attribute. + #[inline] + pub fn read_attribute(&mut self, spec: AttributeSpecification) -> Result> { + parse_attribute(&mut self.input, self.unit.encoding(), spec) + } + + /// Skip all the attributes of an abbreviation. 
+ #[inline] + pub fn skip_attributes(&mut self, specs: &[AttributeSpecification]) -> Result<()> { + skip_attributes(&mut self.input, self.unit.encoding(), specs) + } +} + +/// A cursor into the Debugging Information Entries tree for a compilation unit. +/// +/// The `EntriesCursor` can traverse the DIE tree in DFS order using `next_dfs()`, +/// or skip to the next sibling of the entry the cursor is currently pointing to +/// using `next_sibling()`. +/// +/// It is also possible to traverse the DIE tree at a lower abstraction level +/// using `next_entry()`. This method does not skip over null entries, or provide +/// any indication of the current tree depth. In this case, you must use `current()` +/// to obtain the current entry, and `current().has_children()` to determine if +/// the entry following the current entry will be a sibling or child. `current()` +/// will return `None` if the current entry is a null entry, which signifies the +/// end of the current tree depth. +#[derive(Clone, Debug)] +pub struct EntriesCursor<'abbrev, 'unit, R> +where + R: Reader, +{ + input: R, + unit: &'unit UnitHeader, + abbreviations: &'abbrev Abbreviations, + cached_current: Option>, + delta_depth: isize, +} + +impl<'abbrev, 'unit, R: Reader> EntriesCursor<'abbrev, 'unit, R> { + /// Get a reference to the entry that the cursor is currently pointing to. + /// + /// If the cursor is not pointing at an entry, or if the current entry is a + /// null entry, then `None` is returned. + #[inline] + pub fn current(&self) -> Option<&DebuggingInformationEntry<'abbrev, 'unit, R>> { + self.cached_current.as_ref() + } + + /// Move the cursor to the next DIE in the tree. + /// + /// Returns `Some` if there is a next entry, even if this entry is null. + /// If there is no next entry, then `None` is returned. 
+ pub fn next_entry(&mut self) -> Result> { + if let Some(ref current) = self.cached_current { + self.input = current.after_attrs()?; + } + + if self.input.is_empty() { + self.cached_current = None; + self.delta_depth = 0; + return Ok(None); + } + + match DebuggingInformationEntry::parse(&mut self.input, self.unit, self.abbreviations) { + Ok(Some(entry)) => { + self.delta_depth = entry.has_children() as isize; + self.cached_current = Some(entry); + Ok(Some(())) + } + Ok(None) => { + self.delta_depth = -1; + self.cached_current = None; + Ok(Some(())) + } + Err(e) => { + self.input.empty(); + self.delta_depth = 0; + self.cached_current = None; + Err(e) + } + } + } + + /// Move the cursor to the next DIE in the tree in DFS order. + /// + /// Upon successful movement of the cursor, return the delta traversal + /// depth and the entry: + /// + /// * If we moved down into the previous current entry's children, we get + /// `Some((1, entry))`. + /// + /// * If we moved to the previous current entry's sibling, we get + /// `Some((0, entry))`. + /// + /// * If the previous entry does not have any siblings and we move up to + /// its parent's next sibling, then we get `Some((-1, entry))`. Note that + /// if the parent doesn't have a next sibling, then it could go up to the + /// parent's parent's next sibling and return `Some((-2, entry))`, etc. + /// + /// If there is no next entry, then `None` is returned. + /// + /// Here is an example that finds the first entry in a compilation unit that + /// does not have any children. 
+ /// + /// ``` + /// # use gimli::{DebugAbbrev, DebugInfo, LittleEndian}; + /// # let info_buf = [ + /// # // Comilation unit header + /// # + /// # // 32-bit unit length = 25 + /// # 0x19, 0x00, 0x00, 0x00, + /// # // Version 4 + /// # 0x04, 0x00, + /// # // debug_abbrev_offset + /// # 0x00, 0x00, 0x00, 0x00, + /// # // Address size + /// # 0x04, + /// # + /// # // DIEs + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # + /// # // Children + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # + /// # // Children + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # + /// # // Children + /// # + /// # // End of children + /// # 0x00, + /// # + /// # // End of children + /// # 0x00, + /// # + /// # // End of children + /// # 0x00, + /// # ]; + /// # let debug_info = DebugInfo::new(&info_buf, LittleEndian); + /// # + /// # let abbrev_buf = [ + /// # // Code + /// # 0x01, + /// # // DW_TAG_subprogram + /// # 0x2e, + /// # // DW_CHILDREN_yes + /// # 0x01, + /// # // Begin attributes + /// # // Attribute name = DW_AT_name + /// # 0x03, + /// # // Attribute form = DW_FORM_string + /// # 0x08, + /// # // End attributes + /// # 0x00, + /// # 0x00, + /// # // Null terminator + /// # 0x00 + /// # ]; + /// # let debug_abbrev = DebugAbbrev::new(&abbrev_buf, LittleEndian); + /// # + /// # let get_some_unit = || debug_info.units().next().unwrap().unwrap(); + /// + /// let unit = get_some_unit(); + /// # let get_abbrevs_for_unit = |_| unit.abbreviations(&debug_abbrev).unwrap(); + /// let abbrevs = get_abbrevs_for_unit(&unit); + /// + /// let mut first_entry_with_no_children = None; + /// let mut cursor = unit.entries(&abbrevs); + /// + /// // Move the cursor to the root. 
+ /// assert!(cursor.next_dfs().unwrap().is_some()); + /// + /// // Traverse the DIE tree in depth-first search order. + /// let mut depth = 0; + /// while let Some((delta_depth, current)) = cursor.next_dfs().expect("Should parse next dfs") { + /// // Update depth value, and break out of the loop when we + /// // return to the original starting position. + /// depth += delta_depth; + /// if depth <= 0 { + /// break; + /// } + /// + /// first_entry_with_no_children = Some(current.clone()); + /// } + /// + /// println!("The first entry with no children is {:?}", + /// first_entry_with_no_children.unwrap()); + /// ``` + pub fn next_dfs( + &mut self, + ) -> Result)>> { + let mut delta_depth = self.delta_depth; + loop { + // The next entry should be the one we want. + if self.next_entry()?.is_some() { + if let Some(ref entry) = self.cached_current { + return Ok(Some((delta_depth, entry))); + } + + // next_entry() read a null entry. + delta_depth += self.delta_depth; + } else { + return Ok(None); + } + } + } + + /// Move the cursor to the next sibling DIE of the current one. + /// + /// Returns `Ok(Some(entry))` when the cursor has been moved to + /// the next sibling, `Ok(None)` when there is no next sibling. + /// + /// The depth of the cursor is never changed if this method returns `Ok`. + /// Once `Ok(None)` is returned, this method will continue to return + /// `Ok(None)` until either `next_entry` or `next_dfs` is called. 
+ /// + /// Here is an example that iterates over all of the direct children of the + /// root entry: + /// + /// ``` + /// # use gimli::{DebugAbbrev, DebugInfo, LittleEndian}; + /// # let info_buf = [ + /// # // Comilation unit header + /// # + /// # // 32-bit unit length = 25 + /// # 0x19, 0x00, 0x00, 0x00, + /// # // Version 4 + /// # 0x04, 0x00, + /// # // debug_abbrev_offset + /// # 0x00, 0x00, 0x00, 0x00, + /// # // Address size + /// # 0x04, + /// # + /// # // DIEs + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # + /// # // Children + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # + /// # // Children + /// # + /// # // Abbreviation code + /// # 0x01, + /// # // Attribute of form DW_FORM_string = "foo\0" + /// # 0x66, 0x6f, 0x6f, 0x00, + /// # + /// # // Children + /// # + /// # // End of children + /// # 0x00, + /// # + /// # // End of children + /// # 0x00, + /// # + /// # // End of children + /// # 0x00, + /// # ]; + /// # let debug_info = DebugInfo::new(&info_buf, LittleEndian); + /// # + /// # let get_some_unit = || debug_info.units().next().unwrap().unwrap(); + /// + /// # let abbrev_buf = [ + /// # // Code + /// # 0x01, + /// # // DW_TAG_subprogram + /// # 0x2e, + /// # // DW_CHILDREN_yes + /// # 0x01, + /// # // Begin attributes + /// # // Attribute name = DW_AT_name + /// # 0x03, + /// # // Attribute form = DW_FORM_string + /// # 0x08, + /// # // End attributes + /// # 0x00, + /// # 0x00, + /// # // Null terminator + /// # 0x00 + /// # ]; + /// # let debug_abbrev = DebugAbbrev::new(&abbrev_buf, LittleEndian); + /// # + /// let unit = get_some_unit(); + /// # let get_abbrevs_for_unit = |_| unit.abbreviations(&debug_abbrev).unwrap(); + /// let abbrevs = get_abbrevs_for_unit(&unit); + /// + /// let mut cursor = unit.entries(&abbrevs); + /// + /// // Move the cursor 
to the root. + /// assert!(cursor.next_dfs().unwrap().is_some()); + /// + /// // Move the cursor to the root's first child. + /// assert!(cursor.next_dfs().unwrap().is_some()); + /// + /// // Iterate the root's children. + /// loop { + /// { + /// let current = cursor.current().expect("Should be at an entry"); + /// println!("{:?} is a child of the root", current); + /// } + /// + /// if cursor.next_sibling().expect("Should parse next sibling").is_none() { + /// break; + /// } + /// } + /// ``` + pub fn next_sibling( + &mut self, + ) -> Result>> { + if self.current().is_none() { + // We're already at the null for the end of the sibling list. + return Ok(None); + } + + // Loop until we find an entry at the current level. + let mut depth = 0; + loop { + // Use is_some() and unwrap() to keep borrow checker happy. + if self.current().is_some() && self.current().unwrap().has_children() { + if let Some(sibling_input) = self.current().unwrap().sibling() { + // Fast path: this entry has a DW_AT_sibling + // attribute pointing to its sibling, so jump + // to it (which keeps us at the same depth). + self.input = sibling_input; + self.cached_current = None; + } else { + // This entry has children, so the next entry is + // down one level. + depth += 1; + } + } + + if self.next_entry()?.is_none() { + // End of input. + return Ok(None); + } + + if depth == 0 { + // Found an entry at the current level. + return Ok(self.current()); + } + + if self.current().is_none() { + // A null entry means the end of a child list, so we're + // back up a level. + depth -= 1; + } + } + } +} + +/// The state information for a tree view of the Debugging Information Entries. +/// +/// The `EntriesTree` can be used to recursively iterate through the DIE +/// tree, following the parent/child relationships. The `EntriesTree` contains +/// shared state for all nodes in the tree, avoiding any duplicate parsing of +/// entries during the traversal. 
+/// +/// ## Example Usage +/// ```rust,no_run +/// # fn example() -> Result<(), gimli::Error> { +/// # let debug_info = gimli::DebugInfo::new(&[], gimli::LittleEndian); +/// # let get_some_unit = || debug_info.units().next().unwrap().unwrap(); +/// let unit = get_some_unit(); +/// # let debug_abbrev = gimli::DebugAbbrev::new(&[], gimli::LittleEndian); +/// # let get_abbrevs_for_unit = |_| unit.abbreviations(&debug_abbrev).unwrap(); +/// let abbrevs = get_abbrevs_for_unit(&unit); +/// +/// let mut tree = unit.entries_tree(&abbrevs, None)?; +/// let root = tree.root()?; +/// process_tree(root)?; +/// # unreachable!() +/// # } +/// +/// fn process_tree(mut node: gimli::EntriesTreeNode) -> gimli::Result<()> +/// where R: gimli::Reader +/// { +/// { +/// // Examine the entry attributes. +/// let mut attrs = node.entry().attrs(); +/// while let Some(attr) = attrs.next()? { +/// } +/// } +/// let mut children = node.children(); +/// while let Some(child) = children.next()? { +/// // Recursively process a child. +/// process_tree(child); +/// } +/// Ok(()) +/// } +/// ``` +#[derive(Clone, Debug)] +pub struct EntriesTree<'abbrev, 'unit, R> +where + R: Reader, +{ + root: R, + unit: &'unit UnitHeader, + abbreviations: &'abbrev Abbreviations, + input: R, + entry: Option>, + depth: isize, +} + +impl<'abbrev, 'unit, R: Reader> EntriesTree<'abbrev, 'unit, R> { + fn new(root: R, unit: &'unit UnitHeader, abbreviations: &'abbrev Abbreviations) -> Self { + let input = root.clone(); + EntriesTree { + root, + unit, + abbreviations, + input, + entry: None, + depth: 0, + } + } + + /// Returns the root node of the tree. + pub fn root<'me>(&'me mut self) -> Result> { + self.input = self.root.clone(); + self.entry = + DebuggingInformationEntry::parse(&mut self.input, self.unit, self.abbreviations)?; + if self.entry.is_none() { + return Err(Error::UnexpectedNull); + } + self.depth = 0; + Ok(EntriesTreeNode::new(self, 1)) + } + + /// Move the cursor to the next entry at the specified depth. 
+ /// + /// Requires `depth <= self.depth + 1`. + /// + /// Returns `true` if successful. + fn next(&mut self, depth: isize) -> Result { + if self.depth < depth { + debug_assert_eq!(self.depth + 1, depth); + + match self.entry { + Some(ref entry) => { + if !entry.has_children() { + return Ok(false); + } + self.depth += 1; + self.input = entry.after_attrs()?; + } + None => return Ok(false), + } + + if self.input.is_empty() { + self.entry = None; + return Ok(false); + } + + return match DebuggingInformationEntry::parse( + &mut self.input, + self.unit, + self.abbreviations, + ) { + Ok(entry) => { + self.entry = entry; + Ok(self.entry.is_some()) + } + Err(e) => { + self.input.empty(); + self.entry = None; + Err(e) + } + }; + } + + loop { + match self.entry { + Some(ref entry) => { + if entry.has_children() { + if let Some(sibling_input) = entry.sibling() { + // Fast path: this entry has a DW_AT_sibling + // attribute pointing to its sibling, so jump + // to it (which keeps us at the same depth). + self.input = sibling_input; + } else { + // This entry has children, so the next entry is + // down one level. + self.depth += 1; + self.input = entry.after_attrs()?; + } + } else { + // This entry has no children, so next entry is at same depth. + self.input = entry.after_attrs()?; + } + } + None => { + // This entry is a null, so next entry is up one level. + self.depth -= 1; + } + } + + if self.input.is_empty() { + self.entry = None; + return Ok(false); + } + + match DebuggingInformationEntry::parse(&mut self.input, self.unit, self.abbreviations) { + Ok(entry) => { + self.entry = entry; + if self.depth == depth { + return Ok(self.entry.is_some()); + } + } + Err(e) => { + self.input.empty(); + self.entry = None; + return Err(e); + } + } + } + } +} + +/// A node in the Debugging Information Entry tree. +/// +/// The root node of a tree can be obtained +/// via [`EntriesTree::root`](./struct.EntriesTree.html#method.root). 
+#[derive(Debug)] +pub struct EntriesTreeNode<'abbrev, 'unit, 'tree, R: Reader> { + tree: &'tree mut EntriesTree<'abbrev, 'unit, R>, + depth: isize, +} + +impl<'abbrev, 'unit, 'tree, R: Reader> EntriesTreeNode<'abbrev, 'unit, 'tree, R> { + fn new( + tree: &'tree mut EntriesTree<'abbrev, 'unit, R>, + depth: isize, + ) -> EntriesTreeNode<'abbrev, 'unit, 'tree, R> { + debug_assert!(tree.entry.is_some()); + EntriesTreeNode { tree, depth } + } + + /// Returns the current entry in the tree. + pub fn entry(&self) -> &DebuggingInformationEntry<'abbrev, 'unit, R> { + // We never create a node without an entry. + self.tree.entry.as_ref().unwrap() + } + + /// Create an iterator for the children of the current entry. + /// + /// The current entry can no longer be accessed after creating the + /// iterator. + pub fn children(self) -> EntriesTreeIter<'abbrev, 'unit, 'tree, R> { + EntriesTreeIter::new(self.tree, self.depth) + } +} + +/// An iterator that allows traversal of the children of an +/// `EntriesTreeNode`. +/// +/// The items returned by this iterator are also `EntriesTreeNode`s, +/// which allow recursive traversal of grandchildren, etc. +#[derive(Debug)] +pub struct EntriesTreeIter<'abbrev, 'unit, 'tree, R: Reader> { + tree: &'tree mut EntriesTree<'abbrev, 'unit, R>, + depth: isize, + empty: bool, +} + +impl<'abbrev, 'unit, 'tree, R: Reader> EntriesTreeIter<'abbrev, 'unit, 'tree, R> { + fn new( + tree: &'tree mut EntriesTree<'abbrev, 'unit, R>, + depth: isize, + ) -> EntriesTreeIter<'abbrev, 'unit, 'tree, R> { + EntriesTreeIter { + tree, + depth, + empty: false, + } + } + + /// Returns an `EntriesTreeNode` for the next child entry. + /// + /// Returns `None` if there are no more children. + pub fn next<'me>(&'me mut self) -> Result>> { + if self.empty { + Ok(None) + } else if self.tree.next(self.depth)? 
{ + Ok(Some(EntriesTreeNode::new(self.tree, self.depth + 1))) + } else { + self.empty = true; + Ok(None) + } + } +} + +/// Parse a type unit header's unique type signature. Callers should handle +/// unique-ness checking. +fn parse_type_signature(input: &mut R) -> Result { + input.read_u64().map(DebugTypeSignature) +} + +/// Parse a type unit header's type offset. +fn parse_type_offset(input: &mut R, format: Format) -> Result> { + input.read_offset(format).map(UnitOffset) +} + +/// The `DebugTypes` struct represents the DWARF type information +/// found in the `.debug_types` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugTypes { + debug_types_section: R, +} + +impl<'input, Endian> DebugTypes> +where + Endian: Endianity, +{ + /// Construct a new `DebugTypes` instance from the data in the `.debug_types` + /// section. + /// + /// It is the caller's responsibility to read the `.debug_types` section and + /// present it as a `&[u8]` slice. That means using some ELF loader on + /// Linux, a Mach-O loader on macOS, etc. + /// + /// ``` + /// use gimli::{DebugTypes, LittleEndian}; + /// + /// # let buf = [0x00, 0x01, 0x02, 0x03]; + /// # let read_debug_types_section_somehow = || &buf; + /// let debug_types = DebugTypes::new(read_debug_types_section_somehow(), LittleEndian); + /// ``` + pub fn new(debug_types_section: &'input [u8], endian: Endian) -> Self { + Self::from(EndianSlice::new(debug_types_section, endian)) + } +} + +impl DebugTypes { + /// Create a `DebugTypes` section that references the data in `self`. + /// + /// This is useful when `R` implements `Reader` but `T` does not. + /// + /// Used by `DwarfSections::borrow`. 
+ pub fn borrow<'a, F, R>(&'a self, mut borrow: F) -> DebugTypes + where + F: FnMut(&'a T) -> R, + { + borrow(&self.debug_types_section).into() + } +} + +impl Section for DebugTypes { + fn id() -> SectionId { + SectionId::DebugTypes + } + + fn reader(&self) -> &R { + &self.debug_types_section + } +} + +impl From for DebugTypes { + fn from(debug_types_section: R) -> Self { + DebugTypes { + debug_types_section, + } + } +} + +impl DebugTypes { + /// Iterate the type-units in this `.debug_types` section. + /// + /// ``` + /// use gimli::{DebugTypes, LittleEndian}; + /// + /// # let buf = []; + /// # let read_debug_types_section_somehow = || &buf; + /// let debug_types = DebugTypes::new(read_debug_types_section_somehow(), LittleEndian); + /// + /// let mut iter = debug_types.units(); + /// while let Some(unit) = iter.next().unwrap() { + /// println!("unit's length is {}", unit.unit_length()); + /// } + /// ``` + /// + /// Can be [used with + /// `FallibleIterator`](./index.html#using-with-fallibleiterator). + pub fn units(&self) -> DebugTypesUnitHeadersIter { + DebugTypesUnitHeadersIter { + input: self.debug_types_section.clone(), + offset: DebugTypesOffset(R::Offset::from_u8(0)), + } + } +} + +/// An iterator over the type-units of this `.debug_types` section. +/// +/// See the [documentation on +/// `DebugTypes::units`](./struct.DebugTypes.html#method.units) for +/// more detail. +#[derive(Clone, Debug)] +pub struct DebugTypesUnitHeadersIter { + input: R, + offset: DebugTypesOffset, +} + +impl DebugTypesUnitHeadersIter { + /// Advance the iterator to the next type unit header. 
+ pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + Ok(None) + } else { + let len = self.input.len(); + match parse_unit_header(&mut self.input, self.offset.into()) { + Ok(header) => { + self.offset.0 += len - self.input.len(); + Ok(Some(header)) + } + Err(e) => { + self.input.empty(); + Err(e) + } + } + } + } +} + +#[cfg(feature = "fallible-iterator")] +impl fallible_iterator::FallibleIterator for DebugTypesUnitHeadersIter { + type Item = UnitHeader; + type Error = Error; + + fn next(&mut self) -> ::core::result::Result, Self::Error> { + DebugTypesUnitHeadersIter::next(self) + } +} + +#[cfg(test)] +// Tests require leb128::write. +#[cfg(feature = "write")] +mod tests { + use super::*; + use crate::constants; + use crate::constants::*; + use crate::endianity::{Endianity, LittleEndian}; + use crate::leb128; + use crate::read::abbrev::tests::AbbrevSectionMethods; + use crate::read::{ + Abbreviation, AttributeSpecification, DebugAbbrev, EndianSlice, Error, Result, + }; + use crate::test_util::GimliSectionMethods; + use alloc::vec::Vec; + use core::cell::Cell; + use test_assembler::{Endian, Label, LabelMaker, Section}; + + // Mixin methods for `Section` to help define binary test data. 
+ + trait UnitSectionMethods { + fn unit(self, unit: &mut UnitHeader>) -> Self + where + E: Endianity; + fn die(self, code: u64, attr: F) -> Self + where + F: Fn(Section) -> Section; + fn die_null(self) -> Self; + fn attr_string(self, s: &str) -> Self; + fn attr_ref1(self, o: u8) -> Self; + fn offset(self, offset: usize, format: Format) -> Self; + } + + impl UnitSectionMethods for Section { + fn unit(self, unit: &mut UnitHeader>) -> Self + where + E: Endianity, + { + let size = self.size(); + let length = Label::new(); + let start = Label::new(); + let end = Label::new(); + + let section = match unit.format() { + Format::Dwarf32 => self.L32(&length), + Format::Dwarf64 => self.L32(0xffff_ffff).L64(&length), + }; + + let section = match unit.version() { + 2..=4 => section + .mark(&start) + .L16(unit.version()) + .offset(unit.debug_abbrev_offset.0, unit.format()) + .D8(unit.address_size()), + 5 => section + .mark(&start) + .L16(unit.version()) + .D8(unit.type_().dw_ut().0) + .D8(unit.address_size()) + .offset(unit.debug_abbrev_offset.0, unit.format()), + _ => unreachable!(), + }; + + let section = match unit.type_() { + UnitType::Compilation | UnitType::Partial => { + unit.unit_offset = DebugInfoOffset(size as usize).into(); + section + } + UnitType::Type { + type_signature, + type_offset, + } + | UnitType::SplitType { + type_signature, + type_offset, + } => { + if unit.version() == 5 { + unit.unit_offset = DebugInfoOffset(size as usize).into(); + } else { + unit.unit_offset = DebugTypesOffset(size as usize).into(); + } + section + .L64(type_signature.0) + .offset(type_offset.0, unit.format()) + } + UnitType::Skeleton(dwo_id) | UnitType::SplitCompilation(dwo_id) => { + unit.unit_offset = DebugInfoOffset(size as usize).into(); + section.L64(dwo_id.0) + } + }; + + let section = section.append_bytes(unit.entries_buf.slice()).mark(&end); + + unit.unit_length = (&end - &start) as usize; + length.set_const(unit.unit_length as u64); + + section + } + + fn die(self, code: 
u64, attr: F) -> Self + where + F: Fn(Section) -> Section, + { + let section = self.uleb(code); + attr(section) + } + + fn die_null(self) -> Self { + self.D8(0) + } + + fn attr_string(self, attr: &str) -> Self { + self.append_bytes(attr.as_bytes()).D8(0) + } + + fn attr_ref1(self, attr: u8) -> Self { + self.D8(attr) + } + + fn offset(self, offset: usize, format: Format) -> Self { + match format { + Format::Dwarf32 => self.L32(offset as u32), + Format::Dwarf64 => self.L64(offset as u64), + } + } + } + + /// Ensure that `UnitHeader` is covariant wrt R. + #[test] + fn test_unit_header_variance() { + /// This only needs to compile. + fn _f<'a: 'b, 'b, E: Endianity>( + x: UnitHeader>, + ) -> UnitHeader> { + x + } + } + + #[test] + fn test_parse_debug_abbrev_offset_32() { + let section = Section::with_endian(Endian::Little).L32(0x0403_0201); + let buf = section.get_contents().unwrap(); + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_abbrev_offset(buf, Format::Dwarf32) { + Ok(val) => assert_eq!(val, DebugAbbrevOffset(0x0403_0201)), + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_debug_abbrev_offset_32_incomplete() { + let buf = [0x01, 0x02]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_abbrev_offset(buf, Format::Dwarf32) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_debug_abbrev_offset_64() { + let section = Section::with_endian(Endian::Little).L64(0x0807_0605_0403_0201); + let buf = section.get_contents().unwrap(); + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_abbrev_offset(buf, Format::Dwarf64) { + Ok(val) => assert_eq!(val, DebugAbbrevOffset(0x0807_0605_0403_0201)), + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_debug_abbrev_offset_64_incomplete() { + 
let buf = [0x01, 0x02]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_abbrev_offset(buf, Format::Dwarf64) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_debug_info_offset_32() { + let section = Section::with_endian(Endian::Little).L32(0x0403_0201); + let buf = section.get_contents().unwrap(); + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_info_offset(buf, Format::Dwarf32) { + Ok(val) => assert_eq!(val, DebugInfoOffset(0x0403_0201)), + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_debug_info_offset_32_incomplete() { + let buf = [0x01, 0x02]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_info_offset(buf, Format::Dwarf32) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_debug_info_offset_64() { + let section = Section::with_endian(Endian::Little).L64(0x0807_0605_0403_0201); + let buf = section.get_contents().unwrap(); + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_info_offset(buf, Format::Dwarf64) { + Ok(val) => assert_eq!(val, DebugInfoOffset(0x0807_0605_0403_0201)), + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_debug_info_offset_64_incomplete() { + let buf = [0x01, 0x02]; + let buf = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_debug_info_offset(buf, Format::Dwarf64) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_units() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let mut unit64 = UnitHeader { + encoding: Encoding { + format: Format::Dwarf64, + version: 4, + address_size: 8, + }, + 
unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0x0102_0304_0506_0708), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let mut unit32 = UnitHeader { + encoding: Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut unit64) + .unit(&mut unit32); + let buf = section.get_contents().unwrap(); + + let debug_info = DebugInfo::new(&buf, LittleEndian); + let mut units = debug_info.units(); + + assert_eq!(units.next(), Ok(Some(unit64))); + assert_eq!(units.next(), Ok(Some(unit32))); + assert_eq!(units.next(), Ok(None)); + } + + #[test] + fn test_unit_version_unknown_version() { + let buf = [0x02, 0x00, 0x00, 0x00, 0xab, 0xcd]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_unit_header(rest, DebugInfoOffset(0).into()) { + Err(Error::UnknownVersion(0xcdab)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + + let buf = [0x02, 0x00, 0x00, 0x00, 0x1, 0x0]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_unit_header(rest, DebugInfoOffset(0).into()) { + Err(Error::UnknownVersion(1)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_unit_version_incomplete() { + let buf = [0x01, 0x00, 0x00, 0x00, 0x04]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_unit_header(rest, DebugInfoOffset(0).into()) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + 
format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 4, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0x0102_0304_0506_0708), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_v5_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 4, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: 
DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_v5_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0x0102_0304_0506_0708), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_v5_partial_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 4, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Partial, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); 
+ let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_v5_partial_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Partial, + debug_abbrev_offset: DebugAbbrevOffset(0x0102_0304_0506_0708), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_v5_skeleton_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 4, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Skeleton(DwoId(0x0706_5040_0302_1000)), + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, 
LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_v5_skeleton_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Skeleton(DwoId(0x0706_5040_0302_1000)), + debug_abbrev_offset: DebugAbbrevOffset(0x0102_0304_0506_0708), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_v5_split_compilation_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 4, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::SplitCompilation(DwoId(0x0706_5040_0302_1000)), + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_v5_split_compilation_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 
5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::SplitCompilation(DwoId(0x0706_5040_0302_1000)), + debug_abbrev_offset: DebugAbbrevOffset(0x0102_0304_0506_0708), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_type_offset_32_ok() { + let buf = [0x12, 0x34, 0x56, 0x78, 0x00]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_type_offset(rest, Format::Dwarf32) { + Ok(offset) => { + assert_eq!(rest.len(), 1); + assert_eq!(UnitOffset(0x7856_3412), offset); + } + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_type_offset_64_ok() { + let buf = [0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xff, 0x00]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_type_offset(rest, Format::Dwarf64) { + Ok(offset) => { + assert_eq!(rest.len(), 1); + assert_eq!(UnitOffset(0xffde_bc9a_7856_3412), offset); + } + otherwise => panic!("Unexpected result: {:?}", otherwise), + } + } + + #[test] + fn test_parse_type_offset_incomplete() { + // Need at least 4 bytes. 
+ let buf = [0xff, 0xff, 0xff]; + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + match parse_type_offset(rest, Format::Dwarf32) { + Err(Error::UnexpectedEof(_)) => {} + otherwise => panic!("Unexpected result: {:?}", otherwise), + }; + } + + #[test] + fn test_parse_type_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Type { + type_signature: DebugTypeSignature(0xdead_beef_dead_beef), + type_offset: UnitOffset(0x7856_3412), + }, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugTypesOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugTypesOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_type_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 4, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Type { + type_signature: DebugTypeSignature(0xdead_beef_dead_beef), + type_offset: UnitOffset(0x7856_3412_7856_3412), + }, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugTypesOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut 
EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugTypesOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_v5_type_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Type { + type_signature: DebugTypeSignature(0xdead_beef_dead_beef), + type_offset: UnitOffset(0x7856_3412), + }, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_v5_type_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Type { + type_signature: DebugTypeSignature(0xdead_beef_dead_beef), + type_offset: UnitOffset(0x7856_3412_7856_3412), + }, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + 
assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + fn test_parse_v5_split_type_unit_header_32_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::SplitType { + type_signature: DebugTypeSignature(0xdead_beef_dead_beef), + type_offset: UnitOffset(0x7856_3412), + }, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_v5_split_type_unit_header_64_ok() { + let expected_rest = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; + let encoding = Encoding { + format: Format::Dwarf64, + version: 5, + address_size: 8, + }; + let mut expected_unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::SplitType { + type_signature: DebugTypeSignature(0xdead_beef_dead_beef), + type_offset: UnitOffset(0x7856_3412_7856_3412), + }, + debug_abbrev_offset: DebugAbbrevOffset(0x0807_0605), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(expected_rest, LittleEndian), + }; + let section = Section::with_endian(Endian::Little) + .unit(&mut expected_unit) + .append_bytes(expected_rest); + let buf = section.get_contents().unwrap(); + let rest = &mut EndianSlice::new(&buf, LittleEndian); + + assert_eq!( + 
parse_unit_header(rest, DebugInfoOffset(0).into()), + Ok(expected_unit) + ); + assert_eq!(*rest, EndianSlice::new(expected_rest, LittleEndian)); + } + + fn section_contents(f: F) -> Vec + where + F: Fn(Section) -> Section, + { + f(Section::with_endian(Endian::Little)) + .get_contents() + .unwrap() + } + + #[test] + fn test_attribute_value() { + let mut unit = test_parse_attribute_unit_default(); + let endian = unit.entries_buf.endian(); + + let block_data = &[1, 2, 3, 4]; + let buf = section_contents(|s| s.uleb(block_data.len() as u64).append_bytes(block_data)); + let block = EndianSlice::new(&buf, endian); + + let buf = section_contents(|s| s.L32(0x0102_0304)); + let data4 = EndianSlice::new(&buf, endian); + + let buf = section_contents(|s| s.L64(0x0102_0304_0506_0708)); + let data8 = EndianSlice::new(&buf, endian); + + let tests = [ + ( + Format::Dwarf32, + 2, + constants::DW_AT_data_member_location, + constants::DW_FORM_block, + block, + AttributeValue::Block(EndianSlice::new(block_data, endian)), + AttributeValue::Exprloc(Expression(EndianSlice::new(block_data, endian))), + ), + ( + Format::Dwarf32, + 2, + constants::DW_AT_data_member_location, + constants::DW_FORM_data4, + data4, + AttributeValue::SecOffset(0x0102_0304), + AttributeValue::LocationListsRef(LocationListsOffset(0x0102_0304)), + ), + ( + Format::Dwarf64, + 2, + constants::DW_AT_data_member_location, + constants::DW_FORM_data4, + data4, + AttributeValue::Data4(0x0102_0304), + AttributeValue::Udata(0x0102_0304), + ), + ( + Format::Dwarf32, + 4, + constants::DW_AT_data_member_location, + constants::DW_FORM_data4, + data4, + AttributeValue::Data4(0x0102_0304), + AttributeValue::Udata(0x0102_0304), + ), + ( + Format::Dwarf32, + 2, + constants::DW_AT_data_member_location, + constants::DW_FORM_data8, + data8, + AttributeValue::Data8(0x0102_0304_0506_0708), + AttributeValue::Udata(0x0102_0304_0506_0708), + ), + #[cfg(target_pointer_width = "64")] + ( + Format::Dwarf64, + 2, + 
constants::DW_AT_data_member_location, + constants::DW_FORM_data8, + data8, + AttributeValue::SecOffset(0x0102_0304_0506_0708), + AttributeValue::LocationListsRef(LocationListsOffset(0x0102_0304_0506_0708)), + ), + ( + Format::Dwarf64, + 4, + constants::DW_AT_data_member_location, + constants::DW_FORM_data8, + data8, + AttributeValue::Data8(0x0102_0304_0506_0708), + AttributeValue::Udata(0x0102_0304_0506_0708), + ), + ( + Format::Dwarf32, + 4, + constants::DW_AT_location, + constants::DW_FORM_data4, + data4, + AttributeValue::SecOffset(0x0102_0304), + AttributeValue::LocationListsRef(LocationListsOffset(0x0102_0304)), + ), + #[cfg(target_pointer_width = "64")] + ( + Format::Dwarf64, + 4, + constants::DW_AT_location, + constants::DW_FORM_data8, + data8, + AttributeValue::SecOffset(0x0102_0304_0506_0708), + AttributeValue::LocationListsRef(LocationListsOffset(0x0102_0304_0506_0708)), + ), + ( + Format::Dwarf32, + 4, + constants::DW_AT_str_offsets_base, + constants::DW_FORM_sec_offset, + data4, + AttributeValue::SecOffset(0x0102_0304), + AttributeValue::DebugStrOffsetsBase(DebugStrOffsetsBase(0x0102_0304)), + ), + ( + Format::Dwarf32, + 4, + constants::DW_AT_stmt_list, + constants::DW_FORM_sec_offset, + data4, + AttributeValue::SecOffset(0x0102_0304), + AttributeValue::DebugLineRef(DebugLineOffset(0x0102_0304)), + ), + ( + Format::Dwarf32, + 4, + constants::DW_AT_addr_base, + constants::DW_FORM_sec_offset, + data4, + AttributeValue::SecOffset(0x0102_0304), + AttributeValue::DebugAddrBase(DebugAddrBase(0x0102_0304)), + ), + ( + Format::Dwarf32, + 4, + constants::DW_AT_rnglists_base, + constants::DW_FORM_sec_offset, + data4, + AttributeValue::SecOffset(0x0102_0304), + AttributeValue::DebugRngListsBase(DebugRngListsBase(0x0102_0304)), + ), + ( + Format::Dwarf32, + 4, + constants::DW_AT_loclists_base, + constants::DW_FORM_sec_offset, + data4, + AttributeValue::SecOffset(0x0102_0304), + AttributeValue::DebugLocListsBase(DebugLocListsBase(0x0102_0304)), + ), + ]; + + for 
test in tests.iter() { + let (format, version, name, form, mut input, expect_raw, expect_value) = *test; + unit.encoding.format = format; + unit.encoding.version = version; + let spec = AttributeSpecification::new(name, form, None); + let attribute = + parse_attribute(&mut input, unit.encoding(), spec).expect("Should parse attribute"); + assert_eq!(attribute.raw_value(), expect_raw); + assert_eq!(attribute.value(), expect_value); + } + } + + #[test] + fn test_attribute_udata_sdata_value() { + #[allow(clippy::type_complexity)] + let tests: &[( + AttributeValue>, + Option, + Option, + )] = &[ + (AttributeValue::Data1(1), Some(1), Some(1)), + ( + AttributeValue::Data1(core::u8::MAX), + Some(u64::from(std::u8::MAX)), + Some(-1), + ), + (AttributeValue::Data2(1), Some(1), Some(1)), + ( + AttributeValue::Data2(core::u16::MAX), + Some(u64::from(std::u16::MAX)), + Some(-1), + ), + (AttributeValue::Data4(1), Some(1), Some(1)), + ( + AttributeValue::Data4(core::u32::MAX), + Some(u64::from(std::u32::MAX)), + Some(-1), + ), + (AttributeValue::Data8(1), Some(1), Some(1)), + ( + AttributeValue::Data8(core::u64::MAX), + Some(core::u64::MAX), + Some(-1), + ), + (AttributeValue::Sdata(1), Some(1), Some(1)), + (AttributeValue::Sdata(-1), None, Some(-1)), + (AttributeValue::Udata(1), Some(1), Some(1)), + (AttributeValue::Udata(1u64 << 63), Some(1u64 << 63), None), + ]; + for test in tests.iter() { + let (value, expect_udata, expect_sdata) = *test; + let attribute = Attribute { + name: DW_AT_data_member_location, + value, + }; + assert_eq!(attribute.udata_value(), expect_udata); + assert_eq!(attribute.sdata_value(), expect_sdata); + } + } + + fn test_parse_attribute_unit( + address_size: u8, + format: Format, + endian: Endian, + ) -> UnitHeader> + where + Endian: Endianity, + { + let encoding = Encoding { + format, + version: 4, + address_size, + }; + UnitHeader::new( + encoding, + 7, + UnitType::Compilation, + DebugAbbrevOffset(0x0807_0605), + DebugInfoOffset(0).into(), + 
EndianSlice::new(&[], endian), + ) + } + + fn test_parse_attribute_unit_default() -> UnitHeader> { + test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian) + } + + fn test_parse_attribute<'input, Endian>( + buf: &'input [u8], + len: usize, + unit: &UnitHeader>, + form: constants::DwForm, + value: AttributeValue>, + ) where + Endian: Endianity, + { + let spec = AttributeSpecification::new(constants::DW_AT_low_pc, form, None); + + let expect = Attribute { + name: constants::DW_AT_low_pc, + value, + }; + + let rest = &mut EndianSlice::new(buf, Endian::default()); + match parse_attribute(rest, unit.encoding(), spec) { + Ok(attr) => { + assert_eq!(attr, expect); + assert_eq!(*rest, EndianSlice::new(&buf[len..], Endian::default())); + if let Some(size) = spec.size(unit) { + assert_eq!(rest.len() + size, buf.len()); + } + } + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + }; + } + + #[test] + fn test_parse_attribute_addr() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]; + let unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_addr; + let value = AttributeValue::Addr(0x0403_0201); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + fn test_parse_attribute_addr8() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]; + let unit = test_parse_attribute_unit(8, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_addr; + let value = AttributeValue::Addr(0x0807_0605_0403_0201); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_block1() { + // Length of data (3), three bytes of data, two bytes of left over input. 
+ let buf = [0x03, 0x09, 0x09, 0x09, 0x00, 0x00]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_block1; + let value = AttributeValue::Block(EndianSlice::new(&buf[1..4], LittleEndian)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + fn test_parse_attribute_block2() { + // Two byte length of data (2), two bytes of data, two bytes of left over input. + let buf = [0x02, 0x00, 0x09, 0x09, 0x00, 0x00]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_block2; + let value = AttributeValue::Block(EndianSlice::new(&buf[2..4], LittleEndian)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + fn test_parse_attribute_block4() { + // Four byte length of data (2), two bytes of data, no left over input. + let buf = [0x02, 0x00, 0x00, 0x00, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_block4; + let value = AttributeValue::Block(EndianSlice::new(&buf[4..], LittleEndian)); + test_parse_attribute(&buf, 6, &unit, form, value); + } + + #[test] + fn test_parse_attribute_block() { + // LEB length of data (2, one byte), two bytes of data, no left over input. 
+ let buf = [0x02, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_block; + let value = AttributeValue::Block(EndianSlice::new(&buf[1..], LittleEndian)); + test_parse_attribute(&buf, 3, &unit, form, value); + } + + #[test] + fn test_parse_attribute_data1() { + let buf = [0x03]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_data1; + let value = AttributeValue::Data1(0x03); + test_parse_attribute(&buf, 1, &unit, form, value); + } + + #[test] + fn test_parse_attribute_data2() { + let buf = [0x02, 0x01, 0x0]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_data2; + let value = AttributeValue::Data2(0x0102); + test_parse_attribute(&buf, 2, &unit, form, value); + } + + #[test] + fn test_parse_attribute_data4() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_data4; + let value = AttributeValue::Data4(0x0403_0201); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + fn test_parse_attribute_data8() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_data8; + let value = AttributeValue::Data8(0x0807_0605_0403_0201); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_udata() { + let mut buf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + + let bytes_written = { + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, 4097).expect("should write ok") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_udata; + let value = AttributeValue::Udata(4097); + test_parse_attribute(&buf, bytes_written, &unit, form, value); + } + + #[test] + fn test_parse_attribute_sdata() { + let mut buf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + + let bytes_written = { + let mut writable = &mut 
buf[..]; + leb128::write::signed(&mut writable, -4097).expect("should write ok") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_sdata; + let value = AttributeValue::Sdata(-4097); + test_parse_attribute(&buf, bytes_written, &unit, form, value); + } + + #[test] + fn test_parse_attribute_exprloc() { + // LEB length of data (2, one byte), two bytes of data, one byte left over input. + let buf = [0x02, 0x99, 0x99, 0x11]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_exprloc; + let value = AttributeValue::Exprloc(Expression(EndianSlice::new(&buf[1..3], LittleEndian))); + test_parse_attribute(&buf, 3, &unit, form, value); + } + + #[test] + fn test_parse_attribute_flag_true() { + let buf = [0x42]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_flag; + let value = AttributeValue::Flag(true); + test_parse_attribute(&buf, 1, &unit, form, value); + } + + #[test] + fn test_parse_attribute_flag_false() { + let buf = [0x00]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_flag; + let value = AttributeValue::Flag(false); + test_parse_attribute(&buf, 1, &unit, form, value); + } + + #[test] + fn test_parse_attribute_flag_present() { + let buf = [0x01, 0x02, 0x03, 0x04]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_flag_present; + let value = AttributeValue::Flag(true); + // DW_FORM_flag_present does not consume any bytes of the input stream. 
+ test_parse_attribute(&buf, 0, &unit, form, value); + } + + #[test] + fn test_parse_attribute_sec_offset_32() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10]; + let unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_sec_offset; + let value = AttributeValue::SecOffset(0x0403_0201); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_sec_offset_64() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_sec_offset; + let value = AttributeValue::SecOffset(0x0807_0605_0403_0201); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_ref1() { + let buf = [0x03]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref1; + let value = AttributeValue::UnitRef(UnitOffset(3)); + test_parse_attribute(&buf, 1, &unit, form, value); + } + + #[test] + fn test_parse_attribute_ref2() { + let buf = [0x02, 0x01, 0x0]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref2; + let value = AttributeValue::UnitRef(UnitOffset(258)); + test_parse_attribute(&buf, 2, &unit, form, value); + } + + #[test] + fn test_parse_attribute_ref4() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref4; + let value = AttributeValue::UnitRef(UnitOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_ref8() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref8; + let value = AttributeValue::UnitRef(UnitOffset(0x0807_0605_0403_0201)); + 
test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_ref_sup4() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref_sup4; + let value = AttributeValue::DebugInfoRefSup(DebugInfoOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_ref_sup8() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref_sup8; + let value = AttributeValue::DebugInfoRefSup(DebugInfoOffset(0x0807_0605_0403_0201)); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_refudata() { + let mut buf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + + let bytes_written = { + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, 4097).expect("should write ok") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref_udata; + let value = AttributeValue::UnitRef(UnitOffset(4097)); + test_parse_attribute(&buf, bytes_written, &unit, form, value); + } + + #[test] + fn test_parse_attribute_refaddr_32() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_ref_addr; + let value = AttributeValue::DebugInfoRef(DebugInfoOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_refaddr_64() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_ref_addr; + let value = AttributeValue::DebugInfoRef(DebugInfoOffset(0x0807_0605_0403_0201)); + 
test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_refaddr_version2() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let mut unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + unit.encoding.version = 2; + let form = constants::DW_FORM_ref_addr; + let value = AttributeValue::DebugInfoRef(DebugInfoOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_refaddr8_version2() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let mut unit = test_parse_attribute_unit(8, Format::Dwarf32, LittleEndian); + unit.encoding.version = 2; + let form = constants::DW_FORM_ref_addr; + let value = AttributeValue::DebugInfoRef(DebugInfoOffset(0x0807_0605_0403_0201)); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_gnu_ref_alt_32() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_GNU_ref_alt; + let value = AttributeValue::DebugInfoRefSup(DebugInfoOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_gnu_ref_alt_64() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_GNU_ref_alt; + let value = AttributeValue::DebugInfoRefSup(DebugInfoOffset(0x0807_0605_0403_0201)); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_refsig8() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_ref_sig8; + let value = 
AttributeValue::DebugTypesRef(DebugTypeSignature(0x0807_0605_0403_0201)); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_string() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x0, 0x99, 0x99]; + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_string; + let value = AttributeValue::String(EndianSlice::new(&buf[..5], LittleEndian)); + test_parse_attribute(&buf, 6, &unit, form, value); + } + + #[test] + fn test_parse_attribute_strp_32() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_strp; + let value = AttributeValue::DebugStrRef(DebugStrOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_strp_64() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_strp; + let value = AttributeValue::DebugStrRef(DebugStrOffset(0x0807_0605_0403_0201)); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_strp_sup_32() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_strp_sup; + let value = AttributeValue::DebugStrRefSup(DebugStrOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_strp_sup_64() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_strp_sup; + let value = AttributeValue::DebugStrRefSup(DebugStrOffset(0x0807_0605_0403_0201)); + 
test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_gnu_strp_alt_32() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf32, LittleEndian); + let form = constants::DW_FORM_GNU_strp_alt; + let value = AttributeValue::DebugStrRefSup(DebugStrOffset(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_attribute_gnu_strp_alt_64() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_GNU_strp_alt; + let value = AttributeValue::DebugStrRefSup(DebugStrOffset(0x0807_0605_0403_0201)); + test_parse_attribute(&buf, 8, &unit, form, value); + } + + #[test] + fn test_parse_attribute_strx() { + let mut buf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + + let bytes_written = { + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, 4097).expect("should write ok") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_strx; + let value = AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(4097)); + test_parse_attribute(&buf, bytes_written, &unit, form, value); + } + + #[test] + fn test_parse_attribute_strx1() { + let buf = [0x01, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_strx1; + let value = AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(0x01)); + test_parse_attribute(&buf, 1, &unit, form, value); + } + + #[test] + fn test_parse_attribute_strx2() { + let buf = [0x01, 0x02, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_strx2; + let value = AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(0x0201)); + test_parse_attribute(&buf, 2, &unit, form, 
value); + } + + #[test] + fn test_parse_attribute_strx3() { + let buf = [0x01, 0x02, 0x03, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_strx3; + let value = AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(0x03_0201)); + test_parse_attribute(&buf, 3, &unit, form, value); + } + + #[test] + fn test_parse_attribute_strx4() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_strx4; + let value = AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + fn test_parse_attribute_addrx() { + let mut buf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + + let bytes_written = { + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, 4097).expect("should write ok") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_addrx; + let value = AttributeValue::DebugAddrIndex(DebugAddrIndex(4097)); + test_parse_attribute(&buf, bytes_written, &unit, form, value); + } + + #[test] + fn test_parse_attribute_addrx1() { + let buf = [0x01, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_addrx1; + let value = AttributeValue::DebugAddrIndex(DebugAddrIndex(0x01)); + test_parse_attribute(&buf, 1, &unit, form, value); + } + + #[test] + fn test_parse_attribute_addrx2() { + let buf = [0x01, 0x02, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_addrx2; + let value = AttributeValue::DebugAddrIndex(DebugAddrIndex(0x0201)); + test_parse_attribute(&buf, 2, &unit, form, value); + } + + #[test] + fn test_parse_attribute_addrx3() { + let buf = [0x01, 0x02, 0x03, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let 
form = constants::DW_FORM_addrx3; + let value = AttributeValue::DebugAddrIndex(DebugAddrIndex(0x03_0201)); + test_parse_attribute(&buf, 3, &unit, form, value); + } + + #[test] + fn test_parse_attribute_addrx4() { + let buf = [0x01, 0x02, 0x03, 0x04, 0x99, 0x99]; + let unit = test_parse_attribute_unit(4, Format::Dwarf64, LittleEndian); + let form = constants::DW_FORM_addrx4; + let value = AttributeValue::DebugAddrIndex(DebugAddrIndex(0x0403_0201)); + test_parse_attribute(&buf, 4, &unit, form, value); + } + + #[test] + fn test_parse_attribute_loclistx() { + let mut buf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + + let bytes_written = { + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, 4097).expect("should write ok") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_loclistx; + let value = AttributeValue::DebugLocListsIndex(DebugLocListsIndex(4097)); + test_parse_attribute(&buf, bytes_written, &unit, form, value); + } + + #[test] + fn test_parse_attribute_rnglistx() { + let mut buf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + + let bytes_written = { + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, 4097).expect("should write ok") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_rnglistx; + let value = AttributeValue::DebugRngListsIndex(DebugRngListsIndex(4097)); + test_parse_attribute(&buf, bytes_written, &unit, form, value); + } + + #[test] + fn test_parse_attribute_indirect() { + let mut buf = [0; 100]; + + let bytes_written = { + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, constants::DW_FORM_udata.0.into()) + .expect("should write udata") + + leb128::write::unsigned(&mut writable, 9_999_999).expect("should write value") + }; + + let unit = test_parse_attribute_unit_default(); + let form = constants::DW_FORM_indirect; + let value = AttributeValue::Udata(9_999_999); + test_parse_attribute(&buf, bytes_written, &unit, form, 
value); + } + + #[test] + fn test_parse_attribute_indirect_implicit_const() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut buf = [0; 100]; + let mut writable = &mut buf[..]; + leb128::write::unsigned(&mut writable, constants::DW_FORM_implicit_const.0.into()) + .expect("should write implicit_const"); + + let input = &mut EndianSlice::new(&buf, LittleEndian); + let spec = + AttributeSpecification::new(constants::DW_AT_low_pc, constants::DW_FORM_indirect, None); + assert_eq!( + parse_attribute(input, encoding, spec), + Err(Error::InvalidImplicitConst) + ); + } + + #[test] + fn test_attrs_iter() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let unit = UnitHeader::new( + encoding, + 7, + UnitType::Compilation, + DebugAbbrevOffset(0x0807_0605), + DebugInfoOffset(0).into(), + EndianSlice::new(&[], LittleEndian), + ); + + let abbrev = Abbreviation::new( + 42, + constants::DW_TAG_subprogram, + constants::DW_CHILDREN_yes, + vec![ + AttributeSpecification::new(constants::DW_AT_name, constants::DW_FORM_string, None), + AttributeSpecification::new(constants::DW_AT_low_pc, constants::DW_FORM_addr, None), + AttributeSpecification::new( + constants::DW_AT_high_pc, + constants::DW_FORM_addr, + None, + ), + ] + .into(), + ); + + // "foo", 42, 1337, 4 dangling bytes of 0xaa where children would be + let buf = [ + 0x66, 0x6f, 0x6f, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x39, 0x05, 0x00, 0x00, 0xaa, 0xaa, + 0xaa, 0xaa, + ]; + + let entry = DebuggingInformationEntry { + offset: UnitOffset(0), + attrs_slice: EndianSlice::new(&buf, LittleEndian), + attrs_len: Cell::new(None), + abbrev: &abbrev, + unit: &unit, + }; + + let mut attrs = AttrsIter { + input: EndianSlice::new(&buf, LittleEndian), + attributes: abbrev.attributes(), + entry: &entry, + }; + + match attrs.next() { + Ok(Some(attr)) => { + assert_eq!( + attr, + Attribute { + name: constants::DW_AT_name, + value: 
AttributeValue::String(EndianSlice::new(b"foo", LittleEndian)), + } + ); + } + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + } + + assert!(entry.attrs_len.get().is_none()); + + match attrs.next() { + Ok(Some(attr)) => { + assert_eq!( + attr, + Attribute { + name: constants::DW_AT_low_pc, + value: AttributeValue::Addr(0x2a), + } + ); + } + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + } + + assert!(entry.attrs_len.get().is_none()); + + match attrs.next() { + Ok(Some(attr)) => { + assert_eq!( + attr, + Attribute { + name: constants::DW_AT_high_pc, + value: AttributeValue::Addr(0x539), + } + ); + } + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + } + + assert!(entry.attrs_len.get().is_none()); + + assert!(attrs.next().expect("should parse next").is_none()); + assert!(entry.attrs_len.get().is_some()); + assert_eq!( + entry.attrs_len.get().expect("should have entry.attrs_len"), + buf.len() - 4 + ) + } + + #[test] + fn test_attrs_iter_incomplete() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let unit = UnitHeader::new( + encoding, + 7, + UnitType::Compilation, + DebugAbbrevOffset(0x0807_0605), + DebugInfoOffset(0).into(), + EndianSlice::new(&[], LittleEndian), + ); + + let abbrev = Abbreviation::new( + 42, + constants::DW_TAG_subprogram, + constants::DW_CHILDREN_yes, + vec![ + AttributeSpecification::new(constants::DW_AT_name, constants::DW_FORM_string, None), + AttributeSpecification::new(constants::DW_AT_low_pc, constants::DW_FORM_addr, None), + AttributeSpecification::new( + constants::DW_AT_high_pc, + constants::DW_FORM_addr, + None, + ), + ] + .into(), + ); + + // "foo" + let buf = [0x66, 0x6f, 0x6f, 0x00]; + + let entry = DebuggingInformationEntry { + offset: UnitOffset(0), + attrs_slice: EndianSlice::new(&buf, LittleEndian), + attrs_len: Cell::new(None), + abbrev: &abbrev, + unit: &unit, + }; + + let mut attrs = AttrsIter 
{ + input: EndianSlice::new(&buf, LittleEndian), + attributes: abbrev.attributes(), + entry: &entry, + }; + + match attrs.next() { + Ok(Some(attr)) => { + assert_eq!( + attr, + Attribute { + name: constants::DW_AT_name, + value: AttributeValue::String(EndianSlice::new(b"foo", LittleEndian)), + } + ); + } + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + } + + assert!(entry.attrs_len.get().is_none()); + + // Return error for incomplete attribute. + assert!(attrs.next().is_err()); + assert!(entry.attrs_len.get().is_none()); + + // Return error for all subsequent calls. + assert!(attrs.next().is_err()); + assert!(attrs.next().is_err()); + assert!(attrs.next().is_err()); + assert!(attrs.next().is_err()); + assert!(entry.attrs_len.get().is_none()); + } + + fn assert_entry_name( + entry: &DebuggingInformationEntry<'_, '_, EndianSlice<'_, Endian>>, + name: &str, + ) where + Endian: Endianity, + { + let value = entry + .attr_value(constants::DW_AT_name) + .expect("Should have parsed the name attribute") + .expect("Should have found the name attribute"); + + assert_eq!( + value, + AttributeValue::String(EndianSlice::new(name.as_bytes(), Endian::default())) + ); + } + + fn assert_current_name( + cursor: &EntriesCursor<'_, '_, EndianSlice<'_, Endian>>, + name: &str, + ) where + Endian: Endianity, + { + let entry = cursor.current().expect("Should have an entry result"); + assert_entry_name(entry, name); + } + + fn assert_next_entry( + cursor: &mut EntriesCursor<'_, '_, EndianSlice<'_, Endian>>, + name: &str, + ) where + Endian: Endianity, + { + cursor + .next_entry() + .expect("Should parse next entry") + .expect("Should have an entry"); + assert_current_name(cursor, name); + } + + fn assert_next_entry_null(cursor: &mut EntriesCursor<'_, '_, EndianSlice<'_, Endian>>) + where + Endian: Endianity, + { + cursor + .next_entry() + .expect("Should parse next entry") + .expect("Should have an entry"); + assert!(cursor.current().is_none()); + } + + fn 
assert_next_dfs( + cursor: &mut EntriesCursor<'_, '_, EndianSlice<'_, Endian>>, + name: &str, + depth: isize, + ) where + Endian: Endianity, + { + { + let (val, entry) = cursor + .next_dfs() + .expect("Should parse next dfs") + .expect("Should not be done with traversal"); + assert_eq!(val, depth); + assert_entry_name(entry, name); + } + assert_current_name(cursor, name); + } + + fn assert_next_sibling( + cursor: &mut EntriesCursor<'_, '_, EndianSlice<'_, Endian>>, + name: &str, + ) where + Endian: Endianity, + { + { + let entry = cursor + .next_sibling() + .expect("Should parse next sibling") + .expect("Should not be done with traversal"); + assert_entry_name(entry, name); + } + assert_current_name(cursor, name); + } + + fn assert_valid_sibling_ptr(cursor: &EntriesCursor<'_, '_, EndianSlice<'_, Endian>>) + where + Endian: Endianity, + { + let sibling_ptr = cursor + .current() + .expect("Should have current entry") + .attr_value(constants::DW_AT_sibling); + match sibling_ptr { + Ok(Some(AttributeValue::UnitRef(offset))) => { + cursor + .unit + .range_from(offset..) 
+ .expect("Sibling offset should be valid"); + } + _ => panic!("Invalid sibling pointer {:?}", sibling_ptr), + } + } + + fn entries_cursor_tests_abbrev_buf() -> Vec { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .abbrev(1, DW_TAG_subprogram, DW_CHILDREN_yes) + .abbrev_attr(DW_AT_name, DW_FORM_string) + .abbrev_attr_null() + .abbrev_null(); + section.get_contents().unwrap() + } + + fn entries_cursor_tests_debug_info_buf() -> Vec { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .die(1, |s| s.attr_string("001")) + .die(1, |s| s.attr_string("002")) + .die(1, |s| s.attr_string("003")) + .die_null() + .die_null() + .die(1, |s| s.attr_string("004")) + .die(1, |s| s.attr_string("005")) + .die_null() + .die(1, |s| s.attr_string("006")) + .die_null() + .die_null() + .die(1, |s| s.attr_string("007")) + .die(1, |s| s.attr_string("008")) + .die(1, |s| s.attr_string("009")) + .die_null() + .die_null() + .die_null() + .die(1, |s| s.attr_string("010")) + .die_null() + .die_null(); + let entries_buf = section.get_contents().unwrap(); + + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(&entries_buf, LittleEndian), + }; + let section = Section::with_endian(Endian::Little).unit(&mut unit); + section.get_contents().unwrap() + } + + #[test] + fn test_cursor_next_entry_incomplete() { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .die(1, |s| s.attr_string("001")) + .die(1, |s| s.attr_string("002")) + .die(1, |s| s); + let entries_buf = section.get_contents().unwrap(); + + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: 
UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(&entries_buf, LittleEndian), + }; + let section = Section::with_endian(Endian::Little).unit(&mut unit); + let info_buf = §ion.get_contents().unwrap(); + let debug_info = DebugInfo::new(info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrevs_buf = &entries_cursor_tests_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(abbrevs_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit.entries(&abbrevs); + + assert_next_entry(&mut cursor, "001"); + assert_next_entry(&mut cursor, "002"); + + { + // Entry code is present, but none of the attributes. + cursor + .next_entry() + .expect("Should parse next entry") + .expect("Should have an entry"); + let entry = cursor.current().expect("Should have an entry result"); + assert!(entry.attrs().next().is_err()); + } + + assert!(cursor.next_entry().is_err()); + assert!(cursor.next_entry().is_err()); + } + + #[test] + fn test_cursor_next_entry() { + let info_buf = &entries_cursor_tests_debug_info_buf(); + let debug_info = DebugInfo::new(info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrevs_buf = &entries_cursor_tests_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(abbrevs_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit.entries(&abbrevs); + + assert_next_entry(&mut cursor, "001"); + assert_next_entry(&mut cursor, "002"); + assert_next_entry(&mut cursor, "003"); + assert_next_entry_null(&mut cursor); + assert_next_entry_null(&mut cursor); + assert_next_entry(&mut cursor, "004"); + assert_next_entry(&mut 
cursor, "005"); + assert_next_entry_null(&mut cursor); + assert_next_entry(&mut cursor, "006"); + assert_next_entry_null(&mut cursor); + assert_next_entry_null(&mut cursor); + assert_next_entry(&mut cursor, "007"); + assert_next_entry(&mut cursor, "008"); + assert_next_entry(&mut cursor, "009"); + assert_next_entry_null(&mut cursor); + assert_next_entry_null(&mut cursor); + assert_next_entry_null(&mut cursor); + assert_next_entry(&mut cursor, "010"); + assert_next_entry_null(&mut cursor); + assert_next_entry_null(&mut cursor); + + assert!(cursor + .next_entry() + .expect("Should parse next entry") + .is_none()); + assert!(cursor.current().is_none()); + } + + #[test] + fn test_cursor_next_dfs() { + let info_buf = &entries_cursor_tests_debug_info_buf(); + let debug_info = DebugInfo::new(info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrevs_buf = &entries_cursor_tests_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(abbrevs_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit.entries(&abbrevs); + + assert_next_dfs(&mut cursor, "001", 0); + assert_next_dfs(&mut cursor, "002", 1); + assert_next_dfs(&mut cursor, "003", 1); + assert_next_dfs(&mut cursor, "004", -1); + assert_next_dfs(&mut cursor, "005", 1); + assert_next_dfs(&mut cursor, "006", 0); + assert_next_dfs(&mut cursor, "007", -1); + assert_next_dfs(&mut cursor, "008", 1); + assert_next_dfs(&mut cursor, "009", 1); + assert_next_dfs(&mut cursor, "010", -2); + + assert!(cursor.next_dfs().expect("Should parse next dfs").is_none()); + assert!(cursor.current().is_none()); + } + + #[test] + fn test_cursor_next_sibling_no_sibling_ptr() { + let info_buf = &entries_cursor_tests_debug_info_buf(); + let debug_info = DebugInfo::new(info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a 
unit result") + .expect("and it should be ok"); + + let abbrevs_buf = &entries_cursor_tests_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(abbrevs_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit.entries(&abbrevs); + + assert_next_dfs(&mut cursor, "001", 0); + + // Down to the first child of the root entry. + + assert_next_dfs(&mut cursor, "002", 1); + + // Now iterate all children of the root via `next_sibling`. + + assert_next_sibling(&mut cursor, "004"); + assert_next_sibling(&mut cursor, "007"); + assert_next_sibling(&mut cursor, "010"); + + // There should be no more siblings. + + assert!(cursor + .next_sibling() + .expect("Should parse next sibling") + .is_none()); + assert!(cursor.current().is_none()); + } + + #[test] + fn test_cursor_next_sibling_continuation() { + let info_buf = &entries_cursor_tests_debug_info_buf(); + let debug_info = DebugInfo::new(info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrevs_buf = &entries_cursor_tests_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(abbrevs_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit.entries(&abbrevs); + + assert_next_dfs(&mut cursor, "001", 0); + + // Down to the first child of the root entry. 
+ + assert_next_dfs(&mut cursor, "002", 1); + + // Get the next sibling, then iterate its children + + assert_next_sibling(&mut cursor, "004"); + assert_next_dfs(&mut cursor, "005", 1); + assert_next_sibling(&mut cursor, "006"); + assert!(cursor + .next_sibling() + .expect("Should parse next sibling") + .is_none()); + assert!(cursor + .next_sibling() + .expect("Should parse next sibling") + .is_none()); + assert!(cursor + .next_sibling() + .expect("Should parse next sibling") + .is_none()); + assert!(cursor + .next_sibling() + .expect("Should parse next sibling") + .is_none()); + + // And we should be able to continue with the children of the root entry. + + assert_next_dfs(&mut cursor, "007", -1); + assert_next_sibling(&mut cursor, "010"); + + // There should be no more siblings. + + assert!(cursor + .next_sibling() + .expect("Should parse next sibling") + .is_none()); + assert!(cursor.current().is_none()); + } + + fn entries_cursor_sibling_abbrev_buf() -> Vec { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .abbrev(1, DW_TAG_subprogram, DW_CHILDREN_yes) + .abbrev_attr(DW_AT_name, DW_FORM_string) + .abbrev_attr(DW_AT_sibling, DW_FORM_ref1) + .abbrev_attr_null() + .abbrev(2, DW_TAG_subprogram, DW_CHILDREN_yes) + .abbrev_attr(DW_AT_name, DW_FORM_string) + .abbrev_attr_null() + .abbrev_null(); + section.get_contents().unwrap() + } + + fn entries_cursor_sibling_entries_buf(header_size: usize) -> Vec { + let start = Label::new(); + let sibling004_ref = Label::new(); + let sibling004 = Label::new(); + let sibling009_ref = Label::new(); + let sibling009 = Label::new(); + + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .mark(&start) + .die(2, |s| s.attr_string("001")) + // Valid sibling attribute. + .die(1, |s| s.attr_string("002").D8(&sibling004_ref)) + // Invalid code to ensure the sibling attribute was used. 
+ .die(10, |s| s.attr_string("003")) + .die_null() + .die_null() + .mark(&sibling004) + // Invalid sibling attribute. + .die(1, |s| s.attr_string("004").attr_ref1(255)) + .die(2, |s| s.attr_string("005")) + .die_null() + .die_null() + // Sibling attribute in child only. + .die(2, |s| s.attr_string("006")) + // Valid sibling attribute. + .die(1, |s| s.attr_string("007").D8(&sibling009_ref)) + // Invalid code to ensure the sibling attribute was used. + .die(10, |s| s.attr_string("008")) + .die_null() + .die_null() + .mark(&sibling009) + .die(2, |s| s.attr_string("009")) + .die_null() + .die_null() + // No sibling attribute. + .die(2, |s| s.attr_string("010")) + .die(2, |s| s.attr_string("011")) + .die_null() + .die_null() + .die_null(); + + let offset = header_size as u64 + (&sibling004 - &start) as u64; + sibling004_ref.set_const(offset); + + let offset = header_size as u64 + (&sibling009 - &start) as u64; + sibling009_ref.set_const(offset); + + section.get_contents().unwrap() + } + + fn test_cursor_next_sibling_with_ptr( + cursor: &mut EntriesCursor<'_, '_, EndianSlice<'_, LittleEndian>>, + ) { + assert_next_dfs(cursor, "001", 0); + + // Down to the first child of the root. + + assert_next_dfs(cursor, "002", 1); + + // Now iterate all children of the root via `next_sibling`. + + assert_valid_sibling_ptr(cursor); + assert_next_sibling(cursor, "004"); + assert_next_sibling(cursor, "006"); + assert_next_sibling(cursor, "010"); + + // There should be no more siblings. 
+ + assert!(cursor + .next_sibling() + .expect("Should parse next sibling") + .is_none()); + assert!(cursor.current().is_none()); + } + + #[test] + fn test_debug_info_next_sibling_with_ptr() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(&[], LittleEndian), + }; + let header_size = unit.size_of_header(); + let entries_buf = entries_cursor_sibling_entries_buf(header_size); + unit.entries_buf = EndianSlice::new(&entries_buf, LittleEndian); + let section = Section::with_endian(Endian::Little).unit(&mut unit); + let info_buf = section.get_contents().unwrap(); + let debug_info = DebugInfo::new(&info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrev_buf = entries_cursor_sibling_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(&abbrev_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit.entries(&abbrevs); + test_cursor_next_sibling_with_ptr(&mut cursor); + } + + #[test] + fn test_debug_types_next_sibling_with_ptr() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Type { + type_signature: DebugTypeSignature(0), + type_offset: UnitOffset(0), + }, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugTypesOffset(0).into(), + entries_buf: EndianSlice::new(&[], LittleEndian), + }; + let header_size = unit.size_of_header(); + let entries_buf = entries_cursor_sibling_entries_buf(header_size); + unit.entries_buf = EndianSlice::new(&entries_buf, LittleEndian); + let section = 
Section::with_endian(Endian::Little).unit(&mut unit); + let info_buf = section.get_contents().unwrap(); + let debug_types = DebugTypes::new(&info_buf, LittleEndian); + + let unit = debug_types + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrev_buf = entries_cursor_sibling_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(&abbrev_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit.entries(&abbrevs); + test_cursor_next_sibling_with_ptr(&mut cursor); + } + + #[test] + fn test_entries_at_offset() { + let info_buf = &entries_cursor_tests_debug_info_buf(); + let debug_info = DebugInfo::new(info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrevs_buf = &entries_cursor_tests_abbrev_buf(); + let debug_abbrev = DebugAbbrev::new(abbrevs_buf, LittleEndian); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut cursor = unit + .entries_at_offset(&abbrevs, UnitOffset(unit.header_size())) + .unwrap(); + assert_next_entry(&mut cursor, "001"); + + let cursor = unit.entries_at_offset(&abbrevs, UnitOffset(0)); + match cursor { + Err(Error::OffsetOutOfBounds) => {} + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + } + } + + fn entries_tree_tests_debug_abbrevs_buf() -> Vec { + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .abbrev(1, DW_TAG_subprogram, DW_CHILDREN_yes) + .abbrev_attr(DW_AT_name, DW_FORM_string) + .abbrev_attr_null() + .abbrev(2, DW_TAG_subprogram, DW_CHILDREN_no) + .abbrev_attr(DW_AT_name, DW_FORM_string) + .abbrev_attr_null() + .abbrev_null() + .get_contents() + .unwrap(); + section + } + + fn entries_tree_tests_debug_info_buf(header_size: usize) -> (Vec, UnitOffset) { + let start = Label::new(); + let entry2 = 
Label::new(); + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .mark(&start) + .die(1, |s| s.attr_string("root")) + .die(1, |s| s.attr_string("1")) + .die(1, |s| s.attr_string("1a")) + .die_null() + .die(2, |s| s.attr_string("1b")) + .die_null() + .mark(&entry2) + .die(1, |s| s.attr_string("2")) + .die(1, |s| s.attr_string("2a")) + .die(1, |s| s.attr_string("2a1")) + .die_null() + .die_null() + .die(1, |s| s.attr_string("2b")) + .die(2, |s| s.attr_string("2b1")) + .die_null() + .die_null() + .die(1, |s| s.attr_string("3")) + .die(1, |s| s.attr_string("3a")) + .die(2, |s| s.attr_string("3a1")) + .die(2, |s| s.attr_string("3a2")) + .die_null() + .die(2, |s| s.attr_string("3b")) + .die_null() + .die(2, |s| s.attr_string("final")) + .die_null() + .get_contents() + .unwrap(); + let entry2 = UnitOffset(header_size + (&entry2 - &start) as usize); + (section, entry2) + } + + #[test] + fn test_entries_tree() { + fn assert_entry<'input, 'abbrev, 'unit, 'tree, Endian>( + node: Result< + Option>>, + >, + name: &str, + ) -> EntriesTreeIter<'abbrev, 'unit, 'tree, EndianSlice<'input, Endian>> + where + Endian: Endianity, + { + let node = node + .expect("Should parse entry") + .expect("Should have entry"); + assert_entry_name(node.entry(), name); + node.children() + } + + fn assert_null( + node: Result>>>, + ) { + match node { + Ok(None) => {} + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + } + } + + let abbrevs_buf = entries_tree_tests_debug_abbrevs_buf(); + let debug_abbrev = DebugAbbrev::new(&abbrevs_buf, LittleEndian); + + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(&[], LittleEndian), + }; + let header_size = unit.size_of_header(); + let (entries_buf, entry2) = 
entries_tree_tests_debug_info_buf(header_size); + unit.entries_buf = EndianSlice::new(&entries_buf, LittleEndian); + let info_buf = Section::with_endian(Endian::Little) + .unit(&mut unit) + .get_contents() + .unwrap(); + let debug_info = DebugInfo::new(&info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("Should parse unit") + .expect("and it should be some"); + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + let mut tree = unit + .entries_tree(&abbrevs, None) + .expect("Should have entries tree"); + + // Test we can restart iteration of the tree. + { + let mut iter = assert_entry(tree.root().map(Some), "root"); + assert_entry(iter.next(), "1"); + } + { + let mut iter = assert_entry(tree.root().map(Some), "root"); + assert_entry(iter.next(), "1"); + } + + let mut iter = assert_entry(tree.root().map(Some), "root"); + { + // Test iteration with children. + let mut iter = assert_entry(iter.next(), "1"); + { + // Test iteration with children flag, but no children. + let mut iter = assert_entry(iter.next(), "1a"); + assert_null(iter.next()); + assert_null(iter.next()); + } + { + // Test iteration without children flag. + let mut iter = assert_entry(iter.next(), "1b"); + assert_null(iter.next()); + assert_null(iter.next()); + } + assert_null(iter.next()); + assert_null(iter.next()); + } + { + // Test skipping over children. + let mut iter = assert_entry(iter.next(), "2"); + assert_entry(iter.next(), "2a"); + assert_entry(iter.next(), "2b"); + assert_null(iter.next()); + } + { + // Test skipping after partial iteration. + let mut iter = assert_entry(iter.next(), "3"); + { + let mut iter = assert_entry(iter.next(), "3a"); + assert_entry(iter.next(), "3a1"); + // Parent iter should be able to skip over "3a2". + } + assert_entry(iter.next(), "3b"); + assert_null(iter.next()); + } + assert_entry(iter.next(), "final"); + assert_null(iter.next()); + + // Test starting at an offset. 
+ let mut tree = unit + .entries_tree(&abbrevs, Some(entry2)) + .expect("Should have entries tree"); + let mut iter = assert_entry(tree.root().map(Some), "2"); + assert_entry(iter.next(), "2a"); + assert_entry(iter.next(), "2b"); + assert_null(iter.next()); + } + + #[test] + fn test_entries_raw() { + fn assert_abbrev<'abbrev, Endian>( + entries: &mut EntriesRaw<'abbrev, '_, EndianSlice<'_, Endian>>, + tag: DwTag, + ) -> &'abbrev Abbreviation + where + Endian: Endianity, + { + let abbrev = entries + .read_abbreviation() + .expect("Should parse abbrev") + .expect("Should have abbrev"); + assert_eq!(abbrev.tag(), tag); + abbrev + } + + fn assert_null(entries: &mut EntriesRaw<'_, '_, EndianSlice<'_, Endian>>) + where + Endian: Endianity, + { + match entries.read_abbreviation() { + Ok(None) => {} + otherwise => { + panic!("Unexpected parse result = {:#?}", otherwise); + } + } + } + + fn assert_attr( + entries: &mut EntriesRaw<'_, '_, EndianSlice<'_, Endian>>, + spec: Option, + name: DwAt, + value: &str, + ) where + Endian: Endianity, + { + let spec = spec.expect("Should have attribute specification"); + let attr = entries + .read_attribute(spec) + .expect("Should parse attribute"); + assert_eq!(attr.name(), name); + assert_eq!( + attr.value(), + AttributeValue::String(EndianSlice::new(value.as_bytes(), Endian::default())) + ); + } + + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .abbrev(1, DW_TAG_subprogram, DW_CHILDREN_yes) + .abbrev_attr(DW_AT_name, DW_FORM_string) + .abbrev_attr(DW_AT_linkage_name, DW_FORM_string) + .abbrev_attr_null() + .abbrev(2, DW_TAG_variable, DW_CHILDREN_no) + .abbrev_attr(DW_AT_name, DW_FORM_string) + .abbrev_attr_null() + .abbrev_null(); + let abbrevs_buf = section.get_contents().unwrap(); + let debug_abbrev = DebugAbbrev::new(&abbrevs_buf, LittleEndian); + + #[rustfmt::skip] + let section = Section::with_endian(Endian::Little) + .die(1, |s| s.attr_string("f1").attr_string("l1")) + .die(2, |s| s.attr_string("v1")) + 
.die(2, |s| s.attr_string("v2")) + .die(1, |s| s.attr_string("f2").attr_string("l2")) + .die_null() + .die_null(); + let entries_buf = section.get_contents().unwrap(); + + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(&entries_buf, LittleEndian), + }; + let section = Section::with_endian(Endian::Little).unit(&mut unit); + let info_buf = section.get_contents().unwrap(); + let debug_info = DebugInfo::new(&info_buf, LittleEndian); + + let unit = debug_info + .units() + .next() + .expect("should have a unit result") + .expect("and it should be ok"); + + let abbrevs = unit + .abbreviations(&debug_abbrev) + .expect("Should parse abbreviations"); + + let mut entries = unit + .entries_raw(&abbrevs, None) + .expect("Should have entries"); + + assert_eq!(entries.next_depth(), 0); + let abbrev = assert_abbrev(&mut entries, DW_TAG_subprogram); + let mut attrs = abbrev.attributes().iter().copied(); + assert_attr(&mut entries, attrs.next(), DW_AT_name, "f1"); + assert_attr(&mut entries, attrs.next(), DW_AT_linkage_name, "l1"); + assert!(attrs.next().is_none()); + + assert_eq!(entries.next_depth(), 1); + let abbrev = assert_abbrev(&mut entries, DW_TAG_variable); + let mut attrs = abbrev.attributes().iter().copied(); + assert_attr(&mut entries, attrs.next(), DW_AT_name, "v1"); + assert!(attrs.next().is_none()); + + assert_eq!(entries.next_depth(), 1); + let abbrev = assert_abbrev(&mut entries, DW_TAG_variable); + let mut attrs = abbrev.attributes().iter().copied(); + assert_attr(&mut entries, attrs.next(), DW_AT_name, "v2"); + assert!(attrs.next().is_none()); + + assert_eq!(entries.next_depth(), 1); + let abbrev = assert_abbrev(&mut entries, DW_TAG_subprogram); + let mut attrs = abbrev.attributes().iter().copied(); + 
assert_attr(&mut entries, attrs.next(), DW_AT_name, "f2"); + assert_attr(&mut entries, attrs.next(), DW_AT_linkage_name, "l2"); + assert!(attrs.next().is_none()); + + assert_eq!(entries.next_depth(), 2); + assert_null(&mut entries); + + assert_eq!(entries.next_depth(), 1); + assert_null(&mut entries); + + assert_eq!(entries.next_depth(), 0); + assert!(entries.is_empty()); + } + + #[test] + fn test_debug_info_offset() { + let padding = &[0; 10]; + let entries = &[0; 20]; + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(entries, LittleEndian), + }; + Section::with_endian(Endian::Little) + .append_bytes(padding) + .unit(&mut unit); + let offset = padding.len(); + let header_length = unit.size_of_header(); + let length = unit.length_including_self(); + assert_eq!(DebugInfoOffset(0).to_unit_offset(&unit), None); + assert_eq!(DebugInfoOffset(offset - 1).to_unit_offset(&unit), None); + assert_eq!(DebugInfoOffset(offset).to_unit_offset(&unit), None); + assert_eq!( + DebugInfoOffset(offset + header_length - 1).to_unit_offset(&unit), + None + ); + assert_eq!( + DebugInfoOffset(offset + header_length).to_unit_offset(&unit), + Some(UnitOffset(header_length)) + ); + assert_eq!( + DebugInfoOffset(offset + length - 1).to_unit_offset(&unit), + Some(UnitOffset(length - 1)) + ); + assert_eq!(DebugInfoOffset(offset + length).to_unit_offset(&unit), None); + assert_eq!( + UnitOffset(header_length).to_debug_info_offset(&unit), + Some(DebugInfoOffset(offset + header_length)) + ); + assert_eq!( + UnitOffset(length - 1).to_debug_info_offset(&unit), + Some(DebugInfoOffset(offset + length - 1)) + ); + } + + #[test] + fn test_debug_types_offset() { + let padding = &[0; 10]; + let entries = &[0; 20]; + let encoding = Encoding { + format: 
Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Type { + type_signature: DebugTypeSignature(0), + type_offset: UnitOffset(0), + }, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugTypesOffset(0).into(), + entries_buf: EndianSlice::new(entries, LittleEndian), + }; + Section::with_endian(Endian::Little) + .append_bytes(padding) + .unit(&mut unit); + let offset = padding.len(); + let header_length = unit.size_of_header(); + let length = unit.length_including_self(); + assert_eq!(DebugTypesOffset(0).to_unit_offset(&unit), None); + assert_eq!(DebugTypesOffset(offset - 1).to_unit_offset(&unit), None); + assert_eq!(DebugTypesOffset(offset).to_unit_offset(&unit), None); + assert_eq!( + DebugTypesOffset(offset + header_length - 1).to_unit_offset(&unit), + None + ); + assert_eq!( + DebugTypesOffset(offset + header_length).to_unit_offset(&unit), + Some(UnitOffset(header_length)) + ); + assert_eq!( + DebugTypesOffset(offset + length - 1).to_unit_offset(&unit), + Some(UnitOffset(length - 1)) + ); + assert_eq!( + DebugTypesOffset(offset + length).to_unit_offset(&unit), + None + ); + assert_eq!( + UnitOffset(header_length).to_debug_types_offset(&unit), + Some(DebugTypesOffset(offset + header_length)) + ); + assert_eq!( + UnitOffset(length - 1).to_debug_types_offset(&unit), + Some(DebugTypesOffset(offset + length - 1)) + ); + } + + #[test] + fn test_length_including_self() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let mut unit = UnitHeader { + encoding, + unit_length: 0, + unit_type: UnitType::Compilation, + debug_abbrev_offset: DebugAbbrevOffset(0), + unit_offset: DebugInfoOffset(0).into(), + entries_buf: EndianSlice::new(&[], LittleEndian), + }; + unit.encoding.format = Format::Dwarf32; + assert_eq!(unit.length_including_self(), 4); + unit.encoding.format = Format::Dwarf64; + assert_eq!(unit.length_including_self(), 12); 
+ unit.unit_length = 10; + assert_eq!(unit.length_including_self(), 22); + } + + #[test] + fn test_parse_type_unit_abbrevs() { + let types_buf = [ + // Type unit header + 0x25, 0x00, 0x00, 0x00, // 32-bit unit length = 37 + 0x04, 0x00, // Version 4 + 0x00, 0x00, 0x00, 0x00, // debug_abbrev_offset + 0x04, // Address size + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, // Type signature + 0x01, 0x02, 0x03, 0x04, // Type offset + // DIEs + // Abbreviation code + 0x01, // Attribute of form DW_FORM_string = "foo\0" + 0x66, 0x6f, 0x6f, 0x00, // Children + // Abbreviation code + 0x01, // Attribute of form DW_FORM_string = "foo\0" + 0x66, 0x6f, 0x6f, 0x00, // Children + // Abbreviation code + 0x01, // Attribute of form DW_FORM_string = "foo\0" + 0x66, 0x6f, 0x6f, 0x00, // Children + 0x00, // End of children + 0x00, // End of children + 0x00, // End of children + ]; + let debug_types = DebugTypes::new(&types_buf, LittleEndian); + + let abbrev_buf = [ + // Code + 0x01, // DW_TAG_subprogram + 0x2e, // DW_CHILDREN_yes + 0x01, // Begin attributes + 0x03, // Attribute name = DW_AT_name + 0x08, // Attribute form = DW_FORM_string + 0x00, 0x00, // End attributes + 0x00, // Null terminator + ]; + + let get_some_type_unit = || debug_types.units().next().unwrap().unwrap(); + + let unit = get_some_type_unit(); + + let read_debug_abbrev_section_somehow = || &abbrev_buf; + let debug_abbrev = DebugAbbrev::new(read_debug_abbrev_section_somehow(), LittleEndian); + let _abbrevs_for_unit = unit.abbreviations(&debug_abbrev).unwrap(); + } +} diff --git a/third_party/rust/gimli/src/read/util.rs b/third_party/rust/gimli/src/read/util.rs new file mode 100644 index 000000000000..041ca5af90da --- /dev/null +++ b/third_party/rust/gimli/src/read/util.rs @@ -0,0 +1,283 @@ +#[cfg(feature = "read")] +use alloc::boxed::Box; +#[cfg(feature = "read")] +use alloc::vec::Vec; +use core::fmt; +use core::mem::MaybeUninit; +use core::ops; +use core::ptr; +use core::slice; + +mod sealed { + /// # Safety + /// 
Implementer must not modify the content in storage. + pub unsafe trait Sealed { + type Storage; + + fn new_storage() -> Self::Storage; + + fn grow(_storage: &mut Self::Storage, _additional: usize) -> Result<(), CapacityFull> { + Err(CapacityFull) + } + } + + #[derive(Clone, Copy, Debug)] + pub struct CapacityFull; +} + +use sealed::*; + +/// Marker trait for types that can be used as backing storage when a growable array type is needed. +/// +/// This trait is sealed and cannot be implemented for types outside this crate. +pub trait ArrayLike: Sealed { + /// Type of the elements being stored. + type Item; + + #[doc(hidden)] + fn as_slice(storage: &Self::Storage) -> &[MaybeUninit]; + + #[doc(hidden)] + fn as_mut_slice(storage: &mut Self::Storage) -> &mut [MaybeUninit]; +} + +// Use macro since const generics can't be used due to MSRV. +macro_rules! impl_array { + () => {}; + ($n:literal $($rest:tt)*) => { + // SAFETY: does not modify the content in storage. + unsafe impl Sealed for [T; $n] { + type Storage = [MaybeUninit; $n]; + + fn new_storage() -> Self::Storage { + // SAFETY: An uninitialized `[MaybeUninit<_>; _]` is valid. + unsafe { MaybeUninit::uninit().assume_init() } + } + } + + impl ArrayLike for [T; $n] { + type Item = T; + + fn as_slice(storage: &Self::Storage) -> &[MaybeUninit] { + storage + } + + fn as_mut_slice(storage: &mut Self::Storage) -> &mut [MaybeUninit] { + storage + } + } + + impl_array!($($rest)*); + } +} + +#[cfg(feature = "read")] +macro_rules! impl_box { + () => {}; + ($n:literal $($rest:tt)*) => { + // SAFETY: does not modify the content in storage. + unsafe impl Sealed for Box<[T; $n]> { + type Storage = Box<[MaybeUninit; $n]>; + + fn new_storage() -> Self::Storage { + // SAFETY: An uninitialized `[MaybeUninit<_>; _]` is valid. + Box::new(unsafe { MaybeUninit::uninit().assume_init() }) + } + } + + impl ArrayLike for Box<[T; $n]> { + type Item = T; + + fn as_slice(storage: &Self::Storage) -> &[MaybeUninit] { + &storage[..] 
+ } + + fn as_mut_slice(storage: &mut Self::Storage) -> &mut [MaybeUninit] { + &mut storage[..] + } + } + + impl_box!($($rest)*); + } +} + +impl_array!(0 1 2 3 4 8 16 32 64 128 192); +#[cfg(feature = "read")] +impl_box!(0 1 2 3 4 8 16 32 64 128 192); + +#[cfg(feature = "read")] +unsafe impl Sealed for Vec { + type Storage = Box<[MaybeUninit]>; + + fn new_storage() -> Self::Storage { + Box::new([]) + } + + fn grow(storage: &mut Self::Storage, additional: usize) -> Result<(), CapacityFull> { + let mut vec: Vec<_> = core::mem::replace(storage, Box::new([])).into(); + vec.reserve(additional); + // SAFETY: This is a `Vec` of `MaybeUninit`. + unsafe { vec.set_len(vec.capacity()) }; + *storage = vec.into_boxed_slice(); + Ok(()) + } +} + +#[cfg(feature = "read")] +impl ArrayLike for Vec { + type Item = T; + + fn as_slice(storage: &Self::Storage) -> &[MaybeUninit] { + storage + } + + fn as_mut_slice(storage: &mut Self::Storage) -> &mut [MaybeUninit] { + storage + } +} + +pub(crate) struct ArrayVec { + storage: A::Storage, + len: usize, +} + +impl ArrayVec { + pub fn new() -> Self { + Self { + storage: A::new_storage(), + len: 0, + } + } + + pub fn clear(&mut self) { + let ptr: *mut [A::Item] = &mut **self; + // Set length first so the type invariant is upheld even if `drop_in_place` panicks. + self.len = 0; + // SAFETY: `ptr` contains valid elements only and we "forget" them by setting the length. 
+ unsafe { ptr::drop_in_place(ptr) }; + } + + pub fn try_push(&mut self, value: A::Item) -> Result<(), CapacityFull> { + let mut storage = A::as_mut_slice(&mut self.storage); + if self.len >= storage.len() { + A::grow(&mut self.storage, 1)?; + storage = A::as_mut_slice(&mut self.storage); + } + + storage[self.len] = MaybeUninit::new(value); + self.len += 1; + Ok(()) + } + + pub fn try_insert(&mut self, index: usize, element: A::Item) -> Result<(), CapacityFull> { + assert!(index <= self.len); + + let mut storage = A::as_mut_slice(&mut self.storage); + if self.len >= storage.len() { + A::grow(&mut self.storage, 1)?; + storage = A::as_mut_slice(&mut self.storage); + } + + // SAFETY: storage[index] is filled later. + unsafe { + let p = storage.as_mut_ptr().add(index); + core::ptr::copy(p as *const _, p.add(1), self.len - index); + } + storage[index] = MaybeUninit::new(element); + self.len += 1; + Ok(()) + } + + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + self.len -= 1; + // SAFETY: this element is valid and we "forget" it by setting the length. + Some(unsafe { A::as_slice(&self.storage)[self.len].as_ptr().read() }) + } + } + + pub fn swap_remove(&mut self, index: usize) -> A::Item { + assert!(self.len > 0); + A::as_mut_slice(&mut self.storage).swap(index, self.len - 1); + self.pop().unwrap() + } +} + +#[cfg(feature = "read")] +impl ArrayVec> { + pub fn into_vec(mut self) -> Vec { + let len = core::mem::replace(&mut self.len, 0); + let storage = core::mem::replace(&mut self.storage, Box::new([])); + let slice = Box::leak(storage); + debug_assert!(len <= slice.len()); + // SAFETY: valid elements. 
+ unsafe { Vec::from_raw_parts(slice.as_mut_ptr() as *mut T, len, slice.len()) } + } +} + +impl Drop for ArrayVec { + fn drop(&mut self) { + self.clear(); + } +} + +impl Default for ArrayVec { + fn default() -> Self { + Self::new() + } +} + +impl ops::Deref for ArrayVec { + type Target = [A::Item]; + + fn deref(&self) -> &[A::Item] { + let slice = &A::as_slice(&self.storage); + debug_assert!(self.len <= slice.len()); + // SAFETY: valid elements. + unsafe { slice::from_raw_parts(slice.as_ptr() as _, self.len) } + } +} + +impl ops::DerefMut for ArrayVec { + fn deref_mut(&mut self) -> &mut [A::Item] { + let slice = &mut A::as_mut_slice(&mut self.storage); + debug_assert!(self.len <= slice.len()); + // SAFETY: valid elements. + unsafe { slice::from_raw_parts_mut(slice.as_mut_ptr() as _, self.len) } + } +} + +impl Clone for ArrayVec +where + A::Item: Clone, +{ + fn clone(&self) -> Self { + let mut new = Self::default(); + for value in &**self { + new.try_push(value.clone()).unwrap(); + } + new + } +} + +impl PartialEq for ArrayVec +where + A::Item: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + **self == **other + } +} + +impl Eq for ArrayVec where A::Item: Eq {} + +impl fmt::Debug for ArrayVec +where + A::Item: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} diff --git a/third_party/rust/gimli/src/read/value.rs b/third_party/rust/gimli/src/read/value.rs new file mode 100644 index 000000000000..114736dbbed8 --- /dev/null +++ b/third_party/rust/gimli/src/read/value.rs @@ -0,0 +1,1621 @@ +//! Definitions for values used in DWARF expressions. + +use crate::constants; +#[cfg(feature = "read")] +use crate::read::{AttributeValue, DebuggingInformationEntry}; +use crate::read::{Error, Reader, Result}; + +/// Convert a u64 to an i64, with sign extension if required. +/// +/// This is primarily used when needing to treat `Value::Generic` +/// as a signed value. 
+#[inline] +fn sign_extend(value: u64, mask: u64) -> i64 { + let value = (value & mask) as i64; + let sign = ((mask >> 1) + 1) as i64; + (value ^ sign).wrapping_sub(sign) +} + +#[inline] +fn mask_bit_size(addr_mask: u64) -> u32 { + 64 - addr_mask.leading_zeros() +} + +/// The type of an entry on the DWARF stack. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ValueType { + /// The generic type, which is address-sized and of unspecified sign, + /// as specified in the DWARF 5 standard, section 2.5.1. + /// This type is also used to represent address base types. + Generic, + /// Signed 8-bit integer type. + I8, + /// Unsigned 8-bit integer type. + U8, + /// Signed 16-bit integer type. + I16, + /// Unsigned 16-bit integer type. + U16, + /// Signed 32-bit integer type. + I32, + /// Unsigned 32-bit integer type. + U32, + /// Signed 64-bit integer type. + I64, + /// Unsigned 64-bit integer type. + U64, + /// 32-bit floating point type. + F32, + /// 64-bit floating point type. + F64, +} + +/// The value of an entry on the DWARF stack. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Value { + /// A generic value, which is address-sized and of unspecified sign. + Generic(u64), + /// A signed 8-bit integer value. + I8(i8), + /// An unsigned 8-bit integer value. + U8(u8), + /// A signed 16-bit integer value. + I16(i16), + /// An unsigned 16-bit integer value. + U16(u16), + /// A signed 32-bit integer value. + I32(i32), + /// An unsigned 32-bit integer value. + U32(u32), + /// A signed 64-bit integer value. + I64(i64), + /// An unsigned 64-bit integer value. + U64(u64), + /// A 32-bit floating point value. + F32(f32), + /// A 64-bit floating point value. + F64(f64), +} + +impl ValueType { + /// The size in bits of a value for this type. 
+ pub fn bit_size(self, addr_mask: u64) -> u32 { + match self { + ValueType::Generic => mask_bit_size(addr_mask), + ValueType::I8 | ValueType::U8 => 8, + ValueType::I16 | ValueType::U16 => 16, + ValueType::I32 | ValueType::U32 | ValueType::F32 => 32, + ValueType::I64 | ValueType::U64 | ValueType::F64 => 64, + } + } + + /// Construct a `ValueType` from the attributes of a base type DIE. + pub fn from_encoding(encoding: constants::DwAte, byte_size: u64) -> Option { + Some(match (encoding, byte_size) { + (constants::DW_ATE_signed, 1) => ValueType::I8, + (constants::DW_ATE_signed, 2) => ValueType::I16, + (constants::DW_ATE_signed, 4) => ValueType::I32, + (constants::DW_ATE_signed, 8) => ValueType::I64, + (constants::DW_ATE_unsigned, 1) => ValueType::U8, + (constants::DW_ATE_unsigned, 2) => ValueType::U16, + (constants::DW_ATE_unsigned, 4) => ValueType::U32, + (constants::DW_ATE_unsigned, 8) => ValueType::U64, + (constants::DW_ATE_float, 4) => ValueType::F32, + (constants::DW_ATE_float, 8) => ValueType::F64, + _ => return None, + }) + } + + /// Construct a `ValueType` from a base type DIE. + #[cfg(feature = "read")] + pub fn from_entry( + entry: &DebuggingInformationEntry<'_, '_, R>, + ) -> Result> { + if entry.tag() != constants::DW_TAG_base_type { + return Ok(None); + } + let mut encoding = None; + let mut byte_size = None; + let mut endianity = constants::DW_END_default; + let mut attrs = entry.attrs(); + while let Some(attr) = attrs.next()? { + match attr.name() { + constants::DW_AT_byte_size => byte_size = attr.udata_value(), + constants::DW_AT_encoding => { + if let AttributeValue::Encoding(x) = attr.value() { + encoding = Some(x); + } + } + constants::DW_AT_endianity => { + if let AttributeValue::Endianity(x) = attr.value() { + endianity = x; + } + } + _ => {} + } + } + + if endianity != constants::DW_END_default { + // TODO: we could check if it matches the reader endianity, + // but normally it would use DW_END_default in that case. 
+ return Ok(None); + } + + if let (Some(encoding), Some(byte_size)) = (encoding, byte_size) { + Ok(ValueType::from_encoding(encoding, byte_size)) + } else { + Ok(None) + } + } +} + +impl Value { + /// Return the `ValueType` corresponding to this `Value`. + pub fn value_type(&self) -> ValueType { + match *self { + Value::Generic(_) => ValueType::Generic, + Value::I8(_) => ValueType::I8, + Value::U8(_) => ValueType::U8, + Value::I16(_) => ValueType::I16, + Value::U16(_) => ValueType::U16, + Value::I32(_) => ValueType::I32, + Value::U32(_) => ValueType::U32, + Value::I64(_) => ValueType::I64, + Value::U64(_) => ValueType::U64, + Value::F32(_) => ValueType::F32, + Value::F64(_) => ValueType::F64, + } + } + + /// Read a `Value` with the given `value_type` from a `Reader`. + pub fn parse(value_type: ValueType, mut bytes: R) -> Result { + let value = match value_type { + ValueType::I8 => Value::I8(bytes.read_i8()?), + ValueType::U8 => Value::U8(bytes.read_u8()?), + ValueType::I16 => Value::I16(bytes.read_i16()?), + ValueType::U16 => Value::U16(bytes.read_u16()?), + ValueType::I32 => Value::I32(bytes.read_i32()?), + ValueType::U32 => Value::U32(bytes.read_u32()?), + ValueType::I64 => Value::I64(bytes.read_i64()?), + ValueType::U64 => Value::U64(bytes.read_u64()?), + ValueType::F32 => Value::F32(bytes.read_f32()?), + ValueType::F64 => Value::F64(bytes.read_f64()?), + _ => return Err(Error::UnsupportedTypeOperation), + }; + Ok(value) + } + + /// Convert a `Value` to a `u64`. + /// + /// The `ValueType` of `self` must be integral. + /// Values are sign extended if the source value is signed. 
+ pub fn to_u64(self, addr_mask: u64) -> Result { + let value = match self { + Value::Generic(value) => value & addr_mask, + Value::I8(value) => value as u64, + Value::U8(value) => u64::from(value), + Value::I16(value) => value as u64, + Value::U16(value) => u64::from(value), + Value::I32(value) => value as u64, + Value::U32(value) => u64::from(value), + Value::I64(value) => value as u64, + Value::U64(value) => value, + _ => return Err(Error::IntegralTypeRequired), + }; + Ok(value) + } + + /// Create a `Value` with the given `value_type` from a `u64` value. + /// + /// The `value_type` may be integral or floating point. + /// The result is truncated if the `u64` value does + /// not fit the bounds of the `value_type`. + pub fn from_u64(value_type: ValueType, value: u64) -> Result { + let value = match value_type { + ValueType::Generic => Value::Generic(value), + ValueType::I8 => Value::I8(value as i8), + ValueType::U8 => Value::U8(value as u8), + ValueType::I16 => Value::I16(value as i16), + ValueType::U16 => Value::U16(value as u16), + ValueType::I32 => Value::I32(value as i32), + ValueType::U32 => Value::U32(value as u32), + ValueType::I64 => Value::I64(value as i64), + ValueType::U64 => Value::U64(value), + ValueType::F32 => Value::F32(value as f32), + ValueType::F64 => Value::F64(value as f64), + }; + Ok(value) + } + + /// Create a `Value` with the given `value_type` from a `f32` value. + /// + /// The `value_type` may be integral or floating point. + /// The result is not defined if the `f32` value does + /// not fit the bounds of the `value_type`. 
+ fn from_f32(value_type: ValueType, value: f32) -> Result { + let value = match value_type { + ValueType::Generic => Value::Generic(value as u64), + ValueType::I8 => Value::I8(value as i8), + ValueType::U8 => Value::U8(value as u8), + ValueType::I16 => Value::I16(value as i16), + ValueType::U16 => Value::U16(value as u16), + ValueType::I32 => Value::I32(value as i32), + ValueType::U32 => Value::U32(value as u32), + ValueType::I64 => Value::I64(value as i64), + ValueType::U64 => Value::U64(value as u64), + ValueType::F32 => Value::F32(value), + ValueType::F64 => Value::F64(f64::from(value)), + }; + Ok(value) + } + + /// Create a `Value` with the given `value_type` from a `f64` value. + /// + /// The `value_type` may be integral or floating point. + /// The result is not defined if the `f64` value does + /// not fit the bounds of the `value_type`. + fn from_f64(value_type: ValueType, value: f64) -> Result { + let value = match value_type { + ValueType::Generic => Value::Generic(value as u64), + ValueType::I8 => Value::I8(value as i8), + ValueType::U8 => Value::U8(value as u8), + ValueType::I16 => Value::I16(value as i16), + ValueType::U16 => Value::U16(value as u16), + ValueType::I32 => Value::I32(value as i32), + ValueType::U32 => Value::U32(value as u32), + ValueType::I64 => Value::I64(value as i64), + ValueType::U64 => Value::U64(value as u64), + ValueType::F32 => Value::F32(value as f32), + ValueType::F64 => Value::F64(value), + }; + Ok(value) + } + + /// Convert a `Value` to the given `value_type`. + /// + /// When converting between integral types, the result is truncated + /// if the source value does not fit the bounds of the `value_type`. + /// When converting from floating point types, the result is not defined + /// if the source value does not fit the bounds of the `value_type`. + /// + /// This corresponds to the DWARF `DW_OP_convert` operation. 
+ pub fn convert(self, value_type: ValueType, addr_mask: u64) -> Result { + match self { + Value::F32(value) => Value::from_f32(value_type, value), + Value::F64(value) => Value::from_f64(value_type, value), + _ => Value::from_u64(value_type, self.to_u64(addr_mask)?), + } + } + + /// Reinterpret the bits in a `Value` as the given `value_type`. + /// + /// The source and result value types must have equal sizes. + /// + /// This corresponds to the DWARF `DW_OP_reinterpret` operation. + pub fn reinterpret(self, value_type: ValueType, addr_mask: u64) -> Result { + if self.value_type().bit_size(addr_mask) != value_type.bit_size(addr_mask) { + return Err(Error::TypeMismatch); + } + let bits = match self { + Value::Generic(value) => value, + Value::I8(value) => value as u64, + Value::U8(value) => u64::from(value), + Value::I16(value) => value as u64, + Value::U16(value) => u64::from(value), + Value::I32(value) => value as u64, + Value::U32(value) => u64::from(value), + Value::I64(value) => value as u64, + Value::U64(value) => value, + Value::F32(value) => u64::from(f32::to_bits(value)), + Value::F64(value) => f64::to_bits(value), + }; + let value = match value_type { + ValueType::Generic => Value::Generic(bits), + ValueType::I8 => Value::I8(bits as i8), + ValueType::U8 => Value::U8(bits as u8), + ValueType::I16 => Value::I16(bits as i16), + ValueType::U16 => Value::U16(bits as u16), + ValueType::I32 => Value::I32(bits as i32), + ValueType::U32 => Value::U32(bits as u32), + ValueType::I64 => Value::I64(bits as i64), + ValueType::U64 => Value::U64(bits), + ValueType::F32 => Value::F32(f32::from_bits(bits as u32)), + ValueType::F64 => Value::F64(f64::from_bits(bits)), + }; + Ok(value) + } + + /// Perform an absolute value operation. + /// + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_abs` operation. 
+ pub fn abs(self, addr_mask: u64) -> Result { + // wrapping_abs() can be used because DWARF specifies that the result is undefined + // for negative minimal values. + let value = match self { + Value::Generic(value) => { + Value::Generic(sign_extend(value, addr_mask).wrapping_abs() as u64) + } + Value::I8(value) => Value::I8(value.wrapping_abs()), + Value::I16(value) => Value::I16(value.wrapping_abs()), + Value::I32(value) => Value::I32(value.wrapping_abs()), + Value::I64(value) => Value::I64(value.wrapping_abs()), + // f32/f64::abs() is not available in libcore + Value::F32(value) => Value::F32(if value < 0. { -value } else { value }), + Value::F64(value) => Value::F64(if value < 0. { -value } else { value }), + Value::U8(_) | Value::U16(_) | Value::U32(_) | Value::U64(_) => self, + }; + Ok(value) + } + + /// Perform a negation operation. + /// + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_neg` operation. + pub fn neg(self, addr_mask: u64) -> Result { + // wrapping_neg() can be used because DWARF specifies that the result is undefined + // for negative minimal values. + let value = match self { + Value::Generic(value) => { + Value::Generic(sign_extend(value, addr_mask).wrapping_neg() as u64) + } + Value::I8(value) => Value::I8(value.wrapping_neg()), + Value::I16(value) => Value::I16(value.wrapping_neg()), + Value::I32(value) => Value::I32(value.wrapping_neg()), + Value::I64(value) => Value::I64(value.wrapping_neg()), + Value::F32(value) => Value::F32(-value), + Value::F64(value) => Value::F64(-value), + // It's unclear if these should implicitly convert to a signed value. + // For now, we don't support them. + Value::U8(_) | Value::U16(_) | Value::U32(_) | Value::U64(_) => { + return Err(Error::UnsupportedTypeOperation); + } + }; + Ok(value) + } + + /// Perform an addition operation. + /// + /// This operation requires matching types. 
+ /// + /// This corresponds to the DWARF `DW_OP_plus` operation. + pub fn add(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + Value::Generic(v1.wrapping_add(v2) & addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => Value::I8(v1.wrapping_add(v2)), + (Value::U8(v1), Value::U8(v2)) => Value::U8(v1.wrapping_add(v2)), + (Value::I16(v1), Value::I16(v2)) => Value::I16(v1.wrapping_add(v2)), + (Value::U16(v1), Value::U16(v2)) => Value::U16(v1.wrapping_add(v2)), + (Value::I32(v1), Value::I32(v2)) => Value::I32(v1.wrapping_add(v2)), + (Value::U32(v1), Value::U32(v2)) => Value::U32(v1.wrapping_add(v2)), + (Value::I64(v1), Value::I64(v2)) => Value::I64(v1.wrapping_add(v2)), + (Value::U64(v1), Value::U64(v2)) => Value::U64(v1.wrapping_add(v2)), + (Value::F32(v1), Value::F32(v2)) => Value::F32(v1 + v2), + (Value::F64(v1), Value::F64(v2)) => Value::F64(v1 + v2), + _ => return Err(Error::TypeMismatch), + }; + Ok(value) + } + + /// Perform a subtraction operation. + /// + /// This operation requires matching types. + /// + /// This corresponds to the DWARF `DW_OP_minus` operation. 
+ pub fn sub(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + Value::Generic(v1.wrapping_sub(v2) & addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => Value::I8(v1.wrapping_sub(v2)), + (Value::U8(v1), Value::U8(v2)) => Value::U8(v1.wrapping_sub(v2)), + (Value::I16(v1), Value::I16(v2)) => Value::I16(v1.wrapping_sub(v2)), + (Value::U16(v1), Value::U16(v2)) => Value::U16(v1.wrapping_sub(v2)), + (Value::I32(v1), Value::I32(v2)) => Value::I32(v1.wrapping_sub(v2)), + (Value::U32(v1), Value::U32(v2)) => Value::U32(v1.wrapping_sub(v2)), + (Value::I64(v1), Value::I64(v2)) => Value::I64(v1.wrapping_sub(v2)), + (Value::U64(v1), Value::U64(v2)) => Value::U64(v1.wrapping_sub(v2)), + (Value::F32(v1), Value::F32(v2)) => Value::F32(v1 - v2), + (Value::F64(v1), Value::F64(v2)) => Value::F64(v1 - v2), + _ => return Err(Error::TypeMismatch), + }; + Ok(value) + } + + /// Perform a multiplication operation. + /// + /// This operation requires matching types. + /// + /// This corresponds to the DWARF `DW_OP_mul` operation. 
+ pub fn mul(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + Value::Generic(v1.wrapping_mul(v2) & addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => Value::I8(v1.wrapping_mul(v2)), + (Value::U8(v1), Value::U8(v2)) => Value::U8(v1.wrapping_mul(v2)), + (Value::I16(v1), Value::I16(v2)) => Value::I16(v1.wrapping_mul(v2)), + (Value::U16(v1), Value::U16(v2)) => Value::U16(v1.wrapping_mul(v2)), + (Value::I32(v1), Value::I32(v2)) => Value::I32(v1.wrapping_mul(v2)), + (Value::U32(v1), Value::U32(v2)) => Value::U32(v1.wrapping_mul(v2)), + (Value::I64(v1), Value::I64(v2)) => Value::I64(v1.wrapping_mul(v2)), + (Value::U64(v1), Value::U64(v2)) => Value::U64(v1.wrapping_mul(v2)), + (Value::F32(v1), Value::F32(v2)) => Value::F32(v1 * v2), + (Value::F64(v1), Value::F64(v2)) => Value::F64(v1 * v2), + _ => return Err(Error::TypeMismatch), + }; + Ok(value) + } + + /// Perform a division operation. + /// + /// This operation requires matching types. + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_div` operation. 
+ pub fn div(self, rhs: Value, addr_mask: u64) -> Result { + match rhs { + Value::Generic(v2) if sign_extend(v2, addr_mask) == 0 => { + return Err(Error::DivisionByZero); + } + Value::I8(0) + | Value::U8(0) + | Value::I16(0) + | Value::U16(0) + | Value::I32(0) + | Value::U32(0) + | Value::I64(0) + | Value::U64(0) => { + return Err(Error::DivisionByZero); + } + _ => {} + } + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + // Signed division + Value::Generic( + sign_extend(v1, addr_mask).wrapping_div(sign_extend(v2, addr_mask)) as u64, + ) + } + (Value::I8(v1), Value::I8(v2)) => Value::I8(v1.wrapping_div(v2)), + (Value::U8(v1), Value::U8(v2)) => Value::U8(v1.wrapping_div(v2)), + (Value::I16(v1), Value::I16(v2)) => Value::I16(v1.wrapping_div(v2)), + (Value::U16(v1), Value::U16(v2)) => Value::U16(v1.wrapping_div(v2)), + (Value::I32(v1), Value::I32(v2)) => Value::I32(v1.wrapping_div(v2)), + (Value::U32(v1), Value::U32(v2)) => Value::U32(v1.wrapping_div(v2)), + (Value::I64(v1), Value::I64(v2)) => Value::I64(v1.wrapping_div(v2)), + (Value::U64(v1), Value::U64(v2)) => Value::U64(v1.wrapping_div(v2)), + (Value::F32(v1), Value::F32(v2)) => Value::F32(v1 / v2), + (Value::F64(v1), Value::F64(v2)) => Value::F64(v1 / v2), + _ => return Err(Error::TypeMismatch), + }; + Ok(value) + } + + /// Perform a remainder operation. + /// + /// This operation requires matching integral types. + /// If the value type is `Generic`, then it is interpreted as an unsigned value. + /// + /// This corresponds to the DWARF `DW_OP_mod` operation. 
+ pub fn rem(self, rhs: Value, addr_mask: u64) -> Result { + match rhs { + Value::Generic(rhs) if (rhs & addr_mask) == 0 => { + return Err(Error::DivisionByZero); + } + Value::I8(0) + | Value::U8(0) + | Value::I16(0) + | Value::U16(0) + | Value::I32(0) + | Value::U32(0) + | Value::I64(0) + | Value::U64(0) => { + return Err(Error::DivisionByZero); + } + _ => {} + } + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + // Unsigned modulus + Value::Generic((v1 & addr_mask).wrapping_rem(v2 & addr_mask)) + } + (Value::I8(v1), Value::I8(v2)) => Value::I8(v1.wrapping_rem(v2)), + (Value::U8(v1), Value::U8(v2)) => Value::U8(v1.wrapping_rem(v2)), + (Value::I16(v1), Value::I16(v2)) => Value::I16(v1.wrapping_rem(v2)), + (Value::U16(v1), Value::U16(v2)) => Value::U16(v1.wrapping_rem(v2)), + (Value::I32(v1), Value::I32(v2)) => Value::I32(v1.wrapping_rem(v2)), + (Value::U32(v1), Value::U32(v2)) => Value::U32(v1.wrapping_rem(v2)), + (Value::I64(v1), Value::I64(v2)) => Value::I64(v1.wrapping_rem(v2)), + (Value::U64(v1), Value::U64(v2)) => Value::U64(v1.wrapping_rem(v2)), + (Value::F32(_), Value::F32(_)) => return Err(Error::IntegralTypeRequired), + (Value::F64(_), Value::F64(_)) => return Err(Error::IntegralTypeRequired), + _ => return Err(Error::TypeMismatch), + }; + Ok(value) + } + + /// Perform a bitwise not operation. + /// + /// This operation requires matching integral types. + /// + /// This corresponds to the DWARF `DW_OP_not` operation. + pub fn not(self, addr_mask: u64) -> Result { + let value_type = self.value_type(); + let v = self.to_u64(addr_mask)?; + Value::from_u64(value_type, !v) + } + + /// Perform a bitwise and operation. + /// + /// This operation requires matching integral types. + /// + /// This corresponds to the DWARF `DW_OP_and` operation. 
+ pub fn and(self, rhs: Value, addr_mask: u64) -> Result { + let value_type = self.value_type(); + if value_type != rhs.value_type() { + return Err(Error::TypeMismatch); + } + let v1 = self.to_u64(addr_mask)?; + let v2 = rhs.to_u64(addr_mask)?; + Value::from_u64(value_type, v1 & v2) + } + + /// Perform a bitwise or operation. + /// + /// This operation requires matching integral types. + /// + /// This corresponds to the DWARF `DW_OP_or` operation. + pub fn or(self, rhs: Value, addr_mask: u64) -> Result { + let value_type = self.value_type(); + if value_type != rhs.value_type() { + return Err(Error::TypeMismatch); + } + let v1 = self.to_u64(addr_mask)?; + let v2 = rhs.to_u64(addr_mask)?; + Value::from_u64(value_type, v1 | v2) + } + + /// Perform a bitwise exclusive-or operation. + /// + /// This operation requires matching integral types. + /// + /// This corresponds to the DWARF `DW_OP_xor` operation. + pub fn xor(self, rhs: Value, addr_mask: u64) -> Result { + let value_type = self.value_type(); + if value_type != rhs.value_type() { + return Err(Error::TypeMismatch); + } + let v1 = self.to_u64(addr_mask)?; + let v2 = rhs.to_u64(addr_mask)?; + Value::from_u64(value_type, v1 ^ v2) + } + + /// Convert value to bit length suitable for a shift operation. + /// + /// If the value is negative then an error is returned. + fn shift_length(self) -> Result { + let value = match self { + Value::Generic(value) => value, + Value::I8(value) if value >= 0 => value as u64, + Value::U8(value) => u64::from(value), + Value::I16(value) if value >= 0 => value as u64, + Value::U16(value) => u64::from(value), + Value::I32(value) if value >= 0 => value as u64, + Value::U32(value) => u64::from(value), + Value::I64(value) if value >= 0 => value as u64, + Value::U64(value) => value, + _ => return Err(Error::InvalidShiftExpression), + }; + Ok(value) + } + + /// Perform a shift left operation. + /// + /// This operation requires integral types. 
+ /// If the shift length exceeds the type size, then 0 is returned. + /// If the shift length is negative then an error is returned. + /// + /// This corresponds to the DWARF `DW_OP_shl` operation. + pub fn shl(self, rhs: Value, addr_mask: u64) -> Result { + let v2 = rhs.shift_length()?; + let value = match self { + Value::Generic(v1) => Value::Generic(if v2 >= u64::from(mask_bit_size(addr_mask)) { + 0 + } else { + (v1 & addr_mask) << v2 + }), + Value::I8(v1) => Value::I8(if v2 >= 8 { 0 } else { v1 << v2 }), + Value::U8(v1) => Value::U8(if v2 >= 8 { 0 } else { v1 << v2 }), + Value::I16(v1) => Value::I16(if v2 >= 16 { 0 } else { v1 << v2 }), + Value::U16(v1) => Value::U16(if v2 >= 16 { 0 } else { v1 << v2 }), + Value::I32(v1) => Value::I32(if v2 >= 32 { 0 } else { v1 << v2 }), + Value::U32(v1) => Value::U32(if v2 >= 32 { 0 } else { v1 << v2 }), + Value::I64(v1) => Value::I64(if v2 >= 64 { 0 } else { v1 << v2 }), + Value::U64(v1) => Value::U64(if v2 >= 64 { 0 } else { v1 << v2 }), + _ => return Err(Error::IntegralTypeRequired), + }; + Ok(value) + } + + /// Perform a logical shift right operation. + /// + /// This operation requires an unsigned integral type for the value. + /// If the value type is `Generic`, then it is interpreted as an unsigned value. + /// + /// This operation requires an integral type for the shift length. + /// If the shift length exceeds the type size, then 0 is returned. + /// If the shift length is negative then an error is returned. + /// + /// This corresponds to the DWARF `DW_OP_shr` operation. 
+ pub fn shr(self, rhs: Value, addr_mask: u64) -> Result { + let v2 = rhs.shift_length()?; + let value = match self { + Value::Generic(v1) => Value::Generic(if v2 >= u64::from(mask_bit_size(addr_mask)) { + 0 + } else { + (v1 & addr_mask) >> v2 + }), + Value::U8(v1) => Value::U8(if v2 >= 8 { 0 } else { v1 >> v2 }), + Value::U16(v1) => Value::U16(if v2 >= 16 { 0 } else { v1 >> v2 }), + Value::U32(v1) => Value::U32(if v2 >= 32 { 0 } else { v1 >> v2 }), + Value::U64(v1) => Value::U64(if v2 >= 64 { 0 } else { v1 >> v2 }), + // It's unclear if signed values should implicitly convert to an unsigned value. + // For now, we don't support them. + Value::I8(_) | Value::I16(_) | Value::I32(_) | Value::I64(_) => { + return Err(Error::UnsupportedTypeOperation); + } + _ => return Err(Error::IntegralTypeRequired), + }; + Ok(value) + } + + /// Perform an arithmetic shift right operation. + /// + /// This operation requires a signed integral type for the value. + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This operation requires an integral type for the shift length. + /// If the shift length exceeds the type size, then 0 is returned for positive values, + /// and -1 is returned for negative values. + /// If the shift length is negative then an error is returned. + /// + /// This corresponds to the DWARF `DW_OP_shra` operation. 
+ pub fn shra(self, rhs: Value, addr_mask: u64) -> Result { + let v2 = rhs.shift_length()?; + let value = match self { + Value::Generic(v1) => { + let v1 = sign_extend(v1, addr_mask); + let value = if v2 >= u64::from(mask_bit_size(addr_mask)) { + if v1 < 0 { + !0 + } else { + 0 + } + } else { + (v1 >> v2) as u64 + }; + Value::Generic(value) + } + Value::I8(v1) => Value::I8(if v2 >= 8 { + if v1 < 0 { + !0 + } else { + 0 + } + } else { + v1 >> v2 + }), + Value::I16(v1) => Value::I16(if v2 >= 16 { + if v1 < 0 { + !0 + } else { + 0 + } + } else { + v1 >> v2 + }), + Value::I32(v1) => Value::I32(if v2 >= 32 { + if v1 < 0 { + !0 + } else { + 0 + } + } else { + v1 >> v2 + }), + Value::I64(v1) => Value::I64(if v2 >= 64 { + if v1 < 0 { + !0 + } else { + 0 + } + } else { + v1 >> v2 + }), + // It's unclear if unsigned values should implicitly convert to a signed value. + // For now, we don't support them. + Value::U8(_) | Value::U16(_) | Value::U32(_) | Value::U64(_) => { + return Err(Error::UnsupportedTypeOperation); + } + _ => return Err(Error::IntegralTypeRequired), + }; + Ok(value) + } + + /// Perform the `==` relational operation. + /// + /// This operation requires matching integral types. + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_eq` operation. 
+ pub fn eq(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + sign_extend(v1, addr_mask) == sign_extend(v2, addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => v1 == v2, + (Value::U8(v1), Value::U8(v2)) => v1 == v2, + (Value::I16(v1), Value::I16(v2)) => v1 == v2, + (Value::U16(v1), Value::U16(v2)) => v1 == v2, + (Value::I32(v1), Value::I32(v2)) => v1 == v2, + (Value::U32(v1), Value::U32(v2)) => v1 == v2, + (Value::I64(v1), Value::I64(v2)) => v1 == v2, + (Value::U64(v1), Value::U64(v2)) => v1 == v2, + (Value::F32(v1), Value::F32(v2)) => v1 == v2, + (Value::F64(v1), Value::F64(v2)) => v1 == v2, + _ => return Err(Error::TypeMismatch), + }; + Ok(Value::Generic(value as u64)) + } + + /// Perform the `>=` relational operation. + /// + /// This operation requires matching integral types. + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_ge` operation. + pub fn ge(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + sign_extend(v1, addr_mask) >= sign_extend(v2, addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => v1 >= v2, + (Value::U8(v1), Value::U8(v2)) => v1 >= v2, + (Value::I16(v1), Value::I16(v2)) => v1 >= v2, + (Value::U16(v1), Value::U16(v2)) => v1 >= v2, + (Value::I32(v1), Value::I32(v2)) => v1 >= v2, + (Value::U32(v1), Value::U32(v2)) => v1 >= v2, + (Value::I64(v1), Value::I64(v2)) => v1 >= v2, + (Value::U64(v1), Value::U64(v2)) => v1 >= v2, + (Value::F32(v1), Value::F32(v2)) => v1 >= v2, + (Value::F64(v1), Value::F64(v2)) => v1 >= v2, + _ => return Err(Error::TypeMismatch), + }; + Ok(Value::Generic(value as u64)) + } + + /// Perform the `>` relational operation. + /// + /// This operation requires matching integral types. + /// If the value type is `Generic`, then it is interpreted as a signed value. 
+ /// + /// This corresponds to the DWARF `DW_OP_gt` operation. + pub fn gt(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + sign_extend(v1, addr_mask) > sign_extend(v2, addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => v1 > v2, + (Value::U8(v1), Value::U8(v2)) => v1 > v2, + (Value::I16(v1), Value::I16(v2)) => v1 > v2, + (Value::U16(v1), Value::U16(v2)) => v1 > v2, + (Value::I32(v1), Value::I32(v2)) => v1 > v2, + (Value::U32(v1), Value::U32(v2)) => v1 > v2, + (Value::I64(v1), Value::I64(v2)) => v1 > v2, + (Value::U64(v1), Value::U64(v2)) => v1 > v2, + (Value::F32(v1), Value::F32(v2)) => v1 > v2, + (Value::F64(v1), Value::F64(v2)) => v1 > v2, + _ => return Err(Error::TypeMismatch), + }; + Ok(Value::Generic(value as u64)) + } + + /// Perform the `<= relational operation. + /// + /// This operation requires matching integral types. + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_le` operation. + pub fn le(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + sign_extend(v1, addr_mask) <= sign_extend(v2, addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => v1 <= v2, + (Value::U8(v1), Value::U8(v2)) => v1 <= v2, + (Value::I16(v1), Value::I16(v2)) => v1 <= v2, + (Value::U16(v1), Value::U16(v2)) => v1 <= v2, + (Value::I32(v1), Value::I32(v2)) => v1 <= v2, + (Value::U32(v1), Value::U32(v2)) => v1 <= v2, + (Value::I64(v1), Value::I64(v2)) => v1 <= v2, + (Value::U64(v1), Value::U64(v2)) => v1 <= v2, + (Value::F32(v1), Value::F32(v2)) => v1 <= v2, + (Value::F64(v1), Value::F64(v2)) => v1 <= v2, + _ => return Err(Error::TypeMismatch), + }; + Ok(Value::Generic(value as u64)) + } + + /// Perform the `< relational operation. + /// + /// This operation requires matching integral types. 
+ /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_lt` operation. + pub fn lt(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + sign_extend(v1, addr_mask) < sign_extend(v2, addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => v1 < v2, + (Value::U8(v1), Value::U8(v2)) => v1 < v2, + (Value::I16(v1), Value::I16(v2)) => v1 < v2, + (Value::U16(v1), Value::U16(v2)) => v1 < v2, + (Value::I32(v1), Value::I32(v2)) => v1 < v2, + (Value::U32(v1), Value::U32(v2)) => v1 < v2, + (Value::I64(v1), Value::I64(v2)) => v1 < v2, + (Value::U64(v1), Value::U64(v2)) => v1 < v2, + (Value::F32(v1), Value::F32(v2)) => v1 < v2, + (Value::F64(v1), Value::F64(v2)) => v1 < v2, + _ => return Err(Error::TypeMismatch), + }; + Ok(Value::Generic(value as u64)) + } + + /// Perform the `!= relational operation. + /// + /// This operation requires matching integral types. + /// If the value type is `Generic`, then it is interpreted as a signed value. + /// + /// This corresponds to the DWARF `DW_OP_ne` operation. 
+ pub fn ne(self, rhs: Value, addr_mask: u64) -> Result { + let value = match (self, rhs) { + (Value::Generic(v1), Value::Generic(v2)) => { + sign_extend(v1, addr_mask) != sign_extend(v2, addr_mask) + } + (Value::I8(v1), Value::I8(v2)) => v1 != v2, + (Value::U8(v1), Value::U8(v2)) => v1 != v2, + (Value::I16(v1), Value::I16(v2)) => v1 != v2, + (Value::U16(v1), Value::U16(v2)) => v1 != v2, + (Value::I32(v1), Value::I32(v2)) => v1 != v2, + (Value::U32(v1), Value::U32(v2)) => v1 != v2, + (Value::I64(v1), Value::I64(v2)) => v1 != v2, + (Value::U64(v1), Value::U64(v2)) => v1 != v2, + (Value::F32(v1), Value::F32(v2)) => v1 != v2, + (Value::F64(v1), Value::F64(v2)) => v1 != v2, + _ => return Err(Error::TypeMismatch), + }; + Ok(Value::Generic(value as u64)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::common::{DebugAbbrevOffset, DebugInfoOffset, Encoding, Format}; + use crate::endianity::LittleEndian; + use crate::read::{ + Abbreviation, AttributeSpecification, DebuggingInformationEntry, EndianSlice, UnitHeader, + UnitOffset, UnitType, + }; + + #[test] + #[rustfmt::skip] + fn valuetype_from_encoding() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 4, + }; + let unit = UnitHeader::new( + encoding, + 7, + UnitType::Compilation, + DebugAbbrevOffset(0), + DebugInfoOffset(0).into(), + EndianSlice::new(&[], LittleEndian), + ); + + let abbrev = Abbreviation::new( + 42, + constants::DW_TAG_base_type, + constants::DW_CHILDREN_no, + vec![ + AttributeSpecification::new( + constants::DW_AT_byte_size, + constants::DW_FORM_udata, + None, + ), + AttributeSpecification::new( + constants::DW_AT_encoding, + constants::DW_FORM_udata, + None, + ), + AttributeSpecification::new( + constants::DW_AT_endianity, + constants::DW_FORM_udata, + None, + ), + ].into(), + ); + + for &(attrs, result) in &[ + ([0x01, constants::DW_ATE_signed.0, constants::DW_END_default.0], ValueType::I8), + ([0x02, constants::DW_ATE_signed.0, 
constants::DW_END_default.0], ValueType::I16), + ([0x04, constants::DW_ATE_signed.0, constants::DW_END_default.0], ValueType::I32), + ([0x08, constants::DW_ATE_signed.0, constants::DW_END_default.0], ValueType::I64), + ([0x01, constants::DW_ATE_unsigned.0, constants::DW_END_default.0], ValueType::U8), + ([0x02, constants::DW_ATE_unsigned.0, constants::DW_END_default.0], ValueType::U16), + ([0x04, constants::DW_ATE_unsigned.0, constants::DW_END_default.0], ValueType::U32), + ([0x08, constants::DW_ATE_unsigned.0, constants::DW_END_default.0], ValueType::U64), + ([0x04, constants::DW_ATE_float.0, constants::DW_END_default.0], ValueType::F32), + ([0x08, constants::DW_ATE_float.0, constants::DW_END_default.0], ValueType::F64), + ] { + let entry = DebuggingInformationEntry::new( + UnitOffset(0), + EndianSlice::new(&attrs, LittleEndian), + &abbrev, + &unit, + ); + assert_eq!(ValueType::from_entry(&entry), Ok(Some(result))); + } + + for attrs in &[ + [0x03, constants::DW_ATE_signed.0, constants::DW_END_default.0], + [0x02, constants::DW_ATE_signed.0, constants::DW_END_big.0], + ] { + let entry = DebuggingInformationEntry::new( + UnitOffset(0), + EndianSlice::new(attrs, LittleEndian), + &abbrev, + &unit, + ); + assert_eq!(ValueType::from_entry(&entry), Ok(None)); + } + } + + #[test] + fn value_convert() { + let addr_mask = !0 >> 32; + for &(v, t, result) in &[ + (Value::Generic(1), ValueType::I8, Ok(Value::I8(1))), + (Value::I8(1), ValueType::U8, Ok(Value::U8(1))), + (Value::U8(1), ValueType::I16, Ok(Value::I16(1))), + (Value::I16(1), ValueType::U16, Ok(Value::U16(1))), + (Value::U16(1), ValueType::I32, Ok(Value::I32(1))), + (Value::I32(1), ValueType::U32, Ok(Value::U32(1))), + (Value::U32(1), ValueType::F32, Ok(Value::F32(1.))), + (Value::F32(1.), ValueType::I64, Ok(Value::I64(1))), + (Value::I64(1), ValueType::U64, Ok(Value::U64(1))), + (Value::U64(1), ValueType::F64, Ok(Value::F64(1.))), + (Value::F64(1.), ValueType::Generic, Ok(Value::Generic(1))), + ] { + 
assert_eq!(v.convert(t, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_reinterpret() { + let addr_mask = !0 >> 32; + for &(v, t, result) in &[ + // 8-bit + (Value::I8(-1), ValueType::U8, Ok(Value::U8(0xff))), + (Value::U8(0xff), ValueType::I8, Ok(Value::I8(-1))), + // 16-bit + (Value::I16(1), ValueType::U16, Ok(Value::U16(1))), + (Value::U16(1), ValueType::I16, Ok(Value::I16(1))), + // 32-bit + (Value::Generic(1), ValueType::I32, Ok(Value::I32(1))), + (Value::I32(1), ValueType::U32, Ok(Value::U32(1))), + (Value::U32(0x3f80_0000), ValueType::F32, Ok(Value::F32(1.0))), + (Value::F32(1.0), ValueType::Generic, Ok(Value::Generic(0x3f80_0000))), + // Type mismatches + (Value::Generic(1), ValueType::U8, Err(Error::TypeMismatch)), + (Value::U8(1), ValueType::U16, Err(Error::TypeMismatch)), + (Value::U16(1), ValueType::U32, Err(Error::TypeMismatch)), + (Value::U32(1), ValueType::U64, Err(Error::TypeMismatch)), + (Value::U64(1), ValueType::Generic, Err(Error::TypeMismatch)), + ] { + assert_eq!(v.reinterpret(t, addr_mask), result); + } + + let addr_mask = !0; + for &(v, t, result) in &[ + // 64-bit + (Value::Generic(1), ValueType::I64, Ok(Value::I64(1))), + (Value::I64(1), ValueType::U64, Ok(Value::U64(1))), + (Value::U64(0x3ff0_0000_0000_0000), ValueType::F64, Ok(Value::F64(1.0))), + (Value::F64(1.0), ValueType::Generic, Ok(Value::Generic(0x3ff0_0000_0000_0000))), + ] { + assert_eq!(v.reinterpret(t, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_abs() { + let addr_mask = 0xffff_ffff; + for &(v, result) in &[ + (Value::Generic(0xffff_ffff), Ok(Value::Generic(1))), + (Value::I8(-1), Ok(Value::I8(1))), + (Value::U8(1), Ok(Value::U8(1))), + (Value::I16(-1), Ok(Value::I16(1))), + (Value::U16(1), Ok(Value::U16(1))), + (Value::I32(-1), Ok(Value::I32(1))), + (Value::U32(1), Ok(Value::U32(1))), + (Value::I64(-1), Ok(Value::I64(1))), + (Value::U64(1), Ok(Value::U64(1))), + (Value::F32(-1.), Ok(Value::F32(1.))), + (Value::F64(-1.), 
Ok(Value::F64(1.))), + ] { + assert_eq!(v.abs(addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_neg() { + let addr_mask = 0xffff_ffff; + for &(v, result) in &[ + (Value::Generic(0xffff_ffff), Ok(Value::Generic(1))), + (Value::I8(1), Ok(Value::I8(-1))), + (Value::U8(1), Err(Error::UnsupportedTypeOperation)), + (Value::I16(1), Ok(Value::I16(-1))), + (Value::U16(1), Err(Error::UnsupportedTypeOperation)), + (Value::I32(1), Ok(Value::I32(-1))), + (Value::U32(1), Err(Error::UnsupportedTypeOperation)), + (Value::I64(1), Ok(Value::I64(-1))), + (Value::U64(1), Err(Error::UnsupportedTypeOperation)), + (Value::F32(1.), Ok(Value::F32(-1.))), + (Value::F64(1.), Ok(Value::F64(-1.))), + ] { + assert_eq!(v.neg(addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_add() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(1), Value::Generic(2), Ok(Value::Generic(3))), + (Value::I8(-1), Value::I8(2), Ok(Value::I8(1))), + (Value::U8(1), Value::U8(2), Ok(Value::U8(3))), + (Value::I16(-1), Value::I16(2), Ok(Value::I16(1))), + (Value::U16(1), Value::U16(2), Ok(Value::U16(3))), + (Value::I32(-1), Value::I32(2), Ok(Value::I32(1))), + (Value::U32(1), Value::U32(2), Ok(Value::U32(3))), + (Value::I64(-1), Value::I64(2), Ok(Value::I64(1))), + (Value::U64(1), Value::U64(2), Ok(Value::U64(3))), + (Value::F32(-1.), Value::F32(2.), Ok(Value::F32(1.))), + (Value::F64(-1.), Value::F64(2.), Ok(Value::F64(1.))), + (Value::Generic(1), Value::U32(2), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.add(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_sub() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(2), Ok(Value::Generic(1))), + (Value::I8(-1), Value::I8(2), Ok(Value::I8(-3))), + (Value::U8(3), Value::U8(2), Ok(Value::U8(1))), + (Value::I16(-1), Value::I16(2), Ok(Value::I16(-3))), + (Value::U16(3), Value::U16(2), Ok(Value::U16(1))), + (Value::I32(-1), 
Value::I32(2), Ok(Value::I32(-3))), + (Value::U32(3), Value::U32(2), Ok(Value::U32(1))), + (Value::I64(-1), Value::I64(2), Ok(Value::I64(-3))), + (Value::U64(3), Value::U64(2), Ok(Value::U64(1))), + (Value::F32(-1.), Value::F32(2.), Ok(Value::F32(-3.))), + (Value::F64(-1.), Value::F64(2.), Ok(Value::F64(-3.))), + (Value::Generic(3), Value::U32(2), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.sub(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_mul() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(2), Value::Generic(3), Ok(Value::Generic(6))), + (Value::I8(-2), Value::I8(3), Ok(Value::I8(-6))), + (Value::U8(2), Value::U8(3), Ok(Value::U8(6))), + (Value::I16(-2), Value::I16(3), Ok(Value::I16(-6))), + (Value::U16(2), Value::U16(3), Ok(Value::U16(6))), + (Value::I32(-2), Value::I32(3), Ok(Value::I32(-6))), + (Value::U32(2), Value::U32(3), Ok(Value::U32(6))), + (Value::I64(-2), Value::I64(3), Ok(Value::I64(-6))), + (Value::U64(2), Value::U64(3), Ok(Value::U64(6))), + (Value::F32(-2.), Value::F32(3.), Ok(Value::F32(-6.))), + (Value::F64(-2.), Value::F64(3.), Ok(Value::F64(-6.))), + (Value::Generic(2), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.mul(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_div() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(6), Value::Generic(3), Ok(Value::Generic(2))), + (Value::I8(-6), Value::I8(3), Ok(Value::I8(-2))), + (Value::U8(6), Value::U8(3), Ok(Value::U8(2))), + (Value::I16(-6), Value::I16(3), Ok(Value::I16(-2))), + (Value::U16(6), Value::U16(3), Ok(Value::U16(2))), + (Value::I32(-6), Value::I32(3), Ok(Value::I32(-2))), + (Value::U32(6), Value::U32(3), Ok(Value::U32(2))), + (Value::I64(-6), Value::I64(3), Ok(Value::I64(-2))), + (Value::U64(6), Value::U64(3), Ok(Value::U64(2))), + (Value::F32(-6.), Value::F32(3.), Ok(Value::F32(-2.))), + (Value::F64(-6.), Value::F64(3.), Ok(Value::F64(-2.))), + 
(Value::Generic(6), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.div(v2, addr_mask), result); + } + for &(v1, v2, result) in &[ + (Value::Generic(6), Value::Generic(0), Err(Error::DivisionByZero)), + (Value::I8(-6), Value::I8(0), Err(Error::DivisionByZero)), + (Value::U8(6), Value::U8(0), Err(Error::DivisionByZero)), + (Value::I16(-6), Value::I16(0), Err(Error::DivisionByZero)), + (Value::U16(6), Value::U16(0), Err(Error::DivisionByZero)), + (Value::I32(-6), Value::I32(0), Err(Error::DivisionByZero)), + (Value::U32(6), Value::U32(0), Err(Error::DivisionByZero)), + (Value::I64(-6), Value::I64(0), Err(Error::DivisionByZero)), + (Value::U64(6), Value::U64(0), Err(Error::DivisionByZero)), + (Value::F32(-6.), Value::F32(0.), Ok(Value::F32(-6. / 0.))), + (Value::F64(-6.), Value::F64(0.), Ok(Value::F64(-6. / 0.))), + ] { + assert_eq!(v1.div(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_rem() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(2), Ok(Value::Generic(1))), + (Value::I8(-3), Value::I8(2), Ok(Value::I8(-1))), + (Value::U8(3), Value::U8(2), Ok(Value::U8(1))), + (Value::I16(-3), Value::I16(2), Ok(Value::I16(-1))), + (Value::U16(3), Value::U16(2), Ok(Value::U16(1))), + (Value::I32(-3), Value::I32(2), Ok(Value::I32(-1))), + (Value::U32(3), Value::U32(2), Ok(Value::U32(1))), + (Value::I64(-3), Value::I64(2), Ok(Value::I64(-1))), + (Value::U64(3), Value::U64(2), Ok(Value::U64(1))), + (Value::F32(-3.), Value::F32(2.), Err(Error::IntegralTypeRequired)), + (Value::F64(-3.), Value::F64(2.), Err(Error::IntegralTypeRequired)), + (Value::Generic(3), Value::U32(2), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.rem(v2, addr_mask), result); + } + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(0), Err(Error::DivisionByZero)), + (Value::I8(-3), Value::I8(0), Err(Error::DivisionByZero)), + (Value::U8(3), Value::U8(0), Err(Error::DivisionByZero)), + 
(Value::I16(-3), Value::I16(0), Err(Error::DivisionByZero)), + (Value::U16(3), Value::U16(0), Err(Error::DivisionByZero)), + (Value::I32(-3), Value::I32(0), Err(Error::DivisionByZero)), + (Value::U32(3), Value::U32(0), Err(Error::DivisionByZero)), + (Value::I64(-3), Value::I64(0), Err(Error::DivisionByZero)), + (Value::U64(3), Value::U64(0), Err(Error::DivisionByZero)), + ] { + assert_eq!(v1.rem(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_not() { + let addr_mask = 0xffff_ffff; + for &(v, result) in &[ + (Value::Generic(1), Ok(Value::Generic(!1))), + (Value::I8(1), Ok(Value::I8(!1))), + (Value::U8(1), Ok(Value::U8(!1))), + (Value::I16(1), Ok(Value::I16(!1))), + (Value::U16(1), Ok(Value::U16(!1))), + (Value::I32(1), Ok(Value::I32(!1))), + (Value::U32(1), Ok(Value::U32(!1))), + (Value::I64(1), Ok(Value::I64(!1))), + (Value::U64(1), Ok(Value::U64(!1))), + (Value::F32(1.), Err(Error::IntegralTypeRequired)), + (Value::F64(1.), Err(Error::IntegralTypeRequired)), + ] { + assert_eq!(v.not(addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_and() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(5), Ok(Value::Generic(1))), + (Value::I8(3), Value::I8(5), Ok(Value::I8(1))), + (Value::U8(3), Value::U8(5), Ok(Value::U8(1))), + (Value::I16(3), Value::I16(5), Ok(Value::I16(1))), + (Value::U16(3), Value::U16(5), Ok(Value::U16(1))), + (Value::I32(3), Value::I32(5), Ok(Value::I32(1))), + (Value::U32(3), Value::U32(5), Ok(Value::U32(1))), + (Value::I64(3), Value::I64(5), Ok(Value::I64(1))), + (Value::U64(3), Value::U64(5), Ok(Value::U64(1))), + (Value::F32(3.), Value::F32(5.), Err(Error::IntegralTypeRequired)), + (Value::F64(3.), Value::F64(5.), Err(Error::IntegralTypeRequired)), + (Value::Generic(3), Value::U32(5), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.and(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_or() { + let addr_mask = 0xffff_ffff; + for 
&(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(5), Ok(Value::Generic(7))), + (Value::I8(3), Value::I8(5), Ok(Value::I8(7))), + (Value::U8(3), Value::U8(5), Ok(Value::U8(7))), + (Value::I16(3), Value::I16(5), Ok(Value::I16(7))), + (Value::U16(3), Value::U16(5), Ok(Value::U16(7))), + (Value::I32(3), Value::I32(5), Ok(Value::I32(7))), + (Value::U32(3), Value::U32(5), Ok(Value::U32(7))), + (Value::I64(3), Value::I64(5), Ok(Value::I64(7))), + (Value::U64(3), Value::U64(5), Ok(Value::U64(7))), + (Value::F32(3.), Value::F32(5.), Err(Error::IntegralTypeRequired)), + (Value::F64(3.), Value::F64(5.), Err(Error::IntegralTypeRequired)), + (Value::Generic(3), Value::U32(5), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.or(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_xor() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(5), Ok(Value::Generic(6))), + (Value::I8(3), Value::I8(5), Ok(Value::I8(6))), + (Value::U8(3), Value::U8(5), Ok(Value::U8(6))), + (Value::I16(3), Value::I16(5), Ok(Value::I16(6))), + (Value::U16(3), Value::U16(5), Ok(Value::U16(6))), + (Value::I32(3), Value::I32(5), Ok(Value::I32(6))), + (Value::U32(3), Value::U32(5), Ok(Value::U32(6))), + (Value::I64(3), Value::I64(5), Ok(Value::I64(6))), + (Value::U64(3), Value::U64(5), Ok(Value::U64(6))), + (Value::F32(3.), Value::F32(5.), Err(Error::IntegralTypeRequired)), + (Value::F64(3.), Value::F64(5.), Err(Error::IntegralTypeRequired)), + (Value::Generic(3), Value::U32(5), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.xor(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_shl() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + // One of each type + (Value::Generic(3), Value::Generic(5), Ok(Value::Generic(96))), + (Value::I8(3), Value::U8(5), Ok(Value::I8(96))), + (Value::U8(3), Value::I8(5), Ok(Value::U8(96))), + (Value::I16(3), Value::U16(5), Ok(Value::I16(96))), + (Value::U16(3), 
Value::I16(5), Ok(Value::U16(96))), + (Value::I32(3), Value::U32(5), Ok(Value::I32(96))), + (Value::U32(3), Value::I32(5), Ok(Value::U32(96))), + (Value::I64(3), Value::U64(5), Ok(Value::I64(96))), + (Value::U64(3), Value::I64(5), Ok(Value::U64(96))), + (Value::F32(3.), Value::U8(5), Err(Error::IntegralTypeRequired)), + (Value::F64(3.), Value::U8(5), Err(Error::IntegralTypeRequired)), + // Invalid shifts + (Value::U8(3), Value::I8(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(3), Value::I16(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(3), Value::I32(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(3), Value::I64(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(3), Value::F32(5.), Err(Error::InvalidShiftExpression)), + (Value::U8(3), Value::F64(5.), Err(Error::InvalidShiftExpression)), + // Large shifts + (Value::Generic(3), Value::Generic(32), Ok(Value::Generic(0))), + (Value::I8(3), Value::U8(8), Ok(Value::I8(0))), + (Value::U8(3), Value::I8(9), Ok(Value::U8(0))), + (Value::I16(3), Value::U16(17), Ok(Value::I16(0))), + (Value::U16(3), Value::I16(16), Ok(Value::U16(0))), + (Value::I32(3), Value::U32(32), Ok(Value::I32(0))), + (Value::U32(3), Value::I32(33), Ok(Value::U32(0))), + (Value::I64(3), Value::U64(65), Ok(Value::I64(0))), + (Value::U64(3), Value::I64(64), Ok(Value::U64(0))), + ] { + assert_eq!(v1.shl(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_shr() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + // One of each type + (Value::Generic(96), Value::Generic(5), Ok(Value::Generic(3))), + (Value::I8(96), Value::U8(5), Err(Error::UnsupportedTypeOperation)), + (Value::U8(96), Value::I8(5), Ok(Value::U8(3))), + (Value::I16(96), Value::U16(5), Err(Error::UnsupportedTypeOperation)), + (Value::U16(96), Value::I16(5), Ok(Value::U16(3))), + (Value::I32(96), Value::U32(5), Err(Error::UnsupportedTypeOperation)), + (Value::U32(96), Value::I32(5), Ok(Value::U32(3))), + (Value::I64(96), 
Value::U64(5), Err(Error::UnsupportedTypeOperation)), + (Value::U64(96), Value::I64(5), Ok(Value::U64(3))), + (Value::F32(96.), Value::U8(5), Err(Error::IntegralTypeRequired)), + (Value::F64(96.), Value::U8(5), Err(Error::IntegralTypeRequired)), + // Invalid shifts + (Value::U8(96), Value::I8(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::I16(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::I32(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::I64(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::F32(5.), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::F64(5.), Err(Error::InvalidShiftExpression)), + // Large shifts + (Value::Generic(96), Value::Generic(32), Ok(Value::Generic(0))), + (Value::U8(96), Value::I8(9), Ok(Value::U8(0))), + (Value::U16(96), Value::I16(16), Ok(Value::U16(0))), + (Value::U32(96), Value::I32(33), Ok(Value::U32(0))), + (Value::U64(96), Value::I64(64), Ok(Value::U64(0))), + ] { + assert_eq!(v1.shr(v2, addr_mask), result); + } + } + + #[test] + #[rustfmt::skip] + fn value_shra() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + // One of each type + (Value::Generic(u64::from(-96i32 as u32)), Value::Generic(5), Ok(Value::Generic(-3i64 as u64))), + (Value::I8(-96), Value::U8(5), Ok(Value::I8(-3))), + (Value::U8(96), Value::I8(5), Err(Error::UnsupportedTypeOperation)), + (Value::I16(-96), Value::U16(5), Ok(Value::I16(-3))), + (Value::U16(96), Value::I16(5), Err(Error::UnsupportedTypeOperation)), + (Value::I32(-96), Value::U32(5), Ok(Value::I32(-3))), + (Value::U32(96), Value::I32(5), Err(Error::UnsupportedTypeOperation)), + (Value::I64(-96), Value::U64(5), Ok(Value::I64(-3))), + (Value::U64(96), Value::I64(5), Err(Error::UnsupportedTypeOperation)), + (Value::F32(96.), Value::U8(5), Err(Error::IntegralTypeRequired)), + (Value::F64(96.), Value::U8(5), Err(Error::IntegralTypeRequired)), + // Invalid shifts + (Value::U8(96), Value::I8(-5), 
Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::I16(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::I32(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::I64(-5), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::F32(5.), Err(Error::InvalidShiftExpression)), + (Value::U8(96), Value::F64(5.), Err(Error::InvalidShiftExpression)), + // Large shifts + (Value::Generic(96), Value::Generic(32), Ok(Value::Generic(0))), + (Value::I8(96), Value::U8(8), Ok(Value::I8(0))), + (Value::I8(-96), Value::U8(8), Ok(Value::I8(-1))), + (Value::I16(96), Value::U16(17), Ok(Value::I16(0))), + (Value::I16(-96), Value::U16(17), Ok(Value::I16(-1))), + (Value::I32(96), Value::U32(32), Ok(Value::I32(0))), + (Value::I32(-96), Value::U32(32), Ok(Value::I32(-1))), + (Value::I64(96), Value::U64(65), Ok(Value::I64(0))), + (Value::I64(-96), Value::U64(65), Ok(Value::I64(-1))), + ] { + assert_eq!(v1.shra(v2, addr_mask), result); + } + } + + #[test] + fn value_eq() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(3), Ok(Value::Generic(1))), + (Value::Generic(!3), Value::Generic(3), Ok(Value::Generic(0))), + (Value::I8(3), Value::I8(3), Ok(Value::Generic(1))), + (Value::I8(!3), Value::I8(3), Ok(Value::Generic(0))), + (Value::U8(3), Value::U8(3), Ok(Value::Generic(1))), + (Value::U8(!3), Value::U8(3), Ok(Value::Generic(0))), + (Value::I16(3), Value::I16(3), Ok(Value::Generic(1))), + (Value::I16(!3), Value::I16(3), Ok(Value::Generic(0))), + (Value::U16(3), Value::U16(3), Ok(Value::Generic(1))), + (Value::U16(!3), Value::U16(3), Ok(Value::Generic(0))), + (Value::I32(3), Value::I32(3), Ok(Value::Generic(1))), + (Value::I32(!3), Value::I32(3), Ok(Value::Generic(0))), + (Value::U32(3), Value::U32(3), Ok(Value::Generic(1))), + (Value::U32(!3), Value::U32(3), Ok(Value::Generic(0))), + (Value::I64(3), Value::I64(3), Ok(Value::Generic(1))), + (Value::I64(!3), Value::I64(3), Ok(Value::Generic(0))), + 
(Value::U64(3), Value::U64(3), Ok(Value::Generic(1))), + (Value::U64(!3), Value::U64(3), Ok(Value::Generic(0))), + (Value::F32(3.), Value::F32(3.), Ok(Value::Generic(1))), + (Value::F32(-3.), Value::F32(3.), Ok(Value::Generic(0))), + (Value::F64(3.), Value::F64(3.), Ok(Value::Generic(1))), + (Value::F64(-3.), Value::F64(3.), Ok(Value::Generic(0))), + (Value::Generic(3), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.eq(v2, addr_mask), result); + } + } + + #[test] + fn value_ne() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(3), Ok(Value::Generic(0))), + (Value::Generic(!3), Value::Generic(3), Ok(Value::Generic(1))), + (Value::I8(3), Value::I8(3), Ok(Value::Generic(0))), + (Value::I8(!3), Value::I8(3), Ok(Value::Generic(1))), + (Value::U8(3), Value::U8(3), Ok(Value::Generic(0))), + (Value::U8(!3), Value::U8(3), Ok(Value::Generic(1))), + (Value::I16(3), Value::I16(3), Ok(Value::Generic(0))), + (Value::I16(!3), Value::I16(3), Ok(Value::Generic(1))), + (Value::U16(3), Value::U16(3), Ok(Value::Generic(0))), + (Value::U16(!3), Value::U16(3), Ok(Value::Generic(1))), + (Value::I32(3), Value::I32(3), Ok(Value::Generic(0))), + (Value::I32(!3), Value::I32(3), Ok(Value::Generic(1))), + (Value::U32(3), Value::U32(3), Ok(Value::Generic(0))), + (Value::U32(!3), Value::U32(3), Ok(Value::Generic(1))), + (Value::I64(3), Value::I64(3), Ok(Value::Generic(0))), + (Value::I64(!3), Value::I64(3), Ok(Value::Generic(1))), + (Value::U64(3), Value::U64(3), Ok(Value::Generic(0))), + (Value::U64(!3), Value::U64(3), Ok(Value::Generic(1))), + (Value::F32(3.), Value::F32(3.), Ok(Value::Generic(0))), + (Value::F32(-3.), Value::F32(3.), Ok(Value::Generic(1))), + (Value::F64(3.), Value::F64(3.), Ok(Value::Generic(0))), + (Value::F64(-3.), Value::F64(3.), Ok(Value::Generic(1))), + (Value::Generic(3), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.ne(v2, addr_mask), result); + } + } + + #[test] + fn value_ge() { + 
let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(!3), Ok(Value::Generic(1))), + (Value::Generic(!3), Value::Generic(3), Ok(Value::Generic(0))), + (Value::I8(3), Value::I8(!3), Ok(Value::Generic(1))), + (Value::I8(!3), Value::I8(3), Ok(Value::Generic(0))), + (Value::U8(3), Value::U8(!3), Ok(Value::Generic(0))), + (Value::U8(!3), Value::U8(3), Ok(Value::Generic(1))), + (Value::I16(3), Value::I16(!3), Ok(Value::Generic(1))), + (Value::I16(!3), Value::I16(3), Ok(Value::Generic(0))), + (Value::U16(3), Value::U16(!3), Ok(Value::Generic(0))), + (Value::U16(!3), Value::U16(3), Ok(Value::Generic(1))), + (Value::I32(3), Value::I32(!3), Ok(Value::Generic(1))), + (Value::I32(!3), Value::I32(3), Ok(Value::Generic(0))), + (Value::U32(3), Value::U32(!3), Ok(Value::Generic(0))), + (Value::U32(!3), Value::U32(3), Ok(Value::Generic(1))), + (Value::I64(3), Value::I64(!3), Ok(Value::Generic(1))), + (Value::I64(!3), Value::I64(3), Ok(Value::Generic(0))), + (Value::U64(3), Value::U64(!3), Ok(Value::Generic(0))), + (Value::U64(!3), Value::U64(3), Ok(Value::Generic(1))), + (Value::F32(3.), Value::F32(-3.), Ok(Value::Generic(1))), + (Value::F32(-3.), Value::F32(3.), Ok(Value::Generic(0))), + (Value::F64(3.), Value::F64(-3.), Ok(Value::Generic(1))), + (Value::F64(-3.), Value::F64(3.), Ok(Value::Generic(0))), + (Value::Generic(3), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.ge(v2, addr_mask), result); + } + } + + #[test] + fn value_gt() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(!3), Ok(Value::Generic(1))), + (Value::Generic(!3), Value::Generic(3), Ok(Value::Generic(0))), + (Value::I8(3), Value::I8(!3), Ok(Value::Generic(1))), + (Value::I8(!3), Value::I8(3), Ok(Value::Generic(0))), + (Value::U8(3), Value::U8(!3), Ok(Value::Generic(0))), + (Value::U8(!3), Value::U8(3), Ok(Value::Generic(1))), + (Value::I16(3), Value::I16(!3), Ok(Value::Generic(1))), + (Value::I16(!3), 
Value::I16(3), Ok(Value::Generic(0))), + (Value::U16(3), Value::U16(!3), Ok(Value::Generic(0))), + (Value::U16(!3), Value::U16(3), Ok(Value::Generic(1))), + (Value::I32(3), Value::I32(!3), Ok(Value::Generic(1))), + (Value::I32(!3), Value::I32(3), Ok(Value::Generic(0))), + (Value::U32(3), Value::U32(!3), Ok(Value::Generic(0))), + (Value::U32(!3), Value::U32(3), Ok(Value::Generic(1))), + (Value::I64(3), Value::I64(!3), Ok(Value::Generic(1))), + (Value::I64(!3), Value::I64(3), Ok(Value::Generic(0))), + (Value::U64(3), Value::U64(!3), Ok(Value::Generic(0))), + (Value::U64(!3), Value::U64(3), Ok(Value::Generic(1))), + (Value::F32(3.), Value::F32(-3.), Ok(Value::Generic(1))), + (Value::F32(-3.), Value::F32(3.), Ok(Value::Generic(0))), + (Value::F64(3.), Value::F64(-3.), Ok(Value::Generic(1))), + (Value::F64(-3.), Value::F64(3.), Ok(Value::Generic(0))), + (Value::Generic(3), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.gt(v2, addr_mask), result); + } + } + + #[test] + fn value_le() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(!3), Ok(Value::Generic(0))), + (Value::Generic(!3), Value::Generic(3), Ok(Value::Generic(1))), + (Value::I8(3), Value::I8(!3), Ok(Value::Generic(0))), + (Value::I8(!3), Value::I8(3), Ok(Value::Generic(1))), + (Value::U8(3), Value::U8(!3), Ok(Value::Generic(1))), + (Value::U8(!3), Value::U8(3), Ok(Value::Generic(0))), + (Value::I16(3), Value::I16(!3), Ok(Value::Generic(0))), + (Value::I16(!3), Value::I16(3), Ok(Value::Generic(1))), + (Value::U16(3), Value::U16(!3), Ok(Value::Generic(1))), + (Value::U16(!3), Value::U16(3), Ok(Value::Generic(0))), + (Value::I32(3), Value::I32(!3), Ok(Value::Generic(0))), + (Value::I32(!3), Value::I32(3), Ok(Value::Generic(1))), + (Value::U32(3), Value::U32(!3), Ok(Value::Generic(1))), + (Value::U32(!3), Value::U32(3), Ok(Value::Generic(0))), + (Value::I64(3), Value::I64(!3), Ok(Value::Generic(0))), + (Value::I64(!3), Value::I64(3), 
Ok(Value::Generic(1))), + (Value::U64(3), Value::U64(!3), Ok(Value::Generic(1))), + (Value::U64(!3), Value::U64(3), Ok(Value::Generic(0))), + (Value::F32(3.), Value::F32(-3.), Ok(Value::Generic(0))), + (Value::F32(-3.), Value::F32(3.), Ok(Value::Generic(1))), + (Value::F64(3.), Value::F64(-3.), Ok(Value::Generic(0))), + (Value::F64(-3.), Value::F64(3.), Ok(Value::Generic(1))), + (Value::Generic(3), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.le(v2, addr_mask), result); + } + } + + #[test] + fn value_lt() { + let addr_mask = 0xffff_ffff; + for &(v1, v2, result) in &[ + (Value::Generic(3), Value::Generic(!3), Ok(Value::Generic(0))), + (Value::Generic(!3), Value::Generic(3), Ok(Value::Generic(1))), + (Value::I8(3), Value::I8(!3), Ok(Value::Generic(0))), + (Value::I8(!3), Value::I8(3), Ok(Value::Generic(1))), + (Value::U8(3), Value::U8(!3), Ok(Value::Generic(1))), + (Value::U8(!3), Value::U8(3), Ok(Value::Generic(0))), + (Value::I16(3), Value::I16(!3), Ok(Value::Generic(0))), + (Value::I16(!3), Value::I16(3), Ok(Value::Generic(1))), + (Value::U16(3), Value::U16(!3), Ok(Value::Generic(1))), + (Value::U16(!3), Value::U16(3), Ok(Value::Generic(0))), + (Value::I32(3), Value::I32(!3), Ok(Value::Generic(0))), + (Value::I32(!3), Value::I32(3), Ok(Value::Generic(1))), + (Value::U32(3), Value::U32(!3), Ok(Value::Generic(1))), + (Value::U32(!3), Value::U32(3), Ok(Value::Generic(0))), + (Value::I64(3), Value::I64(!3), Ok(Value::Generic(0))), + (Value::I64(!3), Value::I64(3), Ok(Value::Generic(1))), + (Value::U64(3), Value::U64(!3), Ok(Value::Generic(1))), + (Value::U64(!3), Value::U64(3), Ok(Value::Generic(0))), + (Value::F32(3.), Value::F32(-3.), Ok(Value::Generic(0))), + (Value::F32(-3.), Value::F32(3.), Ok(Value::Generic(1))), + (Value::F64(3.), Value::F64(-3.), Ok(Value::Generic(0))), + (Value::F64(-3.), Value::F64(3.), Ok(Value::Generic(1))), + (Value::Generic(3), Value::U32(3), Err(Error::TypeMismatch)), + ] { + assert_eq!(v1.lt(v2, addr_mask), result); 
+ } + } +} diff --git a/third_party/rust/gimli/src/test_util.rs b/third_party/rust/gimli/src/test_util.rs new file mode 100644 index 000000000000..706aaf934633 --- /dev/null +++ b/third_party/rust/gimli/src/test_util.rs @@ -0,0 +1,53 @@ +#![allow(missing_docs)] + +use crate::Format; +use test_assembler::{Label, Section}; + +pub trait GimliSectionMethods { + fn sleb(self, val: i64) -> Self; + fn uleb(self, val: u64) -> Self; + fn initial_length(self, format: Format, length: &Label, start: &Label) -> Self; + fn word(self, size: u8, val: u64) -> Self; + fn word_label(self, size: u8, val: &Label) -> Self; +} + +impl GimliSectionMethods for Section { + fn sleb(mut self, mut val: i64) -> Self { + while val & !0x3f != 0 && val | 0x3f != -1 { + self = self.D8(val as u8 | 0x80); + val >>= 7; + } + self.D8(val as u8 & 0x7f) + } + + fn uleb(mut self, mut val: u64) -> Self { + while val & !0x7f != 0 { + self = self.D8(val as u8 | 0x80); + val >>= 7; + } + self.D8(val as u8) + } + + fn initial_length(self, format: Format, length: &Label, start: &Label) -> Self { + match format { + Format::Dwarf32 => self.D32(length).mark(start), + Format::Dwarf64 => self.D32(0xffff_ffff).D64(length).mark(start), + } + } + + fn word(self, size: u8, val: u64) -> Self { + match size { + 4 => self.D32(val as u32), + 8 => self.D64(val), + _ => panic!("unsupported word size"), + } + } + + fn word_label(self, size: u8, val: &Label) -> Self { + match size { + 4 => self.D32(val), + 8 => self.D64(val), + _ => panic!("unsupported word size"), + } + } +} diff --git a/third_party/rust/gimli/src/write/abbrev.rs b/third_party/rust/gimli/src/write/abbrev.rs new file mode 100644 index 000000000000..7cdfa969c483 --- /dev/null +++ b/third_party/rust/gimli/src/write/abbrev.rs @@ -0,0 +1,188 @@ +use alloc::vec::Vec; +use indexmap::IndexSet; +use std::ops::{Deref, DerefMut}; + +use crate::common::{DebugAbbrevOffset, SectionId}; +use crate::constants; +use crate::write::{Result, Section, Writer}; + +/// A table of 
abbreviations that will be stored in a `.debug_abbrev` section. +// Requirements: +// - values are `Abbreviation` +// - insertion returns an abbreviation code for use in writing a DIE +// - inserting a duplicate returns the code of the existing value +#[derive(Debug, Default)] +pub(crate) struct AbbreviationTable { + abbrevs: IndexSet, +} + +impl AbbreviationTable { + /// Add an abbreviation to the table and return its code. + pub fn add(&mut self, abbrev: Abbreviation) -> u64 { + let (code, _) = self.abbrevs.insert_full(abbrev); + // Code must be non-zero + (code + 1) as u64 + } + + /// Write the abbreviation table to the `.debug_abbrev` section. + pub fn write(&self, w: &mut DebugAbbrev) -> Result<()> { + for (code, abbrev) in self.abbrevs.iter().enumerate() { + w.write_uleb128((code + 1) as u64)?; + abbrev.write(w)?; + } + // Null abbreviation code + w.write_u8(0) + } +} + +/// An abbreviation describes the shape of a `DebuggingInformationEntry`'s type: +/// its tag type, whether it has children, and its set of attributes. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct Abbreviation { + tag: constants::DwTag, + has_children: bool, + attributes: Vec, +} + +impl Abbreviation { + /// Construct a new `Abbreviation`. + #[inline] + pub fn new( + tag: constants::DwTag, + has_children: bool, + attributes: Vec, + ) -> Abbreviation { + Abbreviation { + tag, + has_children, + attributes, + } + } + + /// Write the abbreviation to the `.debug_abbrev` section. + pub fn write(&self, w: &mut DebugAbbrev) -> Result<()> { + w.write_uleb128(self.tag.0.into())?; + w.write_u8(if self.has_children { + constants::DW_CHILDREN_yes.0 + } else { + constants::DW_CHILDREN_no.0 + })?; + for attr in &self.attributes { + attr.write(w)?; + } + // Null name and form + w.write_u8(0)?; + w.write_u8(0) + } +} + +/// The description of an attribute in an abbreviated type. 
+// TODO: support implicit const +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct AttributeSpecification { + name: constants::DwAt, + form: constants::DwForm, +} + +impl AttributeSpecification { + /// Construct a new `AttributeSpecification`. + #[inline] + pub fn new(name: constants::DwAt, form: constants::DwForm) -> AttributeSpecification { + AttributeSpecification { name, form } + } + + /// Write the attribute specification to the `.debug_abbrev` section. + #[inline] + pub fn write(&self, w: &mut DebugAbbrev) -> Result<()> { + w.write_uleb128(self.name.0.into())?; + w.write_uleb128(self.form.0.into()) + } +} + +define_section!( + DebugAbbrev, + DebugAbbrevOffset, + "A writable `.debug_abbrev` section." +); + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::constants; + use crate::read; + use crate::write::EndianVec; + use crate::LittleEndian; + + #[test] + fn test_abbreviation_table() { + let mut abbrevs = AbbreviationTable::default(); + let abbrev1 = Abbreviation::new( + constants::DW_TAG_subprogram, + false, + vec![AttributeSpecification::new( + constants::DW_AT_name, + constants::DW_FORM_string, + )], + ); + let abbrev2 = Abbreviation::new( + constants::DW_TAG_compile_unit, + true, + vec![ + AttributeSpecification::new(constants::DW_AT_producer, constants::DW_FORM_strp), + AttributeSpecification::new(constants::DW_AT_language, constants::DW_FORM_data2), + ], + ); + let code1 = abbrevs.add(abbrev1.clone()); + assert_eq!(code1, 1); + let code2 = abbrevs.add(abbrev2.clone()); + assert_eq!(code2, 2); + assert_eq!(abbrevs.add(abbrev1.clone()), code1); + assert_eq!(abbrevs.add(abbrev2.clone()), code2); + + let mut debug_abbrev = DebugAbbrev::from(EndianVec::new(LittleEndian)); + let debug_abbrev_offset = debug_abbrev.offset(); + assert_eq!(debug_abbrev_offset, DebugAbbrevOffset(0)); + abbrevs.write(&mut debug_abbrev).unwrap(); + assert_eq!(debug_abbrev.offset(), DebugAbbrevOffset(17)); + + let read_debug_abbrev 
= read::DebugAbbrev::new(debug_abbrev.slice(), LittleEndian); + let read_abbrevs = read_debug_abbrev + .abbreviations(debug_abbrev_offset) + .unwrap(); + + let read_abbrev1 = read_abbrevs.get(code1).unwrap(); + assert_eq!(abbrev1.tag, read_abbrev1.tag()); + assert_eq!(abbrev1.has_children, read_abbrev1.has_children()); + assert_eq!(abbrev1.attributes.len(), read_abbrev1.attributes().len()); + assert_eq!( + abbrev1.attributes[0].name, + read_abbrev1.attributes()[0].name() + ); + assert_eq!( + abbrev1.attributes[0].form, + read_abbrev1.attributes()[0].form() + ); + + let read_abbrev2 = read_abbrevs.get(code2).unwrap(); + assert_eq!(abbrev2.tag, read_abbrev2.tag()); + assert_eq!(abbrev2.has_children, read_abbrev2.has_children()); + assert_eq!(abbrev2.attributes.len(), read_abbrev2.attributes().len()); + assert_eq!( + abbrev2.attributes[0].name, + read_abbrev2.attributes()[0].name() + ); + assert_eq!( + abbrev2.attributes[0].form, + read_abbrev2.attributes()[0].form() + ); + assert_eq!( + abbrev2.attributes[1].name, + read_abbrev2.attributes()[1].name() + ); + assert_eq!( + abbrev2.attributes[1].form, + read_abbrev2.attributes()[1].form() + ); + } +} diff --git a/third_party/rust/gimli/src/write/cfi.rs b/third_party/rust/gimli/src/write/cfi.rs new file mode 100644 index 000000000000..d1339615f914 --- /dev/null +++ b/third_party/rust/gimli/src/write/cfi.rs @@ -0,0 +1,1071 @@ +use alloc::vec::Vec; +use indexmap::IndexSet; +use std::ops::{Deref, DerefMut}; + +use crate::common::{DebugFrameOffset, EhFrameOffset, Encoding, Format, Register, SectionId}; +use crate::constants; +use crate::write::{Address, BaseId, Error, Expression, Result, Section, Writer}; + +define_section!( + DebugFrame, + DebugFrameOffset, + "A writable `.debug_frame` section." +); + +define_section!(EhFrame, EhFrameOffset, "A writable `.eh_frame` section."); + +define_id!(CieId, "An identifier for a CIE in a `FrameTable`."); + +/// A table of frame description entries. 
+#[derive(Debug, Default)] +pub struct FrameTable { + /// Base id for CIEs. + base_id: BaseId, + /// The common information entries. + cies: IndexSet, + /// The frame description entries. + fdes: Vec<(CieId, FrameDescriptionEntry)>, +} + +impl FrameTable { + /// Add a CIE and return its id. + /// + /// If the CIE already exists, then return the id of the existing CIE. + pub fn add_cie(&mut self, cie: CommonInformationEntry) -> CieId { + let (index, _) = self.cies.insert_full(cie); + CieId::new(self.base_id, index) + } + + /// The number of CIEs. + pub fn cie_count(&self) -> usize { + self.cies.len() + } + + /// Add a FDE. + /// + /// Does not check for duplicates. + /// + /// # Panics + /// + /// Panics if the CIE id is invalid. + pub fn add_fde(&mut self, cie: CieId, fde: FrameDescriptionEntry) { + debug_assert_eq!(self.base_id, cie.base_id); + self.fdes.push((cie, fde)); + } + + /// The number of FDEs. + pub fn fde_count(&self) -> usize { + self.fdes.len() + } + + /// Write the frame table entries to the given `.debug_frame` section. + pub fn write_debug_frame(&self, w: &mut DebugFrame) -> Result<()> { + self.write(&mut w.0, false) + } + + /// Write the frame table entries to the given `.eh_frame` section. + pub fn write_eh_frame(&self, w: &mut EhFrame) -> Result<()> { + self.write(&mut w.0, true) + } + + fn write(&self, w: &mut W, eh_frame: bool) -> Result<()> { + let mut cie_offsets = vec![None; self.cies.len()]; + for (cie_id, fde) in &self.fdes { + let cie_index = cie_id.index; + let cie = self.cies.get_index(cie_index).unwrap(); + let cie_offset = match cie_offsets[cie_index] { + Some(offset) => offset, + None => { + // Only write CIEs as they are referenced. + let offset = cie.write(w, eh_frame)?; + cie_offsets[cie_index] = Some(offset); + offset + } + }; + + fde.write(w, eh_frame, cie_offset, cie)?; + } + // TODO: write length 0 terminator for eh_frame? + Ok(()) + } +} + +/// A common information entry. 
This contains information that is shared between FDEs. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct CommonInformationEntry { + encoding: Encoding, + + /// A constant that is factored out of code offsets. + /// + /// This should be set to the minimum instruction length. + /// Writing a code offset that is not a multiple of this factor will generate an error. + code_alignment_factor: u8, + + /// A constant that is factored out of data offsets. + /// + /// This should be set to the minimum data alignment for the frame. + /// Writing a data offset that is not a multiple of this factor will generate an error. + data_alignment_factor: i8, + + /// The return address register. This might not correspond to an actual machine register. + return_address_register: Register, + + /// The address of the personality function and its encoding. + pub personality: Option<(constants::DwEhPe, Address)>, + + /// The encoding to use for the LSDA address in FDEs. + /// + /// If set then all FDEs which use this CIE must have a LSDA address. + pub lsda_encoding: Option, + + /// The encoding to use for addresses in FDEs. + pub fde_address_encoding: constants::DwEhPe, + + /// True for signal trampolines. + pub signal_trampoline: bool, + + /// The initial instructions upon entry to this function. + instructions: Vec, +} + +impl CommonInformationEntry { + /// Create a new common information entry. + /// + /// The encoding version must be a CFI version, not a DWARF version. + pub fn new( + encoding: Encoding, + code_alignment_factor: u8, + data_alignment_factor: i8, + return_address_register: Register, + ) -> Self { + CommonInformationEntry { + encoding, + code_alignment_factor, + data_alignment_factor, + return_address_register, + personality: None, + lsda_encoding: None, + fde_address_encoding: constants::DW_EH_PE_absptr, + signal_trampoline: false, + instructions: Vec::new(), + } + } + + /// Add an initial instruction. 
+ pub fn add_instruction(&mut self, instruction: CallFrameInstruction) { + self.instructions.push(instruction); + } + + fn has_augmentation(&self) -> bool { + self.personality.is_some() + || self.lsda_encoding.is_some() + || self.signal_trampoline + || self.fde_address_encoding != constants::DW_EH_PE_absptr + } + + /// Returns the section offset of the CIE. + fn write(&self, w: &mut W, eh_frame: bool) -> Result { + let encoding = self.encoding; + let offset = w.len(); + + let length_offset = w.write_initial_length(encoding.format)?; + let length_base = w.len(); + + if eh_frame { + w.write_u32(0)?; + } else { + match encoding.format { + Format::Dwarf32 => w.write_u32(0xffff_ffff)?, + Format::Dwarf64 => w.write_u64(0xffff_ffff_ffff_ffff)?, + } + } + + if eh_frame { + if encoding.version != 1 { + return Err(Error::UnsupportedVersion(encoding.version)); + }; + } else { + match encoding.version { + 1 | 3 | 4 => {} + _ => return Err(Error::UnsupportedVersion(encoding.version)), + }; + } + w.write_u8(encoding.version as u8)?; + + let augmentation = self.has_augmentation(); + if augmentation { + w.write_u8(b'z')?; + if self.lsda_encoding.is_some() { + w.write_u8(b'L')?; + } + if self.personality.is_some() { + w.write_u8(b'P')?; + } + if self.fde_address_encoding != constants::DW_EH_PE_absptr { + w.write_u8(b'R')?; + } + if self.signal_trampoline { + w.write_u8(b'S')?; + } + } + w.write_u8(0)?; + + if encoding.version >= 4 { + w.write_u8(encoding.address_size)?; + // TODO: segment_selector_size + w.write_u8(0)?; + } + + w.write_uleb128(self.code_alignment_factor.into())?; + w.write_sleb128(self.data_alignment_factor.into())?; + + if !eh_frame && encoding.version == 1 { + let register = self.return_address_register.0 as u8; + if u16::from(register) != self.return_address_register.0 { + return Err(Error::ValueTooLarge); + } + w.write_u8(register)?; + } else { + w.write_uleb128(self.return_address_register.0.into())?; + } + + if augmentation { + let augmentation_length_offset 
= w.len(); + w.write_u8(0)?; + let augmentation_length_base = w.len(); + + if let Some(eh_pe) = self.lsda_encoding { + w.write_u8(eh_pe.0)?; + } + if let Some((eh_pe, address)) = self.personality { + w.write_u8(eh_pe.0)?; + w.write_eh_pointer(address, eh_pe, encoding.address_size)?; + } + if self.fde_address_encoding != constants::DW_EH_PE_absptr { + w.write_u8(self.fde_address_encoding.0)?; + } + + let augmentation_length = (w.len() - augmentation_length_base) as u64; + debug_assert!(augmentation_length < 0x80); + w.write_udata_at(augmentation_length_offset, augmentation_length, 1)?; + } + + for instruction in &self.instructions { + instruction.write(w, encoding, self)?; + } + + write_nop( + w, + encoding.format.word_size() as usize + w.len() - length_base, + encoding.address_size, + )?; + + let length = (w.len() - length_base) as u64; + w.write_initial_length_at(length_offset, length, encoding.format)?; + + Ok(offset) + } +} + +/// A frame description entry. There should be one FDE per function. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FrameDescriptionEntry { + /// The initial address of the function. + address: Address, + + /// The length in bytes of the function. + length: u32, + + /// The address of the LSDA. + pub lsda: Option
, + + /// The instructions for this function, ordered by offset. + instructions: Vec<(u32, CallFrameInstruction)>, +} + +impl FrameDescriptionEntry { + /// Create a new frame description entry for a function. + pub fn new(address: Address, length: u32) -> Self { + FrameDescriptionEntry { + address, + length, + lsda: None, + instructions: Vec::new(), + } + } + + /// Add an instruction. + /// + /// Instructions must be added in increasing order of offset, or writing will fail. + pub fn add_instruction(&mut self, offset: u32, instruction: CallFrameInstruction) { + debug_assert!(self.instructions.last().map(|x| x.0).unwrap_or(0) <= offset); + self.instructions.push((offset, instruction)); + } + + fn write( + &self, + w: &mut W, + eh_frame: bool, + cie_offset: usize, + cie: &CommonInformationEntry, + ) -> Result<()> { + let encoding = cie.encoding; + let length_offset = w.write_initial_length(encoding.format)?; + let length_base = w.len(); + + if eh_frame { + // .eh_frame uses a relative offset which doesn't need relocation. 
+ w.write_udata((w.len() - cie_offset) as u64, 4)?; + } else { + w.write_offset( + cie_offset, + SectionId::DebugFrame, + encoding.format.word_size(), + )?; + } + + if cie.fde_address_encoding != constants::DW_EH_PE_absptr { + w.write_eh_pointer( + self.address, + cie.fde_address_encoding, + encoding.address_size, + )?; + w.write_eh_pointer_data( + self.length.into(), + cie.fde_address_encoding.format(), + encoding.address_size, + )?; + } else { + w.write_address(self.address, encoding.address_size)?; + w.write_udata(self.length.into(), encoding.address_size)?; + } + + if cie.has_augmentation() { + let augmentation_length_offset = w.len(); + w.write_u8(0)?; + let augmentation_length_base = w.len(); + + debug_assert_eq!(self.lsda.is_some(), cie.lsda_encoding.is_some()); + if let (Some(lsda), Some(lsda_encoding)) = (self.lsda, cie.lsda_encoding) { + w.write_eh_pointer(lsda, lsda_encoding, encoding.address_size)?; + } + + let augmentation_length = (w.len() - augmentation_length_base) as u64; + debug_assert!(augmentation_length < 0x80); + w.write_udata_at(augmentation_length_offset, augmentation_length, 1)?; + } + + let mut prev_offset = 0; + for (offset, instruction) in &self.instructions { + write_advance_loc(w, cie.code_alignment_factor, prev_offset, *offset)?; + prev_offset = *offset; + instruction.write(w, encoding, cie)?; + } + + write_nop( + w, + encoding.format.word_size() as usize + w.len() - length_base, + encoding.address_size, + )?; + + let length = (w.len() - length_base) as u64; + w.write_initial_length_at(length_offset, length, encoding.format)?; + + Ok(()) + } +} + +/// An instruction in a frame description entry. +/// +/// This may be a CFA definition, a register rule, or some other directive. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum CallFrameInstruction { + /// Define the CFA rule to use the provided register and offset. + Cfa(Register, i32), + /// Update the CFA rule to use the provided register. 
The offset is unchanged. + CfaRegister(Register), + /// Update the CFA rule to use the provided offset. The register is unchanged. + CfaOffset(i32), + /// Define the CFA rule to use the provided expression. + CfaExpression(Expression), + + /// Restore the initial rule for the register. + Restore(Register), + /// The previous value of the register is not recoverable. + Undefined(Register), + /// The register has not been modified. + SameValue(Register), + /// The previous value of the register is saved at address CFA + offset. + Offset(Register, i32), + /// The previous value of the register is CFA + offset. + ValOffset(Register, i32), + /// The previous value of the register is stored in another register. + Register(Register, Register), + /// The previous value of the register is saved at address given by the expression. + Expression(Register, Expression), + /// The previous value of the register is given by the expression. + ValExpression(Register, Expression), + + /// Push all register rules onto a stack. + RememberState, + /// Pop all register rules off the stack. + RestoreState, + /// The size of the arguments that have been pushed onto the stack. + ArgsSize(u32), + + /// AAarch64 extension: negate the `RA_SIGN_STATE` pseudo-register. + NegateRaState, +} + +impl CallFrameInstruction { + fn write( + &self, + w: &mut W, + encoding: Encoding, + cie: &CommonInformationEntry, + ) -> Result<()> { + match *self { + CallFrameInstruction::Cfa(register, offset) => { + if offset < 0 { + let offset = factored_data_offset(offset, cie.data_alignment_factor)?; + w.write_u8(constants::DW_CFA_def_cfa_sf.0)?; + w.write_uleb128(register.0.into())?; + w.write_sleb128(offset.into())?; + } else { + // Unfactored offset. 
+ w.write_u8(constants::DW_CFA_def_cfa.0)?; + w.write_uleb128(register.0.into())?; + w.write_uleb128(offset as u64)?; + } + } + CallFrameInstruction::CfaRegister(register) => { + w.write_u8(constants::DW_CFA_def_cfa_register.0)?; + w.write_uleb128(register.0.into())?; + } + CallFrameInstruction::CfaOffset(offset) => { + if offset < 0 { + let offset = factored_data_offset(offset, cie.data_alignment_factor)?; + w.write_u8(constants::DW_CFA_def_cfa_offset_sf.0)?; + w.write_sleb128(offset.into())?; + } else { + // Unfactored offset. + w.write_u8(constants::DW_CFA_def_cfa_offset.0)?; + w.write_uleb128(offset as u64)?; + } + } + CallFrameInstruction::CfaExpression(ref expression) => { + w.write_u8(constants::DW_CFA_def_cfa_expression.0)?; + w.write_uleb128(expression.size(encoding, None) as u64)?; + expression.write(w, None, encoding, None)?; + } + CallFrameInstruction::Restore(register) => { + if register.0 < 0x40 { + w.write_u8(constants::DW_CFA_restore.0 | register.0 as u8)?; + } else { + w.write_u8(constants::DW_CFA_restore_extended.0)?; + w.write_uleb128(register.0.into())?; + } + } + CallFrameInstruction::Undefined(register) => { + w.write_u8(constants::DW_CFA_undefined.0)?; + w.write_uleb128(register.0.into())?; + } + CallFrameInstruction::SameValue(register) => { + w.write_u8(constants::DW_CFA_same_value.0)?; + w.write_uleb128(register.0.into())?; + } + CallFrameInstruction::Offset(register, offset) => { + let offset = factored_data_offset(offset, cie.data_alignment_factor)?; + if offset < 0 { + w.write_u8(constants::DW_CFA_offset_extended_sf.0)?; + w.write_uleb128(register.0.into())?; + w.write_sleb128(offset.into())?; + } else if register.0 < 0x40 { + w.write_u8(constants::DW_CFA_offset.0 | register.0 as u8)?; + w.write_uleb128(offset as u64)?; + } else { + w.write_u8(constants::DW_CFA_offset_extended.0)?; + w.write_uleb128(register.0.into())?; + w.write_uleb128(offset as u64)?; + } + } + CallFrameInstruction::ValOffset(register, offset) => { + let offset = 
factored_data_offset(offset, cie.data_alignment_factor)?; + if offset < 0 { + w.write_u8(constants::DW_CFA_val_offset_sf.0)?; + w.write_uleb128(register.0.into())?; + w.write_sleb128(offset.into())?; + } else { + w.write_u8(constants::DW_CFA_val_offset.0)?; + w.write_uleb128(register.0.into())?; + w.write_uleb128(offset as u64)?; + } + } + CallFrameInstruction::Register(register1, register2) => { + w.write_u8(constants::DW_CFA_register.0)?; + w.write_uleb128(register1.0.into())?; + w.write_uleb128(register2.0.into())?; + } + CallFrameInstruction::Expression(register, ref expression) => { + w.write_u8(constants::DW_CFA_expression.0)?; + w.write_uleb128(register.0.into())?; + w.write_uleb128(expression.size(encoding, None) as u64)?; + expression.write(w, None, encoding, None)?; + } + CallFrameInstruction::ValExpression(register, ref expression) => { + w.write_u8(constants::DW_CFA_val_expression.0)?; + w.write_uleb128(register.0.into())?; + w.write_uleb128(expression.size(encoding, None) as u64)?; + expression.write(w, None, encoding, None)?; + } + CallFrameInstruction::RememberState => { + w.write_u8(constants::DW_CFA_remember_state.0)?; + } + CallFrameInstruction::RestoreState => { + w.write_u8(constants::DW_CFA_restore_state.0)?; + } + CallFrameInstruction::ArgsSize(size) => { + w.write_u8(constants::DW_CFA_GNU_args_size.0)?; + w.write_uleb128(size.into())?; + } + CallFrameInstruction::NegateRaState => { + w.write_u8(constants::DW_CFA_AARCH64_negate_ra_state.0)?; + } + } + Ok(()) + } +} + +fn write_advance_loc( + w: &mut W, + code_alignment_factor: u8, + prev_offset: u32, + offset: u32, +) -> Result<()> { + if offset == prev_offset { + return Ok(()); + } + let delta = factored_code_delta(prev_offset, offset, code_alignment_factor)?; + if delta < 0x40 { + w.write_u8(constants::DW_CFA_advance_loc.0 | delta as u8)?; + } else if delta < 0x100 { + w.write_u8(constants::DW_CFA_advance_loc1.0)?; + w.write_u8(delta as u8)?; + } else if delta < 0x10000 { + 
w.write_u8(constants::DW_CFA_advance_loc2.0)?; + w.write_u16(delta as u16)?; + } else { + w.write_u8(constants::DW_CFA_advance_loc4.0)?; + w.write_u32(delta)?; + } + Ok(()) +} + +fn write_nop(w: &mut W, len: usize, align: u8) -> Result<()> { + debug_assert_eq!(align & (align - 1), 0); + let tail_len = (!len + 1) & (align as usize - 1); + for _ in 0..tail_len { + w.write_u8(constants::DW_CFA_nop.0)?; + } + Ok(()) +} + +fn factored_code_delta(prev_offset: u32, offset: u32, factor: u8) -> Result { + if offset < prev_offset { + return Err(Error::InvalidFrameCodeOffset(offset)); + } + let delta = offset - prev_offset; + let factor = u32::from(factor); + let factored_delta = delta / factor; + if delta != factored_delta * factor { + return Err(Error::InvalidFrameCodeOffset(offset)); + } + Ok(factored_delta) +} + +fn factored_data_offset(offset: i32, factor: i8) -> Result { + let factor = i32::from(factor); + let factored_offset = offset / factor; + if offset != factored_offset * factor { + return Err(Error::InvalidFrameDataOffset(offset)); + } + Ok(factored_offset) +} + +#[cfg(feature = "read")] +pub(crate) mod convert { + use super::*; + use crate::read::{self, Reader}; + use crate::write::{ConvertError, ConvertResult}; + use std::collections::{hash_map, HashMap}; + + impl FrameTable { + /// Create a frame table by reading the data in the given section. + /// + /// `convert_address` is a function to convert read addresses into the `Address` + /// type. For non-relocatable addresses, this function may simply return + /// `Address::Constant(address)`. For relocatable addresses, it is the caller's + /// responsibility to determine the symbol and addend corresponding to the address + /// and return `Address::Symbol { symbol, addend }`. + pub fn from( + frame: &Section, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult + where + R: Reader, + Section: read::UnwindSection, + Section::Offset: read::UnwindOffset, + { + let bases = read::BaseAddresses::default().set_eh_frame(0); + + let mut frame_table = FrameTable::default(); + + let mut cie_ids = HashMap::new(); + let mut entries = frame.entries(&bases); + while let Some(entry) = entries.next()? { + let partial = match entry { + read::CieOrFde::Cie(_) => continue, + read::CieOrFde::Fde(partial) => partial, + }; + + // TODO: is it worth caching the parsed CIEs? It would be better if FDEs only + // stored a reference. + let from_fde = partial.parse(Section::cie_from_offset)?; + let from_cie = from_fde.cie(); + let cie_id = match cie_ids.entry(from_cie.offset()) { + hash_map::Entry::Occupied(o) => *o.get(), + hash_map::Entry::Vacant(e) => { + let cie = + CommonInformationEntry::from(from_cie, frame, &bases, convert_address)?; + let cie_id = frame_table.add_cie(cie); + e.insert(cie_id); + cie_id + } + }; + let fde = FrameDescriptionEntry::from(&from_fde, frame, &bases, convert_address)?; + frame_table.add_fde(cie_id, fde); + } + + Ok(frame_table) + } + } + + impl CommonInformationEntry { + fn from( + from_cie: &read::CommonInformationEntry, + frame: &Section, + bases: &read::BaseAddresses, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult + where + R: Reader, + Section: read::UnwindSection, + Section::Offset: read::UnwindOffset, + { + let mut cie = CommonInformationEntry::new( + from_cie.encoding(), + from_cie.code_alignment_factor() as u8, + from_cie.data_alignment_factor() as i8, + from_cie.return_address_register(), + ); + + cie.personality = match from_cie.personality_with_encoding() { + // We treat these the same because the encoding already determines + // whether it is indirect. + Some((eh_pe, read::Pointer::Direct(p))) + | Some((eh_pe, read::Pointer::Indirect(p))) => { + let address = convert_address(p).ok_or(ConvertError::InvalidAddress)?; + Some((eh_pe, address)) + } + _ => None, + }; + cie.lsda_encoding = from_cie.lsda_encoding(); + cie.fde_address_encoding = from_cie + .fde_address_encoding() + .unwrap_or(constants::DW_EH_PE_absptr); + cie.signal_trampoline = from_cie.is_signal_trampoline(); + + let mut offset = 0; + let mut from_instructions = from_cie.instructions(frame, bases); + while let Some(from_instruction) = from_instructions.next()? { + if let Some(instruction) = CallFrameInstruction::from( + from_instruction, + from_cie, + frame, + convert_address, + &mut offset, + )? { + cie.instructions.push(instruction); + } + } + Ok(cie) + } + } + + impl FrameDescriptionEntry { + fn from( + from_fde: &read::FrameDescriptionEntry, + frame: &Section, + bases: &read::BaseAddresses, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult + where + R: Reader, + Section: read::UnwindSection, + Section::Offset: read::UnwindOffset, + { + let address = + convert_address(from_fde.initial_address()).ok_or(ConvertError::InvalidAddress)?; + let length = from_fde.len() as u32; + let mut fde = FrameDescriptionEntry::new(address, length); + + match from_fde.lsda() { + // We treat these the same because the encoding already determines + // whether it is indirect. + Some(read::Pointer::Direct(p)) | Some(read::Pointer::Indirect(p)) => { + let address = convert_address(p).ok_or(ConvertError::InvalidAddress)?; + fde.lsda = Some(address); + } + None => {} + } + + let from_cie = from_fde.cie(); + let mut offset = 0; + let mut from_instructions = from_fde.instructions(frame, bases); + while let Some(from_instruction) = from_instructions.next()? { + if let Some(instruction) = CallFrameInstruction::from( + from_instruction, + from_cie, + frame, + convert_address, + &mut offset, + )? { + fde.instructions.push((offset, instruction)); + } + } + + Ok(fde) + } + } + + impl CallFrameInstruction { + fn from( + from_instruction: read::CallFrameInstruction, + from_cie: &read::CommonInformationEntry, + frame: &Section, + convert_address: &dyn Fn(u64) -> Option
, + offset: &mut u32, + ) -> ConvertResult> + where + R: Reader, + Section: read::UnwindSection, + { + let convert_expression = + |x| Expression::from(x, from_cie.encoding(), None, None, None, convert_address); + // TODO: validate integer type conversions + Ok(Some(match from_instruction { + read::CallFrameInstruction::SetLoc { .. } => { + return Err(ConvertError::UnsupportedCfiInstruction); + } + read::CallFrameInstruction::AdvanceLoc { delta } => { + *offset += delta * from_cie.code_alignment_factor() as u32; + return Ok(None); + } + read::CallFrameInstruction::DefCfa { register, offset } => { + CallFrameInstruction::Cfa(register, offset as i32) + } + read::CallFrameInstruction::DefCfaSf { + register, + factored_offset, + } => { + let offset = factored_offset * from_cie.data_alignment_factor(); + CallFrameInstruction::Cfa(register, offset as i32) + } + read::CallFrameInstruction::DefCfaRegister { register } => { + CallFrameInstruction::CfaRegister(register) + } + + read::CallFrameInstruction::DefCfaOffset { offset } => { + CallFrameInstruction::CfaOffset(offset as i32) + } + read::CallFrameInstruction::DefCfaOffsetSf { factored_offset } => { + let offset = factored_offset * from_cie.data_alignment_factor(); + CallFrameInstruction::CfaOffset(offset as i32) + } + read::CallFrameInstruction::DefCfaExpression { expression } => { + let expression = expression.get(frame)?; + CallFrameInstruction::CfaExpression(convert_expression(expression)?) 
+ } + read::CallFrameInstruction::Undefined { register } => { + CallFrameInstruction::Undefined(register) + } + read::CallFrameInstruction::SameValue { register } => { + CallFrameInstruction::SameValue(register) + } + read::CallFrameInstruction::Offset { + register, + factored_offset, + } => { + let offset = factored_offset as i64 * from_cie.data_alignment_factor(); + CallFrameInstruction::Offset(register, offset as i32) + } + read::CallFrameInstruction::OffsetExtendedSf { + register, + factored_offset, + } => { + let offset = factored_offset * from_cie.data_alignment_factor(); + CallFrameInstruction::Offset(register, offset as i32) + } + read::CallFrameInstruction::ValOffset { + register, + factored_offset, + } => { + let offset = factored_offset as i64 * from_cie.data_alignment_factor(); + CallFrameInstruction::ValOffset(register, offset as i32) + } + read::CallFrameInstruction::ValOffsetSf { + register, + factored_offset, + } => { + let offset = factored_offset * from_cie.data_alignment_factor(); + CallFrameInstruction::ValOffset(register, offset as i32) + } + read::CallFrameInstruction::Register { + dest_register, + src_register, + } => CallFrameInstruction::Register(dest_register, src_register), + read::CallFrameInstruction::Expression { + register, + expression, + } => { + let expression = expression.get(frame)?; + CallFrameInstruction::Expression(register, convert_expression(expression)?) + } + read::CallFrameInstruction::ValExpression { + register, + expression, + } => { + let expression = expression.get(frame)?; + CallFrameInstruction::ValExpression(register, convert_expression(expression)?) 
+ } + read::CallFrameInstruction::Restore { register } => { + CallFrameInstruction::Restore(register) + } + read::CallFrameInstruction::RememberState => CallFrameInstruction::RememberState, + read::CallFrameInstruction::RestoreState => CallFrameInstruction::RestoreState, + read::CallFrameInstruction::ArgsSize { size } => { + CallFrameInstruction::ArgsSize(size as u32) + } + read::CallFrameInstruction::NegateRaState => CallFrameInstruction::NegateRaState, + read::CallFrameInstruction::Nop => return Ok(None), + })) + } + } +} + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::arch::X86_64; + use crate::read; + use crate::write::EndianVec; + use crate::{LittleEndian, Vendor}; + + #[test] + fn test_frame_table() { + for &version in &[1, 3, 4] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let mut frames = FrameTable::default(); + + let cie1 = CommonInformationEntry::new(encoding, 1, 8, X86_64::RA); + let cie1_id = frames.add_cie(cie1.clone()); + assert_eq!(cie1_id, frames.add_cie(cie1.clone())); + + let mut cie2 = CommonInformationEntry::new(encoding, 1, 8, X86_64::RA); + cie2.lsda_encoding = Some(constants::DW_EH_PE_absptr); + cie2.personality = + Some((constants::DW_EH_PE_absptr, Address::Constant(0x1234))); + cie2.signal_trampoline = true; + let cie2_id = frames.add_cie(cie2.clone()); + assert_ne!(cie1_id, cie2_id); + assert_eq!(cie2_id, frames.add_cie(cie2.clone())); + + let fde1 = FrameDescriptionEntry::new(Address::Constant(0x1000), 0x10); + frames.add_fde(cie1_id, fde1.clone()); + + let fde2 = FrameDescriptionEntry::new(Address::Constant(0x2000), 0x20); + frames.add_fde(cie1_id, fde2.clone()); + + let mut fde3 = FrameDescriptionEntry::new(Address::Constant(0x3000), 0x30); + fde3.lsda = Some(Address::Constant(0x3300)); + frames.add_fde(cie2_id, fde3.clone()); + + let mut fde4 = 
FrameDescriptionEntry::new(Address::Constant(0x4000), 0x40); + fde4.lsda = Some(Address::Constant(0x4400)); + frames.add_fde(cie2_id, fde4.clone()); + + let mut cie3 = CommonInformationEntry::new(encoding, 1, 8, X86_64::RA); + cie3.fde_address_encoding = + constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4; + cie3.lsda_encoding = + Some(constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4); + cie3.personality = Some(( + constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4, + Address::Constant(0x1235), + )); + cie3.signal_trampoline = true; + let cie3_id = frames.add_cie(cie3.clone()); + assert_ne!(cie2_id, cie3_id); + assert_eq!(cie3_id, frames.add_cie(cie3.clone())); + + let mut fde5 = FrameDescriptionEntry::new(Address::Constant(0x5000), 0x50); + fde5.lsda = Some(Address::Constant(0x5500)); + frames.add_fde(cie3_id, fde5.clone()); + + // Test writing `.debug_frame`. + let mut debug_frame = DebugFrame::from(EndianVec::new(LittleEndian)); + frames.write_debug_frame(&mut debug_frame).unwrap(); + + let mut read_debug_frame = + read::DebugFrame::new(debug_frame.slice(), LittleEndian); + read_debug_frame.set_address_size(address_size); + let convert_frames = FrameTable::from(&read_debug_frame, &|address| { + Some(Address::Constant(address)) + }) + .unwrap(); + assert_eq!(frames.cies, convert_frames.cies); + assert_eq!(frames.fdes.len(), convert_frames.fdes.len()); + for (a, b) in frames.fdes.iter().zip(convert_frames.fdes.iter()) { + assert_eq!(a.1, b.1); + } + + if version == 1 { + // Test writing `.eh_frame`. 
+ let mut eh_frame = EhFrame::from(EndianVec::new(LittleEndian)); + frames.write_eh_frame(&mut eh_frame).unwrap(); + + let mut read_eh_frame = read::EhFrame::new(eh_frame.slice(), LittleEndian); + read_eh_frame.set_address_size(address_size); + let convert_frames = FrameTable::from(&read_eh_frame, &|address| { + Some(Address::Constant(address)) + }) + .unwrap(); + assert_eq!(frames.cies, convert_frames.cies); + assert_eq!(frames.fdes.len(), convert_frames.fdes.len()); + for (a, b) in frames.fdes.iter().zip(convert_frames.fdes.iter()) { + assert_eq!(a.1, b.1); + } + } + } + } + } + } + + #[test] + fn test_frame_instruction() { + let mut expression = Expression::new(); + expression.op_constu(0); + + let cie_instructions = [ + CallFrameInstruction::Cfa(X86_64::RSP, 8), + CallFrameInstruction::Offset(X86_64::RA, -8), + ]; + + let fde_instructions = [ + (0, CallFrameInstruction::Cfa(X86_64::RSP, 0)), + (0, CallFrameInstruction::Cfa(X86_64::RSP, -8)), + (2, CallFrameInstruction::CfaRegister(X86_64::RBP)), + (4, CallFrameInstruction::CfaOffset(8)), + (4, CallFrameInstruction::CfaOffset(0)), + (4, CallFrameInstruction::CfaOffset(-8)), + (6, CallFrameInstruction::CfaExpression(expression.clone())), + (8, CallFrameInstruction::Restore(Register(1))), + (8, CallFrameInstruction::Restore(Register(101))), + (10, CallFrameInstruction::Undefined(Register(2))), + (12, CallFrameInstruction::SameValue(Register(3))), + (14, CallFrameInstruction::Offset(Register(4), 16)), + (14, CallFrameInstruction::Offset(Register(104), 16)), + (16, CallFrameInstruction::ValOffset(Register(5), -24)), + (16, CallFrameInstruction::ValOffset(Register(5), 24)), + (18, CallFrameInstruction::Register(Register(6), Register(7))), + ( + 20, + CallFrameInstruction::Expression(Register(8), expression.clone()), + ), + ( + 22, + CallFrameInstruction::ValExpression(Register(9), expression.clone()), + ), + (24 + 0x80, CallFrameInstruction::RememberState), + (26 + 0x280, CallFrameInstruction::RestoreState), + (28 + 
0x20280, CallFrameInstruction::ArgsSize(23)), + ]; + + let fde_instructions_aarch64 = [(0, CallFrameInstruction::NegateRaState)]; + + for &version in &[1, 3, 4] { + for &address_size in &[4, 8] { + for &vendor in &[Vendor::Default, Vendor::AArch64] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let mut frames = FrameTable::default(); + + let mut cie = CommonInformationEntry::new(encoding, 2, 8, X86_64::RA); + for i in &cie_instructions { + cie.add_instruction(i.clone()); + } + let cie_id = frames.add_cie(cie); + + let mut fde = FrameDescriptionEntry::new(Address::Constant(0x1000), 0x10); + for (o, i) in &fde_instructions { + fde.add_instruction(*o, i.clone()); + } + frames.add_fde(cie_id, fde); + + if vendor == Vendor::AArch64 { + let mut fde = + FrameDescriptionEntry::new(Address::Constant(0x2000), 0x10); + for (o, i) in &fde_instructions_aarch64 { + fde.add_instruction(*o, i.clone()); + } + frames.add_fde(cie_id, fde); + } + + let mut debug_frame = DebugFrame::from(EndianVec::new(LittleEndian)); + frames.write_debug_frame(&mut debug_frame).unwrap(); + + let mut read_debug_frame = + read::DebugFrame::new(debug_frame.slice(), LittleEndian); + read_debug_frame.set_address_size(address_size); + read_debug_frame.set_vendor(vendor); + let frames = FrameTable::from(&read_debug_frame, &|address| { + Some(Address::Constant(address)) + }) + .unwrap(); + + assert_eq!( + &frames.cies.get_index(0).unwrap().instructions, + &cie_instructions + ); + assert_eq!(&frames.fdes[0].1.instructions, &fde_instructions); + if vendor == Vendor::AArch64 { + assert_eq!(&frames.fdes[1].1.instructions, &fde_instructions_aarch64); + } + } + } + } + } + } +} diff --git a/third_party/rust/gimli/src/write/dwarf.rs b/third_party/rust/gimli/src/write/dwarf.rs new file mode 100644 index 000000000000..ea507126a537 --- /dev/null +++ b/third_party/rust/gimli/src/write/dwarf.rs @@ -0,0 +1,138 @@ +use alloc::vec::Vec; + +use 
crate::common::Encoding; +use crate::write::{ + AbbreviationTable, LineProgram, LineStringTable, Result, Sections, StringTable, Unit, + UnitTable, Writer, +}; + +/// Writable DWARF information for more than one unit. +#[derive(Debug, Default)] +pub struct Dwarf { + /// A table of units. These are primarily stored in the `.debug_info` section, + /// but they also contain information that is stored in other sections. + pub units: UnitTable, + + /// Extra line number programs that are not associated with a unit. + /// + /// These should only be used when generating DWARF5 line-only debug + /// information. + pub line_programs: Vec, + + /// A table of strings that will be stored in the `.debug_line_str` section. + pub line_strings: LineStringTable, + + /// A table of strings that will be stored in the `.debug_str` section. + pub strings: StringTable, +} + +impl Dwarf { + /// Create a new `Dwarf` instance. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Write the DWARF information to the given sections. + pub fn write(&mut self, sections: &mut Sections) -> Result<()> { + let line_strings = self.line_strings.write(&mut sections.debug_line_str)?; + let strings = self.strings.write(&mut sections.debug_str)?; + self.units.write(sections, &line_strings, &strings)?; + for line_program in &self.line_programs { + line_program.write( + &mut sections.debug_line, + line_program.encoding(), + &line_strings, + &strings, + )?; + } + Ok(()) + } +} + +/// Writable DWARF information for a single unit. +#[derive(Debug)] +pub struct DwarfUnit { + /// A unit. This is primarily stored in the `.debug_info` section, + /// but also contains information that is stored in other sections. + pub unit: Unit, + + /// A table of strings that will be stored in the `.debug_line_str` section. + pub line_strings: LineStringTable, + + /// A table of strings that will be stored in the `.debug_str` section. + pub strings: StringTable, +} + +impl DwarfUnit { + /// Create a new `DwarfUnit`. 
+ /// + /// Note: you should set `self.unit.line_program` after creation. + /// This cannot be done earlier because it may need to reference + /// `self.line_strings`. + pub fn new(encoding: Encoding) -> Self { + let unit = Unit::new(encoding, LineProgram::none()); + DwarfUnit { + unit, + line_strings: LineStringTable::default(), + strings: StringTable::default(), + } + } + + /// Write the DWARf information to the given sections. + pub fn write(&mut self, sections: &mut Sections) -> Result<()> { + let line_strings = self.line_strings.write(&mut sections.debug_line_str)?; + let strings = self.strings.write(&mut sections.debug_str)?; + + let abbrev_offset = sections.debug_abbrev.offset(); + let mut abbrevs = AbbreviationTable::default(); + + self.unit.write( + sections, + abbrev_offset, + &mut abbrevs, + &line_strings, + &strings, + )?; + // None should exist because we didn't give out any UnitId. + assert!(sections.debug_info_refs.is_empty()); + assert!(sections.debug_loc_refs.is_empty()); + assert!(sections.debug_loclists_refs.is_empty()); + + abbrevs.write(&mut sections.debug_abbrev)?; + Ok(()) + } +} + +#[cfg(feature = "read")] +pub(crate) mod convert { + use super::*; + use crate::read::{self, Reader}; + use crate::write::{Address, ConvertResult}; + + impl Dwarf { + /// Create a `write::Dwarf` by converting a `read::Dwarf`. + /// + /// `convert_address` is a function to convert read addresses into the `Address` + /// type. For non-relocatable addresses, this function may simply return + /// `Address::Constant(address)`. For relocatable addresses, it is the caller's + /// responsibility to determine the symbol and addend corresponding to the address + /// and return `Address::Symbol { symbol, addend }`. + pub fn from>( + dwarf: &read::Dwarf, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult { + let mut line_strings = LineStringTable::default(); + let mut strings = StringTable::default(); + let units = UnitTable::from(dwarf, &mut line_strings, &mut strings, convert_address)?; + // TODO: convert the line programs that were not referenced by a unit. + let line_programs = Vec::new(); + Ok(Dwarf { + units, + line_programs, + line_strings, + strings, + }) + } + } +} diff --git a/third_party/rust/gimli/src/write/endian_vec.rs b/third_party/rust/gimli/src/write/endian_vec.rs new file mode 100644 index 000000000000..7b040606a0fb --- /dev/null +++ b/third_party/rust/gimli/src/write/endian_vec.rs @@ -0,0 +1,117 @@ +use alloc::vec::Vec; +use std::mem; + +use crate::endianity::Endianity; +use crate::write::{Error, Result, Writer}; + +/// A `Vec` with endianity metadata. +/// +/// This implements the `Writer` trait, which is used for all writing of DWARF sections. +#[derive(Debug, Clone)] +pub struct EndianVec +where + Endian: Endianity, +{ + vec: Vec, + endian: Endian, +} + +impl EndianVec +where + Endian: Endianity, +{ + /// Construct an empty `EndianVec` with the given endianity. + pub fn new(endian: Endian) -> EndianVec { + EndianVec { + vec: Vec::new(), + endian, + } + } + + /// Return a reference to the raw slice. + pub fn slice(&self) -> &[u8] { + &self.vec + } + + /// Convert into a `Vec`. + pub fn into_vec(self) -> Vec { + self.vec + } + + /// Take any written data out of the `EndianVec`, leaving an empty `Vec` in its place. 
+ pub fn take(&mut self) -> Vec { + let mut vec = Vec::new(); + mem::swap(&mut self.vec, &mut vec); + vec + } +} + +impl Writer for EndianVec +where + Endian: Endianity, +{ + type Endian = Endian; + + #[inline] + fn endian(&self) -> Self::Endian { + self.endian + } + + #[inline] + fn len(&self) -> usize { + self.vec.len() + } + + fn write(&mut self, bytes: &[u8]) -> Result<()> { + self.vec.extend(bytes); + Ok(()) + } + + fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> { + if offset > self.vec.len() { + return Err(Error::OffsetOutOfBounds); + } + let to = &mut self.vec[offset..]; + if bytes.len() > to.len() { + return Err(Error::LengthOutOfBounds); + } + let to = &mut to[..bytes.len()]; + to.copy_from_slice(bytes); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::LittleEndian; + + #[test] + fn test_endian_vec() { + let mut w = EndianVec::new(LittleEndian); + assert_eq!(w.endian(), LittleEndian); + assert_eq!(w.len(), 0); + + w.write(&[1, 2]).unwrap(); + assert_eq!(w.slice(), &[1, 2]); + assert_eq!(w.len(), 2); + + w.write(&[3, 4, 5]).unwrap(); + assert_eq!(w.slice(), &[1, 2, 3, 4, 5]); + assert_eq!(w.len(), 5); + + w.write_at(0, &[6, 7]).unwrap(); + assert_eq!(w.slice(), &[6, 7, 3, 4, 5]); + assert_eq!(w.len(), 5); + + w.write_at(3, &[8, 9]).unwrap(); + assert_eq!(w.slice(), &[6, 7, 3, 8, 9]); + assert_eq!(w.len(), 5); + + assert_eq!(w.write_at(4, &[6, 7]), Err(Error::LengthOutOfBounds)); + assert_eq!(w.write_at(5, &[6, 7]), Err(Error::LengthOutOfBounds)); + assert_eq!(w.write_at(6, &[6, 7]), Err(Error::OffsetOutOfBounds)); + + assert_eq!(w.into_vec(), vec![6, 7, 3, 8, 9]); + } +} diff --git a/third_party/rust/gimli/src/write/line.rs b/third_party/rust/gimli/src/write/line.rs new file mode 100644 index 000000000000..c78e01e418a9 --- /dev/null +++ b/third_party/rust/gimli/src/write/line.rs @@ -0,0 +1,1957 @@ +use alloc::vec::Vec; +use indexmap::{IndexMap, IndexSet}; +use std::ops::{Deref, DerefMut}; + +use 
crate::common::{DebugLineOffset, Encoding, Format, LineEncoding, SectionId}; +use crate::constants; +use crate::leb128; +use crate::write::{ + Address, DebugLineStrOffsets, DebugStrOffsets, Error, LineStringId, LineStringTable, Result, + Section, StringId, Writer, +}; + +/// The number assigned to the first special opcode. +// +// We output all instructions for all DWARF versions, since readers +// should be able to ignore instructions they don't support. +const OPCODE_BASE: u8 = 13; + +/// A line number program. +#[derive(Debug, Clone)] +pub struct LineProgram { + /// True if this line program was created with `LineProgram::none()`. + none: bool, + encoding: Encoding, + line_encoding: LineEncoding, + + /// A list of source directory path names. + /// + /// If a path is relative, then the directory is located relative to the working + /// directory of the compilation unit. + /// + /// The first entry is for the working directory of the compilation unit. + directories: IndexSet, + + /// A list of source file entries. + /// + /// Each entry has a path name and a directory. + /// + /// If a path is a relative, then the file is located relative to the + /// directory. Otherwise the directory is meaningless. + /// + /// Does not include comp_file, even for version >= 5. + files: IndexMap<(LineString, DirectoryId), FileInfo>, + + /// The primary source file of the compilation unit. + /// This is required for version >= 5, but we never reference it elsewhere + /// because DWARF defines DW_AT_decl_file=0 to mean not specified. + comp_file: (LineString, FileInfo), + + /// True if the file entries may have valid timestamps. + /// + /// Entries may still have a timestamp of 0 even if this is set. + /// For version <= 4, this is ignored. + /// For version 5, this controls whether to emit `DW_LNCT_timestamp`. + pub file_has_timestamp: bool, + + /// True if the file entries may have valid sizes. + /// + /// Entries may still have a size of 0 even if this is set. 
+ /// For version <= 4, this is ignored. + /// For version 5, this controls whether to emit `DW_LNCT_size`. + pub file_has_size: bool, + + /// True if the file entries have valid MD5 checksums. + /// + /// For version <= 4, this is ignored. + /// For version 5, this controls whether to emit `DW_LNCT_MD5`. + pub file_has_md5: bool, + + prev_row: LineRow, + row: LineRow, + // TODO: this probably should be either rows or sequences instead + instructions: Vec, + in_sequence: bool, +} + +impl LineProgram { + /// Create a new `LineProgram`. + /// + /// `comp_dir` defines the working directory of the compilation unit, + /// and must be the same as the `DW_AT_comp_dir` attribute + /// of the compilation unit DIE. + /// + /// `comp_file` and `comp_file_info` define the primary source file + /// of the compilation unit and must be the same as the `DW_AT_name` + /// attribute of the compilation unit DIE. + /// + /// # Panics + /// + /// Panics if `line_encoding.line_base` > 0. + /// + /// Panics if `line_encoding.line_base` + `line_encoding.line_range` <= 0. + /// + /// Panics if `comp_dir` is empty or contains a null byte. + /// + /// Panics if `comp_file` is empty or contains a null byte. + pub fn new( + encoding: Encoding, + line_encoding: LineEncoding, + comp_dir: LineString, + comp_file: LineString, + comp_file_info: Option, + ) -> LineProgram { + // We require a special opcode for a line advance of 0. + // See the debug_asserts in generate_row(). 
+ assert!(line_encoding.line_base <= 0); + assert!(line_encoding.line_base + line_encoding.line_range as i8 > 0); + let mut program = LineProgram { + none: false, + encoding, + line_encoding, + directories: IndexSet::new(), + files: IndexMap::new(), + comp_file: (comp_file, comp_file_info.unwrap_or_default()), + prev_row: LineRow::initial_state(line_encoding), + row: LineRow::initial_state(line_encoding), + instructions: Vec::new(), + in_sequence: false, + file_has_timestamp: false, + file_has_size: false, + file_has_md5: false, + }; + // For all DWARF versions, directory index 0 is comp_dir. + // For version <= 4, the entry is implicit. We still add + // it here so that we use it, but we don't emit it. + program.add_directory(comp_dir); + program + } + + /// Create a new `LineProgram` with no fields set. + /// + /// This can be used when the `LineProgram` will not be used. + /// + /// You should not attempt to add files or line instructions to + /// this line program, or write it to the `.debug_line` section. + pub fn none() -> Self { + let line_encoding = LineEncoding::default(); + LineProgram { + none: true, + encoding: Encoding { + format: Format::Dwarf32, + version: 2, + address_size: 0, + }, + line_encoding, + directories: IndexSet::new(), + files: IndexMap::new(), + comp_file: (LineString::String(Vec::new()), FileInfo::default()), + prev_row: LineRow::initial_state(line_encoding), + row: LineRow::initial_state(line_encoding), + instructions: Vec::new(), + in_sequence: false, + file_has_timestamp: false, + file_has_size: false, + file_has_md5: false, + } + } + + /// Return true if this line program was created with `LineProgram::none()`. + #[inline] + pub fn is_none(&self) -> bool { + self.none + } + + /// Return the encoding parameters for this line program. + #[inline] + pub fn encoding(&self) -> Encoding { + self.encoding + } + + /// Return the DWARF version for this line program. 
+ #[inline] + pub fn version(&self) -> u16 { + self.encoding.version + } + + /// Return the address size in bytes for this line program. + #[inline] + pub fn address_size(&self) -> u8 { + self.encoding.address_size + } + + /// Return the DWARF format for this line program. + #[inline] + pub fn format(&self) -> Format { + self.encoding.format + } + + /// Return the id for the working directory of the compilation unit. + #[inline] + pub fn default_directory(&self) -> DirectoryId { + DirectoryId(0) + } + + /// Add a directory entry and return its id. + /// + /// If the directory already exists, then return the id of the existing entry. + /// + /// If the path is relative, then the directory is located relative to the working + /// directory of the compilation unit. + /// + /// # Panics + /// + /// Panics if `directory` is empty or contains a null byte. + pub fn add_directory(&mut self, directory: LineString) -> DirectoryId { + if let LineString::String(ref val) = directory { + // For DWARF version <= 4, directories must not be empty. + // The first directory isn't emitted so skip the check for it. + if self.encoding.version <= 4 && !self.directories.is_empty() { + assert!(!val.is_empty()); + } + assert!(!val.contains(&0)); + } + let (index, _) = self.directories.insert_full(directory); + DirectoryId(index) + } + + /// Get a reference to a directory entry. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + pub fn get_directory(&self, id: DirectoryId) -> &LineString { + self.directories.get_index(id.0).unwrap() + } + + /// Add a file entry and return its id. + /// + /// If the file already exists, then return the id of the existing entry. + /// + /// If the file path is relative, then the file is located relative + /// to the directory. Otherwise the directory is meaningless, but it + /// is still used as a key for file entries. + /// + /// If `info` is `None`, then new entries are assigned + /// default information, and existing entries are unmodified. 
+ /// + /// If `info` is not `None`, then it is always assigned to the + /// entry, even if the entry already exists. + /// + /// # Panics + /// + /// Panics if 'file' is empty or contains a null byte. + pub fn add_file( + &mut self, + file: LineString, + directory: DirectoryId, + info: Option, + ) -> FileId { + if let LineString::String(ref val) = file { + assert!(!val.is_empty()); + assert!(!val.contains(&0)); + } + + let key = (file, directory); + let index = if let Some(info) = info { + let (index, _) = self.files.insert_full(key, info); + index + } else { + let entry = self.files.entry(key); + let index = entry.index(); + entry.or_default(); + index + }; + FileId::new(index) + } + + /// Get a reference to a file entry. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + pub fn get_file(&self, id: FileId) -> (&LineString, DirectoryId) { + match id.index() { + None => (&self.comp_file.0, DirectoryId(0)), + Some(index) => self + .files + .get_index(index) + .map(|entry| (&(entry.0).0, (entry.0).1)) + .unwrap(), + } + } + + /// Get a reference to the info for a file entry. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + pub fn get_file_info(&self, id: FileId) -> &FileInfo { + match id.index() { + None => &self.comp_file.1, + Some(index) => self.files.get_index(index).map(|entry| entry.1).unwrap(), + } + } + + /// Get a mutable reference to the info for a file entry. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + pub fn get_file_info_mut(&mut self, id: FileId) -> &mut FileInfo { + match id.index() { + None => &mut self.comp_file.1, + Some(index) => self + .files + .get_index_mut(index) + .map(|entry| entry.1) + .unwrap(), + } + } + + /// Begin a new sequence and set its base address. + /// + /// # Panics + /// + /// Panics if a sequence has already begun. + pub fn begin_sequence(&mut self, address: Option
) { + assert!(!self.in_sequence); + self.in_sequence = true; + if let Some(address) = address { + self.instructions.push(LineInstruction::SetAddress(address)); + } + } + + /// End the sequence, and reset the row to its default values. + /// + /// Only the `address_offset` and op_index` fields of the current row are used. + /// + /// # Panics + /// + /// Panics if a sequence has not begun. + pub fn end_sequence(&mut self, address_offset: u64) { + assert!(self.in_sequence); + self.in_sequence = false; + self.row.address_offset = address_offset; + let op_advance = self.op_advance(); + if op_advance != 0 { + self.instructions + .push(LineInstruction::AdvancePc(op_advance)); + } + self.instructions.push(LineInstruction::EndSequence); + self.prev_row = LineRow::initial_state(self.line_encoding); + self.row = LineRow::initial_state(self.line_encoding); + } + + /// Return true if a sequence has begun. + #[inline] + pub fn in_sequence(&self) -> bool { + self.in_sequence + } + + /// Returns a reference to the data for the current row. + #[inline] + pub fn row(&mut self) -> &mut LineRow { + &mut self.row + } + + /// Generates the line number information instructions for the current row. + /// + /// After the instructions are generated, it sets `discriminator` to 0, and sets + /// `basic_block`, `prologue_end`, and `epilogue_begin` to false. + /// + /// # Panics + /// + /// Panics if a sequence has not begun. + /// Panics if the address_offset decreases. + pub fn generate_row(&mut self) { + assert!(self.in_sequence); + + // Output fields that are reset on every row. 
+ if self.row.discriminator != 0 { + self.instructions + .push(LineInstruction::SetDiscriminator(self.row.discriminator)); + self.row.discriminator = 0; + } + if self.row.basic_block { + self.instructions.push(LineInstruction::SetBasicBlock); + self.row.basic_block = false; + } + if self.row.prologue_end { + self.instructions.push(LineInstruction::SetPrologueEnd); + self.row.prologue_end = false; + } + if self.row.epilogue_begin { + self.instructions.push(LineInstruction::SetEpilogueBegin); + self.row.epilogue_begin = false; + } + + // Output fields that are not reset on every row. + if self.row.is_statement != self.prev_row.is_statement { + self.instructions.push(LineInstruction::NegateStatement); + } + if self.row.file != self.prev_row.file { + self.instructions + .push(LineInstruction::SetFile(self.row.file)); + } + if self.row.column != self.prev_row.column { + self.instructions + .push(LineInstruction::SetColumn(self.row.column)); + } + if self.row.isa != self.prev_row.isa { + self.instructions + .push(LineInstruction::SetIsa(self.row.isa)); + } + + // Advance the line, address, and operation index. + let line_base = i64::from(self.line_encoding.line_base) as u64; + let line_range = u64::from(self.line_encoding.line_range); + let line_advance = self.row.line as i64 - self.prev_row.line as i64; + let op_advance = self.op_advance(); + + // Default to special advances of 0. 
+ let special_base = u64::from(OPCODE_BASE); + // TODO: handle lack of special opcodes for 0 line advance + debug_assert!(self.line_encoding.line_base <= 0); + debug_assert!(self.line_encoding.line_base + self.line_encoding.line_range as i8 >= 0); + let special_default = special_base.wrapping_sub(line_base); + let mut special = special_default; + let mut use_special = false; + + if line_advance != 0 { + let special_line = (line_advance as u64).wrapping_sub(line_base); + if special_line < line_range { + special = special_base + special_line; + use_special = true; + } else { + self.instructions + .push(LineInstruction::AdvanceLine(line_advance)); + } + } + + if op_advance != 0 { + // Using ConstAddPc can save a byte. + let (special_op_advance, const_add_pc) = if special + op_advance * line_range <= 255 { + (op_advance, false) + } else { + let op_range = (255 - special_base) / line_range; + (op_advance - op_range, true) + }; + + let special_op = special_op_advance * line_range; + if special + special_op <= 255 { + special += special_op; + use_special = true; + if const_add_pc { + self.instructions.push(LineInstruction::ConstAddPc); + } + } else { + self.instructions + .push(LineInstruction::AdvancePc(op_advance)); + } + } + + if use_special && special != special_default { + debug_assert!(special >= special_base); + debug_assert!(special <= 255); + self.instructions + .push(LineInstruction::Special(special as u8)); + } else { + self.instructions.push(LineInstruction::Copy); + } + + self.prev_row = self.row; + } + + fn op_advance(&self) -> u64 { + debug_assert!(self.row.address_offset >= self.prev_row.address_offset); + let mut address_advance = self.row.address_offset - self.prev_row.address_offset; + if self.line_encoding.minimum_instruction_length != 1 { + debug_assert_eq!( + self.row.address_offset % u64::from(self.line_encoding.minimum_instruction_length), + 0 + ); + address_advance /= u64::from(self.line_encoding.minimum_instruction_length); + } + address_advance 
* u64::from(self.line_encoding.maximum_operations_per_instruction) + + self.row.op_index + - self.prev_row.op_index + } + + /// Returns true if the line number program has no instructions. + /// + /// Does not check the file or directory entries. + #[inline] + pub fn is_empty(&self) -> bool { + self.instructions.is_empty() + } + + /// Write the line number program to the given section. + /// + /// # Panics + /// + /// Panics if `self.is_none()`. + pub fn write( + &self, + w: &mut DebugLine, + encoding: Encoding, + debug_line_str_offsets: &DebugLineStrOffsets, + debug_str_offsets: &DebugStrOffsets, + ) -> Result { + assert!(!self.is_none()); + + if encoding.version < self.version() + || encoding.format != self.format() + || encoding.address_size != self.address_size() + { + return Err(Error::IncompatibleLineProgramEncoding); + } + + let offset = w.offset(); + + let length_offset = w.write_initial_length(self.format())?; + let length_base = w.len(); + + if self.version() < 2 || self.version() > 5 { + return Err(Error::UnsupportedVersion(self.version())); + } + w.write_u16(self.version())?; + + if self.version() >= 5 { + w.write_u8(self.address_size())?; + // Segment selector size. + w.write_u8(0)?; + } + + let header_length_offset = w.len(); + w.write_udata(0, self.format().word_size())?; + let header_length_base = w.len(); + + w.write_u8(self.line_encoding.minimum_instruction_length)?; + if self.version() >= 4 { + w.write_u8(self.line_encoding.maximum_operations_per_instruction)?; + } else if self.line_encoding.maximum_operations_per_instruction != 1 { + return Err(Error::NeedVersion(4)); + }; + w.write_u8(if self.line_encoding.default_is_stmt { + 1 + } else { + 0 + })?; + w.write_u8(self.line_encoding.line_base as u8)?; + w.write_u8(self.line_encoding.line_range)?; + w.write_u8(OPCODE_BASE)?; + w.write(&[0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1])?; + + if self.version() <= 4 { + // The first directory is stored as DW_AT_comp_dir. 
+ for dir in self.directories.iter().skip(1) { + dir.write( + w, + constants::DW_FORM_string, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + } + w.write_u8(0)?; + + for ((file, dir), info) in self.files.iter() { + file.write( + w, + constants::DW_FORM_string, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + w.write_uleb128(dir.0 as u64)?; + w.write_uleb128(info.timestamp)?; + w.write_uleb128(info.size)?; + } + w.write_u8(0)?; + } else { + // Directory entry formats (only ever 1). + w.write_u8(1)?; + w.write_uleb128(u64::from(constants::DW_LNCT_path.0))?; + let dir_form = self.directories.get_index(0).unwrap().form(); + w.write_uleb128(dir_form.0.into())?; + + // Directory entries. + w.write_uleb128(self.directories.len() as u64)?; + for dir in self.directories.iter() { + dir.write( + w, + dir_form, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + } + + // File name entry formats. + let count = 2 + + if self.file_has_timestamp { 1 } else { 0 } + + if self.file_has_size { 1 } else { 0 } + + if self.file_has_md5 { 1 } else { 0 }; + w.write_u8(count)?; + w.write_uleb128(u64::from(constants::DW_LNCT_path.0))?; + let file_form = self.comp_file.0.form(); + w.write_uleb128(file_form.0.into())?; + w.write_uleb128(u64::from(constants::DW_LNCT_directory_index.0))?; + w.write_uleb128(constants::DW_FORM_udata.0.into())?; + if self.file_has_timestamp { + w.write_uleb128(u64::from(constants::DW_LNCT_timestamp.0))?; + w.write_uleb128(constants::DW_FORM_udata.0.into())?; + } + if self.file_has_size { + w.write_uleb128(u64::from(constants::DW_LNCT_size.0))?; + w.write_uleb128(constants::DW_FORM_udata.0.into())?; + } + if self.file_has_md5 { + w.write_uleb128(u64::from(constants::DW_LNCT_MD5.0))?; + w.write_uleb128(constants::DW_FORM_data16.0.into())?; + } + + // File name entries. 
+ w.write_uleb128(self.files.len() as u64 + 1)?; + let mut write_file = |file: &LineString, dir: DirectoryId, info: &FileInfo| { + file.write( + w, + file_form, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + w.write_uleb128(dir.0 as u64)?; + if self.file_has_timestamp { + w.write_uleb128(info.timestamp)?; + } + if self.file_has_size { + w.write_uleb128(info.size)?; + } + if self.file_has_md5 { + w.write(&info.md5)?; + } + Ok(()) + }; + write_file(&self.comp_file.0, DirectoryId(0), &self.comp_file.1)?; + for ((file, dir), info) in self.files.iter() { + write_file(file, *dir, info)?; + } + } + + let header_length = (w.len() - header_length_base) as u64; + w.write_udata_at( + header_length_offset, + header_length, + self.format().word_size(), + )?; + + for instruction in &self.instructions { + instruction.write(w, self.address_size())?; + } + + let length = (w.len() - length_base) as u64; + w.write_initial_length_at(length_offset, length, self.format())?; + + Ok(offset) + } +} + +/// A row in the line number table that corresponds to a machine instruction. +#[derive(Debug, Clone, Copy)] +pub struct LineRow { + /// The offset of the instruction from the start address of the sequence. + pub address_offset: u64, + /// The index of an operation within a VLIW instruction. + /// + /// The index of the first operation is 0. + /// Set to 0 for non-VLIW instructions. + pub op_index: u64, + + /// The source file corresponding to the instruction. + pub file: FileId, + /// The line number within the source file. + /// + /// Lines are numbered beginning at 1. Set to 0 if there is no source line. + pub line: u64, + /// The column number within the source line. + /// + /// Columns are numbered beginning at 1. Set to 0 for the "left edge" of the line. + pub column: u64, + /// An additional discriminator used to distinguish between source locations. + /// This value is assigned arbitrarily by the DWARF producer. 
+ pub discriminator: u64, + + /// Set to true if the instruction is a recommended breakpoint for a statement. + pub is_statement: bool, + /// Set to true if the instruction is the beginning of a basic block. + pub basic_block: bool, + /// Set to true if the instruction is a recommended breakpoint at the entry of a + /// function. + pub prologue_end: bool, + /// Set to true if the instruction is a recommended breakpoint prior to the exit of + /// a function. + pub epilogue_begin: bool, + + /// The instruction set architecture of the instruction. + /// + /// Set to 0 for the default ISA. Other values are defined by the architecture ABI. + pub isa: u64, +} + +impl LineRow { + /// Return the initial state as specified in the DWARF standard. + fn initial_state(line_encoding: LineEncoding) -> Self { + LineRow { + address_offset: 0, + op_index: 0, + + file: FileId::initial_state(), + line: 1, + column: 0, + discriminator: 0, + + is_statement: line_encoding.default_is_stmt, + basic_block: false, + prologue_end: false, + epilogue_begin: false, + + isa: 0, + } + } +} + +/// An instruction in a line number program. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum LineInstruction { + // Special opcodes + Special(u8), + + // Standard opcodes + Copy, + AdvancePc(u64), + AdvanceLine(i64), + SetFile(FileId), + SetColumn(u64), + NegateStatement, + SetBasicBlock, + ConstAddPc, + // DW_LNS_fixed_advance_pc is not supported. + SetPrologueEnd, + SetEpilogueBegin, + SetIsa(u64), + + // Extended opcodes + EndSequence, + // TODO: this doubles the size of this enum. + SetAddress(Address), + // DW_LNE_define_file is not supported. + SetDiscriminator(u64), +} + +impl LineInstruction { + /// Write the line number instruction to the given section. 
+ fn write(self, w: &mut DebugLine, address_size: u8) -> Result<()> { + use self::LineInstruction::*; + match self { + Special(val) => w.write_u8(val)?, + Copy => w.write_u8(constants::DW_LNS_copy.0)?, + AdvancePc(val) => { + w.write_u8(constants::DW_LNS_advance_pc.0)?; + w.write_uleb128(val)?; + } + AdvanceLine(val) => { + w.write_u8(constants::DW_LNS_advance_line.0)?; + w.write_sleb128(val)?; + } + SetFile(val) => { + w.write_u8(constants::DW_LNS_set_file.0)?; + w.write_uleb128(val.raw())?; + } + SetColumn(val) => { + w.write_u8(constants::DW_LNS_set_column.0)?; + w.write_uleb128(val)?; + } + NegateStatement => w.write_u8(constants::DW_LNS_negate_stmt.0)?, + SetBasicBlock => w.write_u8(constants::DW_LNS_set_basic_block.0)?, + ConstAddPc => w.write_u8(constants::DW_LNS_const_add_pc.0)?, + SetPrologueEnd => w.write_u8(constants::DW_LNS_set_prologue_end.0)?, + SetEpilogueBegin => w.write_u8(constants::DW_LNS_set_epilogue_begin.0)?, + SetIsa(val) => { + w.write_u8(constants::DW_LNS_set_isa.0)?; + w.write_uleb128(val)?; + } + EndSequence => { + w.write_u8(0)?; + w.write_uleb128(1)?; + w.write_u8(constants::DW_LNE_end_sequence.0)?; + } + SetAddress(address) => { + w.write_u8(0)?; + w.write_uleb128(1 + u64::from(address_size))?; + w.write_u8(constants::DW_LNE_set_address.0)?; + w.write_address(address, address_size)?; + } + SetDiscriminator(val) => { + let mut bytes = [0u8; 10]; + // bytes is long enough so this will never fail. + let len = leb128::write::unsigned(&mut { &mut bytes[..] }, val).unwrap(); + w.write_u8(0)?; + w.write_uleb128(1 + len as u64)?; + w.write_u8(constants::DW_LNE_set_discriminator.0)?; + w.write(&bytes[..len])?; + } + } + Ok(()) + } +} + +/// A string value for use in defining paths in line number programs. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum LineString { + /// A slice of bytes representing a string. Must not include null bytes. + /// Not guaranteed to be UTF-8 or anything like that. 
+ String(Vec), + + /// A reference to a string in the `.debug_str` section. + StringRef(StringId), + + /// A reference to a string in the `.debug_line_str` section. + LineStringRef(LineStringId), +} + +impl LineString { + /// Create a `LineString` using the normal form for the given encoding. + pub fn new(val: T, encoding: Encoding, line_strings: &mut LineStringTable) -> Self + where + T: Into>, + { + let val = val.into(); + if encoding.version <= 4 { + LineString::String(val) + } else { + LineString::LineStringRef(line_strings.add(val)) + } + } + + fn form(&self) -> constants::DwForm { + match *self { + LineString::String(..) => constants::DW_FORM_string, + LineString::StringRef(..) => constants::DW_FORM_strp, + LineString::LineStringRef(..) => constants::DW_FORM_line_strp, + } + } + + fn write( + &self, + w: &mut DebugLine, + form: constants::DwForm, + encoding: Encoding, + debug_line_str_offsets: &DebugLineStrOffsets, + debug_str_offsets: &DebugStrOffsets, + ) -> Result<()> { + if form != self.form() { + return Err(Error::LineStringFormMismatch); + } + + match *self { + LineString::String(ref val) => { + if encoding.version <= 4 { + debug_assert!(!val.is_empty()); + } + w.write(val)?; + w.write_u8(0)?; + } + LineString::StringRef(val) => { + if encoding.version < 5 { + return Err(Error::NeedVersion(5)); + } + w.write_offset( + debug_str_offsets.get(val).0, + SectionId::DebugStr, + encoding.format.word_size(), + )?; + } + LineString::LineStringRef(val) => { + if encoding.version < 5 { + return Err(Error::NeedVersion(5)); + } + w.write_offset( + debug_line_str_offsets.get(val).0, + SectionId::DebugLineStr, + encoding.format.word_size(), + )?; + } + } + Ok(()) + } +} + +/// An identifier for a directory in a `LineProgram`. +/// +/// Defaults to the working directory of the compilation unit. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DirectoryId(usize); + +// Force FileId access via the methods. 
+mod id { + /// An identifier for a file in a `LineProgram`. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct FileId(usize); + + impl FileId { + /// Create a FileId given an index into `LineProgram::files`. + pub(crate) fn new(index: usize) -> Self { + FileId(index + 1) + } + + /// The index of the file in `LineProgram::files`. + pub(super) fn index(self) -> Option { + if self.0 == 0 { + None + } else { + Some(self.0 - 1) + } + } + + /// The initial state of the file register. + pub(super) fn initial_state() -> Self { + FileId(1) + } + + /// The raw value used when writing. + pub(crate) fn raw(self) -> u64 { + self.0 as u64 + } + + /// The id for file index 0 in DWARF version 5. + /// Only used when converting. + // Used for tests only. + #[allow(unused)] + pub(super) fn zero() -> Self { + FileId(0) + } + } +} +pub use self::id::*; + +/// Extra information for file in a `LineProgram`. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct FileInfo { + /// The implementation defined timestamp of the last modification of the file, + /// or 0 if not available. + pub timestamp: u64, + + /// The size of the file in bytes, or 0 if not available. + pub size: u64, + + /// A 16-byte MD5 digest of the file contents. + /// + /// Only used if version >= 5 and `LineProgram::file_has_md5` is `true`. + pub md5: [u8; 16], +} + +define_section!( + DebugLine, + DebugLineOffset, + "A writable `.debug_line` section." +); + +#[cfg(feature = "read")] +mod convert { + use super::*; + use crate::read::{self, Reader}; + use crate::write::{self, ConvertError, ConvertResult}; + + impl LineProgram { + /// Create a line number program by reading the data from the given program. + /// + /// Return the program and a mapping from file index to `FileId`. + pub fn from>( + mut from_program: read::IncompleteLineProgram, + dwarf: &read::Dwarf, + line_strings: &mut write::LineStringTable, + strings: &mut write::StringTable, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult<(LineProgram, Vec)> { + // Create mappings in case the source has duplicate files or directories. + let mut dirs = Vec::new(); + let mut files = Vec::new(); + + let mut program = { + let from_header = from_program.header(); + let encoding = from_header.encoding(); + + let comp_dir = match from_header.directory(0) { + Some(comp_dir) => LineString::from(comp_dir, dwarf, line_strings, strings)?, + None => LineString::new(&[][..], encoding, line_strings), + }; + + let (comp_name, comp_file_info) = match from_header.file(0) { + Some(comp_file) => { + if comp_file.directory_index() != 0 { + return Err(ConvertError::InvalidDirectoryIndex); + } + ( + LineString::from(comp_file.path_name(), dwarf, line_strings, strings)?, + Some(FileInfo { + timestamp: comp_file.timestamp(), + size: comp_file.size(), + md5: *comp_file.md5(), + }), + ) + } + None => (LineString::new(&[][..], encoding, line_strings), None), + }; + + if from_header.line_base() > 0 { + return Err(ConvertError::InvalidLineBase); + } + let mut program = LineProgram::new( + encoding, + from_header.line_encoding(), + comp_dir, + comp_name, + comp_file_info, + ); + + let file_skip; + if from_header.version() <= 4 { + // The first directory is implicit. + dirs.push(DirectoryId(0)); + // A file index of 0 is invalid for version <= 4, but putting + // something there makes the indexing easier. + file_skip = 0; + files.push(FileId::zero()); + } else { + // We don't add the first file to `files`, but still allow + // it to be referenced from converted instructions. 
+ file_skip = 1; + files.push(FileId::zero()); + } + + for from_dir in from_header.include_directories() { + let from_dir = + LineString::from(from_dir.clone(), dwarf, line_strings, strings)?; + dirs.push(program.add_directory(from_dir)); + } + + program.file_has_timestamp = from_header.file_has_timestamp(); + program.file_has_size = from_header.file_has_size(); + program.file_has_md5 = from_header.file_has_md5(); + for from_file in from_header.file_names().iter().skip(file_skip) { + let from_name = + LineString::from(from_file.path_name(), dwarf, line_strings, strings)?; + let from_dir = from_file.directory_index(); + if from_dir >= dirs.len() as u64 { + return Err(ConvertError::InvalidDirectoryIndex); + } + let from_dir = dirs[from_dir as usize]; + let from_info = Some(FileInfo { + timestamp: from_file.timestamp(), + size: from_file.size(), + md5: *from_file.md5(), + }); + files.push(program.add_file(from_name, from_dir, from_info)); + } + + program + }; + + // We can't use the `from_program.rows()` because that wouldn't let + // us preserve address relocations. + let mut from_row = read::LineRow::new(from_program.header()); + let mut instructions = from_program.header().instructions(); + let mut address = None; + while let Some(instruction) = instructions.next_instruction(from_program.header())? 
{ + match instruction { + read::LineInstruction::SetAddress(val) => { + if program.in_sequence() { + return Err(ConvertError::UnsupportedLineInstruction); + } + match convert_address(val) { + Some(val) => address = Some(val), + None => return Err(ConvertError::InvalidAddress), + } + from_row.execute(read::LineInstruction::SetAddress(0), &mut from_program); + } + read::LineInstruction::DefineFile(_) => { + return Err(ConvertError::UnsupportedLineInstruction); + } + _ => { + if from_row.execute(instruction, &mut from_program) { + if !program.in_sequence() { + program.begin_sequence(address); + address = None; + } + if from_row.end_sequence() { + program.end_sequence(from_row.address()); + } else { + program.row().address_offset = from_row.address(); + program.row().op_index = from_row.op_index(); + program.row().file = { + let file = from_row.file_index(); + if file >= files.len() as u64 { + return Err(ConvertError::InvalidFileIndex); + } + if file == 0 && program.version() <= 4 { + return Err(ConvertError::InvalidFileIndex); + } + files[file as usize] + }; + program.row().line = match from_row.line() { + Some(line) => line.get(), + None => 0, + }; + program.row().column = match from_row.column() { + read::ColumnType::LeftEdge => 0, + read::ColumnType::Column(val) => val.get(), + }; + program.row().discriminator = from_row.discriminator(); + program.row().is_statement = from_row.is_stmt(); + program.row().basic_block = from_row.basic_block(); + program.row().prologue_end = from_row.prologue_end(); + program.row().epilogue_begin = from_row.epilogue_begin(); + program.row().isa = from_row.isa(); + program.generate_row(); + } + from_row.reset(from_program.header()); + } + } + }; + } + Ok((program, files)) + } + } + + impl LineString { + fn from>( + from_attr: read::AttributeValue, + dwarf: &read::Dwarf, + line_strings: &mut write::LineStringTable, + strings: &mut write::StringTable, + ) -> ConvertResult { + Ok(match from_attr { + read::AttributeValue::String(r) => 
LineString::String(r.to_slice()?.to_vec()), + read::AttributeValue::DebugStrRef(offset) => { + let r = dwarf.debug_str.get_str(offset)?; + let id = strings.add(r.to_slice()?); + LineString::StringRef(id) + } + read::AttributeValue::DebugLineStrRef(offset) => { + let r = dwarf.debug_line_str.get_str(offset)?; + let id = line_strings.add(r.to_slice()?); + LineString::LineStringRef(id) + } + _ => return Err(ConvertError::UnsupportedLineStringForm), + }) + } + } +} + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::read; + use crate::write::{DebugLineStr, DebugStr, EndianVec, StringTable}; + use crate::LittleEndian; + + #[test] + fn test_line_program_table() { + let dir1 = LineString::String(b"dir1".to_vec()); + let file1 = LineString::String(b"file1".to_vec()); + let dir2 = LineString::String(b"dir2".to_vec()); + let file2 = LineString::String(b"file2".to_vec()); + + let mut programs = Vec::new(); + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let mut program = LineProgram::new( + encoding, + LineEncoding::default(), + dir1.clone(), + file1.clone(), + None, + ); + + { + assert_eq!(&dir1, program.get_directory(program.default_directory())); + program.file_has_timestamp = true; + program.file_has_size = true; + if encoding.version >= 5 { + program.file_has_md5 = true; + } + + let dir_id = program.add_directory(dir2.clone()); + assert_eq!(&dir2, program.get_directory(dir_id)); + assert_eq!(dir_id, program.add_directory(dir2.clone())); + + let file_info = FileInfo { + timestamp: 1, + size: 2, + md5: if encoding.version >= 5 { + [3; 16] + } else { + [0; 16] + }, + }; + let file_id = program.add_file(file2.clone(), dir_id, Some(file_info)); + assert_eq!((&file2, dir_id), program.get_file(file_id)); + assert_eq!(file_info, *program.get_file_info(file_id)); + + 
program.get_file_info_mut(file_id).size = 3; + assert_ne!(file_info, *program.get_file_info(file_id)); + assert_eq!(file_id, program.add_file(file2.clone(), dir_id, None)); + assert_ne!(file_info, *program.get_file_info(file_id)); + assert_eq!( + file_id, + program.add_file(file2.clone(), dir_id, Some(file_info)) + ); + assert_eq!(file_info, *program.get_file_info(file_id)); + + programs.push((program, file_id, encoding)); + } + } + } + } + + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let mut debug_line_offsets = Vec::new(); + for (program, _, encoding) in &programs { + debug_line_offsets.push( + program + .write( + &mut debug_line, + *encoding, + &debug_line_str_offsets, + &debug_str_offsets, + ) + .unwrap(), + ); + } + + let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); + + let convert_address = &|address| Some(Address::Constant(address)); + for ((program, file_id, encoding), offset) in programs.iter().zip(debug_line_offsets.iter()) + { + let read_program = read_debug_line + .program( + *offset, + encoding.address_size, + Some(read::EndianSlice::new(b"dir1", LittleEndian)), + Some(read::EndianSlice::new(b"file1", LittleEndian)), + ) + .unwrap(); + + let dwarf = read::Dwarf::default(); + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let (convert_program, convert_files) = LineProgram::from( + read_program, + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + convert_address, + ) + .unwrap(); + assert_eq!(convert_program.version(), program.version()); + assert_eq!(convert_program.address_size(), program.address_size()); + assert_eq!(convert_program.format(), program.format()); + + let convert_file_id = convert_files[file_id.raw() as usize]; + let (file, dir) = program.get_file(*file_id); + let (convert_file, convert_dir) = 
convert_program.get_file(convert_file_id); + assert_eq!(file, convert_file); + assert_eq!( + program.get_directory(dir), + convert_program.get_directory(convert_dir) + ); + assert_eq!( + program.get_file_info(*file_id), + convert_program.get_file_info(convert_file_id) + ); + } + } + + #[test] + fn test_line_row() { + let dir1 = &b"dir1"[..]; + let file1 = &b"file1"[..]; + let file2 = &b"file2"[..]; + let convert_address = &|address| Some(Address::Constant(address)); + + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let line_base = -5; + let line_range = 14; + let neg_line_base = (-line_base) as u8; + let mut program = LineProgram::new( + encoding, + LineEncoding { + line_base, + line_range, + ..Default::default() + }, + LineString::String(dir1.to_vec()), + LineString::String(file1.to_vec()), + None, + ); + let dir_id = program.default_directory(); + program.add_file(LineString::String(file1.to_vec()), dir_id, None); + let file_id = + program.add_file(LineString::String(file2.to_vec()), dir_id, None); + + // Test sequences. + { + let mut program = program.clone(); + let address = Address::Constant(0x12); + program.begin_sequence(Some(address)); + assert_eq!( + program.instructions, + vec![LineInstruction::SetAddress(address)] + ); + } + + { + let mut program = program.clone(); + program.begin_sequence(None); + assert_eq!(program.instructions, Vec::new()); + } + + { + let mut program = program.clone(); + program.begin_sequence(None); + program.end_sequence(0x1234); + assert_eq!( + program.instructions, + vec![ + LineInstruction::AdvancePc(0x1234), + LineInstruction::EndSequence + ] + ); + } + + // Create a base program. 
+ program.begin_sequence(None); + program.row.line = 0x1000; + program.generate_row(); + let base_row = program.row; + let base_instructions = program.instructions.clone(); + + // Create test cases. + let mut tests = Vec::new(); + + let row = base_row; + tests.push((row, vec![LineInstruction::Copy])); + + let mut row = base_row; + row.line -= u64::from(neg_line_base); + tests.push((row, vec![LineInstruction::Special(OPCODE_BASE)])); + + let mut row = base_row; + row.line += u64::from(line_range) - 1; + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![LineInstruction::Special(OPCODE_BASE + line_range - 1)], + )); + + let mut row = base_row; + row.line += u64::from(line_range); + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![ + LineInstruction::AdvanceLine(i64::from(line_range - neg_line_base)), + LineInstruction::Copy, + ], + )); + + let mut row = base_row; + row.address_offset = 1; + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![LineInstruction::Special(OPCODE_BASE + line_range)], + )); + + let op_range = (255 - OPCODE_BASE) / line_range; + let mut row = base_row; + row.address_offset = u64::from(op_range); + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![LineInstruction::Special( + OPCODE_BASE + op_range * line_range, + )], + )); + + let mut row = base_row; + row.address_offset = u64::from(op_range); + row.line += u64::from(255 - OPCODE_BASE - op_range * line_range); + row.line -= u64::from(neg_line_base); + tests.push((row, vec![LineInstruction::Special(255)])); + + let mut row = base_row; + row.address_offset = u64::from(op_range); + row.line += u64::from(255 - OPCODE_BASE - op_range * line_range) + 1; + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![LineInstruction::ConstAddPc, LineInstruction::Copy], + )); + + let mut row = base_row; + row.address_offset = u64::from(op_range); + row.line += u64::from(255 - OPCODE_BASE - op_range * line_range) + 2; + row.line -= 
u64::from(neg_line_base); + tests.push(( + row, + vec![ + LineInstruction::ConstAddPc, + LineInstruction::Special(OPCODE_BASE + 6), + ], + )); + + let mut row = base_row; + row.address_offset = u64::from(op_range) * 2; + row.line += u64::from(255 - OPCODE_BASE - op_range * line_range); + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![LineInstruction::ConstAddPc, LineInstruction::Special(255)], + )); + + let mut row = base_row; + row.address_offset = u64::from(op_range) * 2; + row.line += u64::from(255 - OPCODE_BASE - op_range * line_range) + 1; + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![ + LineInstruction::AdvancePc(row.address_offset), + LineInstruction::Copy, + ], + )); + + let mut row = base_row; + row.address_offset = u64::from(op_range) * 2; + row.line += u64::from(255 - OPCODE_BASE - op_range * line_range) + 2; + row.line -= u64::from(neg_line_base); + tests.push(( + row, + vec![ + LineInstruction::AdvancePc(row.address_offset), + LineInstruction::Special(OPCODE_BASE + 6), + ], + )); + + let mut row = base_row; + row.address_offset = 0x1234; + tests.push(( + row, + vec![LineInstruction::AdvancePc(0x1234), LineInstruction::Copy], + )); + + let mut row = base_row; + row.line += 0x1234; + tests.push(( + row, + vec![LineInstruction::AdvanceLine(0x1234), LineInstruction::Copy], + )); + + let mut row = base_row; + row.file = file_id; + tests.push(( + row, + vec![LineInstruction::SetFile(file_id), LineInstruction::Copy], + )); + + let mut row = base_row; + row.column = 0x1234; + tests.push(( + row, + vec![LineInstruction::SetColumn(0x1234), LineInstruction::Copy], + )); + + let mut row = base_row; + row.discriminator = 0x1234; + tests.push(( + row, + vec![ + LineInstruction::SetDiscriminator(0x1234), + LineInstruction::Copy, + ], + )); + + let mut row = base_row; + row.is_statement = !row.is_statement; + tests.push(( + row, + vec![LineInstruction::NegateStatement, LineInstruction::Copy], + )); + + let mut row = base_row; 
+ row.basic_block = true; + tests.push(( + row, + vec![LineInstruction::SetBasicBlock, LineInstruction::Copy], + )); + + let mut row = base_row; + row.prologue_end = true; + tests.push(( + row, + vec![LineInstruction::SetPrologueEnd, LineInstruction::Copy], + )); + + let mut row = base_row; + row.epilogue_begin = true; + tests.push(( + row, + vec![LineInstruction::SetEpilogueBegin, LineInstruction::Copy], + )); + + let mut row = base_row; + row.isa = 0x1234; + tests.push(( + row, + vec![LineInstruction::SetIsa(0x1234), LineInstruction::Copy], + )); + + for test in tests { + // Test generate_row(). + let mut program = program.clone(); + program.row = test.0; + program.generate_row(); + assert_eq!( + &program.instructions[base_instructions.len()..], + &test.1[..] + ); + + // Test LineProgram::from(). + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let debug_line_offset = program + .write( + &mut debug_line, + encoding, + &debug_line_str_offsets, + &debug_str_offsets, + ) + .unwrap(); + + let read_debug_line = + read::DebugLine::new(debug_line.slice(), LittleEndian); + let read_program = read_debug_line + .program( + debug_line_offset, + address_size, + Some(read::EndianSlice::new(dir1, LittleEndian)), + Some(read::EndianSlice::new(file1, LittleEndian)), + ) + .unwrap(); + + let dwarf = read::Dwarf::default(); + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let (convert_program, _convert_files) = LineProgram::from( + read_program, + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + convert_address, + ) + .unwrap(); + assert_eq!( + &convert_program.instructions[base_instructions.len()..], + &test.1[..] 
+ ); + } + } + } + } + } + + #[test] + fn test_line_instruction() { + let dir1 = &b"dir1"[..]; + let file1 = &b"file1"[..]; + + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let mut program = LineProgram::new( + encoding, + LineEncoding::default(), + LineString::String(dir1.to_vec()), + LineString::String(file1.to_vec()), + None, + ); + let dir_id = program.default_directory(); + let file_id = + program.add_file(LineString::String(file1.to_vec()), dir_id, None); + + for (inst, expect_inst) in &[ + ( + LineInstruction::Special(OPCODE_BASE), + read::LineInstruction::Special(OPCODE_BASE), + ), + ( + LineInstruction::Special(255), + read::LineInstruction::Special(255), + ), + (LineInstruction::Copy, read::LineInstruction::Copy), + ( + LineInstruction::AdvancePc(0x12), + read::LineInstruction::AdvancePc(0x12), + ), + ( + LineInstruction::AdvanceLine(0x12), + read::LineInstruction::AdvanceLine(0x12), + ), + ( + LineInstruction::SetFile(file_id), + read::LineInstruction::SetFile(file_id.raw()), + ), + ( + LineInstruction::SetColumn(0x12), + read::LineInstruction::SetColumn(0x12), + ), + ( + LineInstruction::NegateStatement, + read::LineInstruction::NegateStatement, + ), + ( + LineInstruction::SetBasicBlock, + read::LineInstruction::SetBasicBlock, + ), + ( + LineInstruction::ConstAddPc, + read::LineInstruction::ConstAddPc, + ), + ( + LineInstruction::SetPrologueEnd, + read::LineInstruction::SetPrologueEnd, + ), + ( + LineInstruction::SetEpilogueBegin, + read::LineInstruction::SetEpilogueBegin, + ), + ( + LineInstruction::SetIsa(0x12), + read::LineInstruction::SetIsa(0x12), + ), + ( + LineInstruction::EndSequence, + read::LineInstruction::EndSequence, + ), + ( + LineInstruction::SetAddress(Address::Constant(0x12)), + 
read::LineInstruction::SetAddress(0x12), + ), + ( + LineInstruction::SetDiscriminator(0x12), + read::LineInstruction::SetDiscriminator(0x12), + ), + ][..] + { + let mut program = program.clone(); + program.instructions.push(*inst); + + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let debug_line_offset = program + .write( + &mut debug_line, + encoding, + &debug_line_str_offsets, + &debug_str_offsets, + ) + .unwrap(); + + let read_debug_line = + read::DebugLine::new(debug_line.slice(), LittleEndian); + let read_program = read_debug_line + .program( + debug_line_offset, + address_size, + Some(read::EndianSlice::new(dir1, LittleEndian)), + Some(read::EndianSlice::new(file1, LittleEndian)), + ) + .unwrap(); + let read_header = read_program.header(); + let mut read_insts = read_header.instructions(); + assert_eq!( + *expect_inst, + read_insts.next_instruction(read_header).unwrap().unwrap() + ); + assert_eq!(None, read_insts.next_instruction(read_header).unwrap()); + } + } + } + } + } + + // Test that the address/line advance is correct. We don't test for optimality. 
+ #[test] + fn test_advance() { + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 8, + }; + + let dir1 = &b"dir1"[..]; + let file1 = &b"file1"[..]; + + let addresses = 0..50; + let lines = -10..25i64; + + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + + for minimum_instruction_length in [1, 4] { + for maximum_operations_per_instruction in [1, 3] { + for line_base in [-5, 0] { + for line_range in [10, 20] { + let line_encoding = LineEncoding { + minimum_instruction_length, + maximum_operations_per_instruction, + line_base, + line_range, + default_is_stmt: true, + }; + let mut program = LineProgram::new( + encoding, + line_encoding, + LineString::String(dir1.to_vec()), + LineString::String(file1.to_vec()), + None, + ); + for address_advance in addresses.clone() { + program.begin_sequence(Some(Address::Constant(0x1000))); + program.row().line = 0x10000; + program.generate_row(); + for line_advance in lines.clone() { + { + let row = program.row(); + row.address_offset += + address_advance * u64::from(minimum_instruction_length); + row.line = row.line.wrapping_add(line_advance as u64); + } + program.generate_row(); + } + let address_offset = program.row().address_offset + + u64::from(minimum_instruction_length); + program.end_sequence(address_offset); + } + + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let debug_line_offset = program + .write( + &mut debug_line, + encoding, + &debug_line_str_offsets, + &debug_str_offsets, + ) + .unwrap(); + + let read_debug_line = + read::DebugLine::new(debug_line.slice(), LittleEndian); + let read_program = read_debug_line + .program( + debug_line_offset, + 8, + Some(read::EndianSlice::new(dir1, LittleEndian)), + Some(read::EndianSlice::new(file1, LittleEndian)), + ) + .unwrap(); + + let mut rows = read_program.rows(); + for address_advance in addresses.clone() { + let mut address; + let mut line; + { + let row = 
rows.next_row().unwrap().unwrap().1; + address = row.address(); + line = row.line().unwrap().get(); + } + assert_eq!(address, 0x1000); + assert_eq!(line, 0x10000); + for line_advance in lines.clone() { + let row = rows.next_row().unwrap().unwrap().1; + assert_eq!( + row.address() - address, + address_advance * u64::from(minimum_instruction_length) + ); + assert_eq!( + (row.line().unwrap().get() as i64) - (line as i64), + line_advance + ); + address = row.address(); + line = row.line().unwrap().get(); + } + let row = rows.next_row().unwrap().unwrap().1; + assert!(row.end_sequence()); + } + } + } + } + } + } + + #[test] + fn test_line_string() { + let version = 5; + + let file = b"file1"; + + let mut strings = StringTable::default(); + let string_id = strings.add("file2"); + let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); + let debug_str_offsets = strings.write(&mut debug_str).unwrap(); + + let mut line_strings = LineStringTable::default(); + let line_string_id = line_strings.add("file3"); + let mut debug_line_str = DebugLineStr::from(EndianVec::new(LittleEndian)); + let debug_line_str_offsets = line_strings.write(&mut debug_line_str).unwrap(); + + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + + for (file, expect_file) in [ + ( + LineString::String(file.to_vec()), + read::AttributeValue::String(read::EndianSlice::new(file, LittleEndian)), + ), + ( + LineString::StringRef(string_id), + read::AttributeValue::DebugStrRef(debug_str_offsets.get(string_id)), + ), + ( + LineString::LineStringRef(line_string_id), + read::AttributeValue::DebugLineStrRef( + debug_line_str_offsets.get(line_string_id), + ), + ), + ] { + let program = LineProgram::new( + encoding, + LineEncoding::default(), + LineString::String(b"dir".to_vec()), + file, + None, + ); + + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let debug_line_offset = 
program + .write( + &mut debug_line, + encoding, + &debug_line_str_offsets, + &debug_str_offsets, + ) + .unwrap(); + + let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); + let read_program = read_debug_line + .program(debug_line_offset, address_size, None, None) + .unwrap(); + let read_header = read_program.header(); + assert_eq!(read_header.file(0).unwrap().path_name(), expect_file); + } + } + } + } + + #[test] + fn test_missing_comp_dir() { + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let program = LineProgram::new( + encoding, + LineEncoding::default(), + LineString::String(Vec::new()), + LineString::String(Vec::new()), + None, + ); + + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let debug_line_offset = program + .write( + &mut debug_line, + encoding, + &debug_line_str_offsets, + &debug_str_offsets, + ) + .unwrap(); + + let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); + let read_program = read_debug_line + .program( + debug_line_offset, + address_size, + // Testing missing comp_dir/comp_name. 
+ None, + None, + ) + .unwrap(); + + let dwarf = read::Dwarf::default(); + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let convert_address = &|address| Some(Address::Constant(address)); + LineProgram::from( + read_program, + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + convert_address, + ) + .unwrap(); + } + } + } + } +} diff --git a/third_party/rust/gimli/src/write/loc.rs b/third_party/rust/gimli/src/write/loc.rs new file mode 100644 index 000000000000..bd1800876054 --- /dev/null +++ b/third_party/rust/gimli/src/write/loc.rs @@ -0,0 +1,550 @@ +use alloc::vec::Vec; +use indexmap::IndexSet; +use std::ops::{Deref, DerefMut}; + +use crate::common::{Encoding, LocationListsOffset, SectionId}; +use crate::write::{ + Address, BaseId, DebugInfoReference, Error, Expression, Result, Section, Sections, UnitOffsets, + Writer, +}; + +define_section!( + DebugLoc, + LocationListsOffset, + "A writable `.debug_loc` section." +); +define_section!( + DebugLocLists, + LocationListsOffset, + "A writable `.debug_loclists` section." +); + +define_offsets!( + LocationListOffsets: LocationListId => LocationListsOffset, + "The section offsets of a series of location lists within the `.debug_loc` or `.debug_loclists` sections." +); + +define_id!( + LocationListId, + "An identifier for a location list in a `LocationListTable`." +); + +/// A table of location lists that will be stored in a `.debug_loc` or `.debug_loclists` section. +#[derive(Debug, Default)] +pub struct LocationListTable { + base_id: BaseId, + locations: IndexSet, +} + +impl LocationListTable { + /// Add a location list to the table. + pub fn add(&mut self, loc_list: LocationList) -> LocationListId { + let (index, _) = self.locations.insert_full(loc_list); + LocationListId::new(self.base_id, index) + } + + /// Write the location list table to the appropriate section for the given DWARF version. 
+ pub(crate) fn write( + &self, + sections: &mut Sections, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + ) -> Result { + if self.locations.is_empty() { + return Ok(LocationListOffsets::none()); + } + + match encoding.version { + 2..=4 => self.write_loc( + &mut sections.debug_loc, + &mut sections.debug_loc_refs, + encoding, + unit_offsets, + ), + 5 => self.write_loclists( + &mut sections.debug_loclists, + &mut sections.debug_loclists_refs, + encoding, + unit_offsets, + ), + _ => Err(Error::UnsupportedVersion(encoding.version)), + } + } + + /// Write the location list table to the `.debug_loc` section. + fn write_loc( + &self, + w: &mut DebugLoc, + refs: &mut Vec, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + ) -> Result { + let address_size = encoding.address_size; + let mut offsets = Vec::new(); + for loc_list in self.locations.iter() { + offsets.push(w.offset()); + for loc in &loc_list.0 { + // Note that we must ensure none of the ranges have both begin == 0 and end == 0. + // We do this by ensuring that begin != end, which is a bit more restrictive + // than required, but still seems reasonable. 
+ match *loc { + Location::BaseAddress { address } => { + let marker = !0 >> (64 - address_size * 8); + w.write_udata(marker, address_size)?; + w.write_address(address, address_size)?; + } + Location::OffsetPair { + begin, + end, + ref data, + } => { + if begin == end { + return Err(Error::InvalidRange); + } + w.write_udata(begin, address_size)?; + w.write_udata(end, address_size)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; + } + Location::StartEnd { + begin, + end, + ref data, + } => { + if begin == end { + return Err(Error::InvalidRange); + } + w.write_address(begin, address_size)?; + w.write_address(end, address_size)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; + } + Location::StartLength { + begin, + length, + ref data, + } => { + let end = match begin { + Address::Constant(begin) => Address::Constant(begin + length), + Address::Symbol { symbol, addend } => Address::Symbol { + symbol, + addend: addend + length as i64, + }, + }; + if begin == end { + return Err(Error::InvalidRange); + } + w.write_address(begin, address_size)?; + w.write_address(end, address_size)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; + } + Location::DefaultLocation { .. } => { + return Err(Error::InvalidRange); + } + } + } + w.write_udata(0, address_size)?; + w.write_udata(0, address_size)?; + } + Ok(LocationListOffsets { + base_id: self.base_id, + offsets, + }) + } + + /// Write the location list table to the `.debug_loclists` section. 
+ fn write_loclists( + &self, + w: &mut DebugLocLists, + refs: &mut Vec, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + ) -> Result { + let mut offsets = Vec::new(); + + if encoding.version != 5 { + return Err(Error::NeedVersion(5)); + } + + let length_offset = w.write_initial_length(encoding.format)?; + let length_base = w.len(); + + w.write_u16(encoding.version)?; + w.write_u8(encoding.address_size)?; + w.write_u8(0)?; // segment_selector_size + w.write_u32(0)?; // offset_entry_count (when set to zero DW_FORM_rnglistx can't be used, see section 7.28) + // FIXME implement DW_FORM_rnglistx writing and implement the offset entry list + + for loc_list in self.locations.iter() { + offsets.push(w.offset()); + for loc in &loc_list.0 { + match *loc { + Location::BaseAddress { address } => { + w.write_u8(crate::constants::DW_LLE_base_address.0)?; + w.write_address(address, encoding.address_size)?; + } + Location::OffsetPair { + begin, + end, + ref data, + } => { + w.write_u8(crate::constants::DW_LLE_offset_pair.0)?; + w.write_uleb128(begin)?; + w.write_uleb128(end)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; + } + Location::StartEnd { + begin, + end, + ref data, + } => { + w.write_u8(crate::constants::DW_LLE_start_end.0)?; + w.write_address(begin, encoding.address_size)?; + w.write_address(end, encoding.address_size)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; + } + Location::StartLength { + begin, + length, + ref data, + } => { + w.write_u8(crate::constants::DW_LLE_start_length.0)?; + w.write_address(begin, encoding.address_size)?; + w.write_uleb128(length)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; + } + Location::DefaultLocation { ref data } => { + w.write_u8(crate::constants::DW_LLE_default_location.0)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; + } + } + } + + w.write_u8(crate::constants::DW_LLE_end_of_list.0)?; + } + + let length = (w.len() - 
length_base) as u64; + w.write_initial_length_at(length_offset, length, encoding.format)?; + + Ok(LocationListOffsets { + base_id: self.base_id, + offsets, + }) + } +} + +/// A locations list that will be stored in a `.debug_loc` or `.debug_loclists` section. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct LocationList(pub Vec); + +/// A single location. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub enum Location { + /// DW_LLE_base_address + BaseAddress { + /// Base address. + address: Address, + }, + /// DW_LLE_offset_pair + OffsetPair { + /// Start of range relative to base address. + begin: u64, + /// End of range relative to base address. + end: u64, + /// Location description. + data: Expression, + }, + /// DW_LLE_start_end + StartEnd { + /// Start of range. + begin: Address, + /// End of range. + end: Address, + /// Location description. + data: Expression, + }, + /// DW_LLE_start_length + StartLength { + /// Start of range. + begin: Address, + /// Length of range. + length: u64, + /// Location description. + data: Expression, + }, + /// DW_LLE_default_location + DefaultLocation { + /// Location description. + data: Expression, + }, +} + +fn write_expression( + w: &mut W, + refs: &mut Vec, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + val: &Expression, +) -> Result<()> { + let size = val.size(encoding, unit_offsets) as u64; + if encoding.version <= 4 { + w.write_udata(size, 2)?; + } else { + w.write_uleb128(size)?; + } + val.write(w, Some(refs), encoding, unit_offsets)?; + Ok(()) +} + +#[cfg(feature = "read")] +mod convert { + use super::*; + + use crate::read::{self, Reader}; + use crate::write::{ConvertError, ConvertResult, ConvertUnitContext}; + + impl LocationList { + /// Create a location list by reading the data from the give location list iter. 
+ pub(crate) fn from>( + mut from: read::RawLocListIter, + context: &ConvertUnitContext<'_, R>, + ) -> ConvertResult { + let mut have_base_address = context.base_address != Address::Constant(0); + let convert_address = + |x| (context.convert_address)(x).ok_or(ConvertError::InvalidAddress); + let convert_expression = |x| { + Expression::from( + x, + context.unit.encoding(), + Some(context.dwarf), + Some(context.unit), + Some(context.entry_ids), + context.convert_address, + ) + }; + let mut loc_list = Vec::new(); + while let Some(from_loc) = from.next()? { + let loc = match from_loc { + read::RawLocListEntry::AddressOrOffsetPair { begin, end, data } => { + // These were parsed as addresses, even if they are offsets. + let begin = convert_address(begin)?; + let end = convert_address(end)?; + let data = convert_expression(data)?; + match (begin, end) { + (Address::Constant(begin_offset), Address::Constant(end_offset)) => { + if have_base_address { + Location::OffsetPair { + begin: begin_offset, + end: end_offset, + data, + } + } else { + Location::StartEnd { begin, end, data } + } + } + _ => { + if have_base_address { + // At least one of begin/end is an address, but we also have + // a base address. Adding addresses is undefined. 
+ return Err(ConvertError::InvalidRangeRelativeAddress); + } + Location::StartEnd { begin, end, data } + } + } + } + read::RawLocListEntry::BaseAddress { addr } => { + have_base_address = true; + let address = convert_address(addr)?; + Location::BaseAddress { address } + } + read::RawLocListEntry::BaseAddressx { addr } => { + have_base_address = true; + let address = convert_address(context.dwarf.address(context.unit, addr)?)?; + Location::BaseAddress { address } + } + read::RawLocListEntry::StartxEndx { begin, end, data } => { + let begin = convert_address(context.dwarf.address(context.unit, begin)?)?; + let end = convert_address(context.dwarf.address(context.unit, end)?)?; + let data = convert_expression(data)?; + Location::StartEnd { begin, end, data } + } + read::RawLocListEntry::StartxLength { + begin, + length, + data, + } => { + let begin = convert_address(context.dwarf.address(context.unit, begin)?)?; + let data = convert_expression(data)?; + Location::StartLength { + begin, + length, + data, + } + } + read::RawLocListEntry::OffsetPair { begin, end, data } => { + let data = convert_expression(data)?; + Location::OffsetPair { begin, end, data } + } + read::RawLocListEntry::StartEnd { begin, end, data } => { + let begin = convert_address(begin)?; + let end = convert_address(end)?; + let data = convert_expression(data)?; + Location::StartEnd { begin, end, data } + } + read::RawLocListEntry::StartLength { + begin, + length, + data, + } => { + let begin = convert_address(begin)?; + let data = convert_expression(data)?; + Location::StartLength { + begin, + length, + data, + } + } + read::RawLocListEntry::DefaultLocation { data } => { + let data = convert_expression(data)?; + Location::DefaultLocation { data } + } + }; + // In some cases, existing data may contain begin == end, filtering + // these out. + match loc { + Location::StartLength { length: 0, .. } => continue, + Location::StartEnd { begin, end, .. 
#[cfg(test)]
#[cfg(feature = "read")]
mod tests {
    use super::*;
    use crate::common::{
        DebugAbbrevOffset, DebugAddrBase, DebugInfoOffset, DebugLocListsBase, DebugRngListsBase,
        DebugStrOffsetsBase, Format,
    };
    use crate::read;
    use crate::write::{
        ConvertUnitContext, EndianVec, LineStringTable, RangeListTable, StringTable,
    };
    use crate::LittleEndian;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Writes a location list, reads the raw entries back and converts them
    /// into a writable list again, for every combination of DWARF version
    /// (2 to 5), address size (4 or 8) and format (32-bit or 64-bit).
    ///
    /// The converted list must equal the list that was written, except that
    /// for versions up to 4 the leading `StartLength` entry is expected to
    /// come back as the equivalent `StartEnd` entry.
    #[test]
    fn test_loc_list() {
        let mut line_strings = LineStringTable::default();
        let mut strings = StringTable::default();

        // Every location in the list shares the same trivial expression.
        let mut expr = Expression::new();
        expr.op_constu(0);

        for version in [2, 3, 4, 5].iter().copied() {
            for address_size in [4, 8].iter().copied() {
                for format in [Format::Dwarf32, Format::Dwarf64].iter().copied() {
                    let encoding = Encoding {
                        format,
                        version,
                        address_size,
                    };

                    let mut list = LocationList(vec![
                        Location::StartLength {
                            begin: Address::Constant(6666),
                            length: 7777,
                            data: expr.clone(),
                        },
                        Location::StartEnd {
                            begin: Address::Constant(4444),
                            end: Address::Constant(5555),
                            data: expr.clone(),
                        },
                        Location::BaseAddress {
                            address: Address::Constant(1111),
                        },
                        Location::OffsetPair {
                            begin: 2222,
                            end: 3333,
                            data: expr.clone(),
                        },
                    ]);
                    // The default location entry is only exercised for DWARF 5.
                    if version >= 5 {
                        list.0.push(Location::DefaultLocation { data: expr.clone() });
                    }

                    // Write the list into fresh sections.
                    let mut locations = LocationListTable::default();
                    let list_id = locations.add(list.clone());

                    let mut sections = Sections::new(EndianVec::new(LittleEndian));
                    let list_offsets = locations.write(&mut sections, encoding, None).unwrap();
                    assert!(sections.debug_loc_refs.is_empty());
                    assert!(sections.debug_loclists_refs.is_empty());

                    // Read the raw entries back from the written bytes.
                    let read_loc = read::LocationLists::new(
                        read::DebugLoc::new(sections.debug_loc.slice(), LittleEndian),
                        read::DebugLocLists::new(sections.debug_loclists.slice(), LittleEndian),
                    );
                    let raw_entries = read_loc
                        .raw_locations(list_offsets.get(list_id), encoding)
                        .unwrap();

                    // Minimal read-side context required by the conversion.
                    let dwarf = read::Dwarf {
                        locations: read_loc,
                        ..Default::default()
                    };
                    let unit = read::Unit {
                        header: read::UnitHeader::new(
                            encoding,
                            0,
                            read::UnitType::Compilation,
                            DebugAbbrevOffset(0),
                            DebugInfoOffset(0).into(),
                            read::EndianSlice::default(),
                        ),
                        abbreviations: Arc::new(read::Abbreviations::default()),
                        name: None,
                        comp_dir: None,
                        low_pc: 0,
                        str_offsets_base: DebugStrOffsetsBase(0),
                        addr_base: DebugAddrBase(0),
                        loclists_base: DebugLocListsBase(0),
                        rnglists_base: DebugRngListsBase(0),
                        line_program: None,
                        dwo_id: None,
                    };
                    let context = ConvertUnitContext {
                        dwarf: &dwarf,
                        unit: &unit,
                        line_strings: &mut line_strings,
                        strings: &mut strings,
                        ranges: &mut RangeListTable::default(),
                        locations: &mut locations,
                        convert_address: &|address| Some(Address::Constant(address)),
                        base_address: Address::Constant(0),
                        line_program_offset: None,
                        line_program_files: Vec::new(),
                        entry_ids: &HashMap::new(),
                    };
                    let converted = LocationList::from(raw_entries, &context).unwrap();

                    // Up to DWARF 4 the first entry is expected back as a
                    // start/end pair covering the same range.
                    if version <= 4 {
                        list.0[0] = Location::StartEnd {
                            begin: Address::Constant(6666),
                            end: Address::Constant(6666 + 7777),
                            data: expr.clone(),
                        };
                    }
                    assert_eq!(list, converted);
                }
            }
        }
    }
}
in memory, and then writing it all at once. It supports two major use cases: +//! +//! * Use the [`DwarfUnit`](./struct.DwarfUnit.html) type when writing DWARF +//! for a single compilation unit. +//! +//! * Use the [`Dwarf`](./struct.Dwarf.html) type when writing DWARF for multiple +//! compilation units. +//! +//! The module also supports reading in DWARF debugging information and writing it out +//! again, possibly after modifying it. Create a [`read::Dwarf`](../read/struct.Dwarf.html) +//! instance, and then use [`Dwarf::from`](./struct.Dwarf.html#method.from) to convert +//! it to a writable instance. +//! +//! ## Example Usage +//! +//! Write a compilation unit containing only the top level DIE. +//! +//! ```rust +//! use gimli::write::{ +//! Address, AttributeValue, DwarfUnit, EndianVec, Error, Range, RangeList, Sections, +//! }; +//! +//! fn example() -> Result<(), Error> { +//! // Choose the encoding parameters. +//! let encoding = gimli::Encoding { +//! format: gimli::Format::Dwarf32, +//! version: 5, +//! address_size: 8, +//! }; +//! // Create a container for a single compilation unit. +//! let mut dwarf = DwarfUnit::new(encoding); +//! // Set a range attribute on the root DIE. +//! let range_list = RangeList(vec![Range::StartLength { +//! begin: Address::Constant(0x100), +//! length: 42, +//! }]); +//! let range_list_id = dwarf.unit.ranges.add(range_list); +//! let root = dwarf.unit.root(); +//! dwarf.unit.get_mut(root).set( +//! gimli::DW_AT_ranges, +//! AttributeValue::RangeListRef(range_list_id), +//! ); +//! // Create a `Vec` for each DWARF section. +//! let mut sections = Sections::new(EndianVec::new(gimli::LittleEndian)); +//! // Finally, write the DWARF data to the sections. +//! dwarf.write(&mut sections)?; +//! sections.for_each(|id, data| { +//! // Here you can add the data to the output object file. +//! Ok(()) +//! }) +//! } +//! # fn main() { +//! # example().unwrap(); +//! 
// Generates an opaque id type named `$name`, documented with `$docs`.
//
// An id is an index paired with the `BaseId` of the table that issued it, so
// that debug builds can detect an id being used with a different table.
macro_rules! define_id {
    ($name:ident, $docs:expr) => {
        #[doc=$docs]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        pub struct $name {
            base_id: BaseId,
            index: usize,
        }

        impl $name {
            #[inline]
            fn new(base_id: BaseId, index: usize) -> Self {
                Self { base_id, index }
            }
        }
    };
}

// Generates `$offsets`, a table mapping each `$id` to the `$offset` recorded
// for it, documented with `$off_doc`.
macro_rules! define_offsets {
    ($offsets:ident: $id:ident => $offset:ident, $off_doc:expr) => {
        #[doc=$off_doc]
        #[derive(Debug)]
        pub struct $offsets {
            base_id: BaseId,
            // Ids are known to start at 0, so the id index is the position here.
            offsets: Vec<$offset>,
        }

        impl $offsets {
            /// Return an empty list of offsets.
            #[inline]
            pub fn none() -> Self {
                Self {
                    base_id: BaseId::default(),
                    offsets: Vec::new(),
                }
            }

            /// Return the number of offsets.
            #[inline]
            pub fn count(&self) -> usize {
                self.offsets.len()
            }

            /// Get the offset
            ///
            /// # Panics
            ///
            /// Panics if `id` is invalid.
            #[inline]
            pub fn get(&self, id: $id) -> $offset {
                // Only checked in debug builds: the id must belong to this table.
                debug_assert_eq!(self.base_id, id.base_id);
                self.offsets[id.index]
            }
        }
    };
}
+ InvalidAttributeValue, + /// The value is too large for the encoding form. + ValueTooLarge, + /// Unsupported word size. + UnsupportedWordSize(u8), + /// Unsupported DWARF version. + UnsupportedVersion(u16), + /// The unit length is too large for the requested DWARF format. + InitialLengthOverflow, + /// The address is invalid. + InvalidAddress, + /// The reference is invalid. + InvalidReference, + /// A requested feature requires a different DWARF version. + NeedVersion(u16), + /// Strings in line number program have mismatched forms. + LineStringFormMismatch, + /// The range is empty or otherwise invalid. + InvalidRange, + /// The line number program encoding is incompatible with the unit encoding. + IncompatibleLineProgramEncoding, + /// Could not encode code offset for a frame instruction. + InvalidFrameCodeOffset(u32), + /// Could not encode data offset for a frame instruction. + InvalidFrameDataOffset(i32), + /// Unsupported eh_frame pointer encoding. + UnsupportedPointerEncoding(constants::DwEhPe), + /// Unsupported reference in CFI expression. + UnsupportedCfiExpressionReference, + /// Unsupported forward reference in expression. + UnsupportedExpressionForwardReference, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> result::Result<(), fmt::Error> { + match *self { + Error::OffsetOutOfBounds => write!(f, "The given offset is out of bounds."), + Error::LengthOutOfBounds => write!(f, "The given length is out of bounds."), + Error::InvalidAttributeValue => { + write!(f, "The attribute value is an invalid for writing.") + } + Error::ValueTooLarge => write!(f, "The value is too large for the encoding form."), + Error::UnsupportedWordSize(size) => write!(f, "Unsupported word size: {}", size), + Error::UnsupportedVersion(version) => { + write!(f, "Unsupported DWARF version: {}", version) + } + Error::InitialLengthOverflow => write!( + f, + "The unit length is too large for the requested DWARF format." 
+ ), + Error::InvalidAddress => write!(f, "The address is invalid."), + Error::InvalidReference => write!(f, "The reference is invalid."), + Error::NeedVersion(version) => write!( + f, + "A requested feature requires a DWARF version {}.", + version + ), + Error::LineStringFormMismatch => { + write!(f, "Strings in line number program have mismatched forms.") + } + Error::InvalidRange => write!(f, "The range is empty or otherwise invalid."), + Error::IncompatibleLineProgramEncoding => write!( + f, + "The line number program encoding is incompatible with the unit encoding." + ), + Error::InvalidFrameCodeOffset(offset) => write!( + f, + "Could not encode code offset ({}) for a frame instruction.", + offset, + ), + Error::InvalidFrameDataOffset(offset) => write!( + f, + "Could not encode data offset ({}) for a frame instruction.", + offset, + ), + Error::UnsupportedPointerEncoding(eh_pe) => { + write!(f, "Unsupported eh_frame pointer encoding ({}).", eh_pe) + } + Error::UnsupportedCfiExpressionReference => { + write!(f, "Unsupported reference in CFI expression.") + } + Error::UnsupportedExpressionForwardReference => { + write!(f, "Unsupported forward reference in expression.") + } + } + } +} + +impl error::Error for Error {} + +/// The result of a write. +pub type Result = result::Result; + +/// An address. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Address { + /// A fixed address that does not require relocation. + Constant(u64), + /// An address that is relative to a symbol which may be relocated. + Symbol { + /// The symbol that the address is relative to. + /// + /// The meaning of this value is decided by the writer, but + /// will typically be an index into a symbol table. + symbol: usize, + /// The offset of the address relative to the symbol. + /// + /// This will typically be used as the addend in a relocation. + addend: i64, + }, +} + +/// A reference to a `.debug_info` entry. 
// Types shared by the code that converts read DWARF data into its writable
// representation. Only built when the `read` feature is enabled.
#[cfg(feature = "read")]
mod convert {
    use super::*;
    use crate::read;

    pub(crate) use super::unit::convert::*;

    /// An error that occurred when converting a read value into a write value.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum ConvertError {
        /// An error occurred when reading.
        Read(read::Error),
        /// Writing of this attribute value is not implemented yet.
        UnsupportedAttributeValue,
        /// This attribute value is an invalid name/form combination.
        InvalidAttributeValue,
        /// A `.debug_info` reference does not refer to a valid entry.
        InvalidDebugInfoOffset,
        /// An address could not be converted.
        InvalidAddress,
        /// Writing this line number instruction is not implemented yet.
        UnsupportedLineInstruction,
        /// Writing this form of line string is not implemented yet.
        UnsupportedLineStringForm,
        /// A `.debug_line` file index is invalid.
        InvalidFileIndex,
        /// A `.debug_line` directory index is invalid.
        InvalidDirectoryIndex,
        /// A `.debug_line` line base is invalid.
        InvalidLineBase,
        /// A `.debug_line` reference is invalid.
        InvalidLineRef,
        /// A `.debug_info` unit entry reference is invalid.
        InvalidUnitRef,
        /// A `.debug_info` reference is invalid.
        InvalidDebugInfoRef,
        /// Invalid relative address in a range list.
        InvalidRangeRelativeAddress,
        /// Writing this CFI instruction is not implemented yet.
        UnsupportedCfiInstruction,
        /// Writing indirect pointers is not implemented yet.
        UnsupportedIndirectAddress,
        /// Writing this expression operation is not implemented yet.
        UnsupportedOperation,
        /// Operation branch target is invalid.
        InvalidBranchTarget,
        /// Writing this unit type is not supported yet.
        UnsupportedUnitType,
    }

    impl fmt::Display for ConvertError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> result::Result<(), fmt::Error> {
            use self::ConvertError::*;
            match *self {
                // A wrapped read error is formatted by delegating to it.
                Read(ref e) => e.fmt(f),
                UnsupportedAttributeValue => {
                    write!(f, "Writing of this attribute value is not implemented yet.")
                }
                InvalidAttributeValue => write!(
                    f,
                    "This attribute value is an invalid name/form combination."
                ),
                InvalidDebugInfoOffset => write!(
                    f,
                    "A `.debug_info` reference does not refer to a valid entry."
                ),
                InvalidAddress => write!(f, "An address could not be converted."),
                UnsupportedLineInstruction => write!(
                    f,
                    "Writing this line number instruction is not implemented yet."
                ),
                UnsupportedLineStringForm => write!(
                    f,
                    "Writing this form of line string is not implemented yet."
                ),
                InvalidFileIndex => write!(f, "A `.debug_line` file index is invalid."),
                InvalidDirectoryIndex => write!(f, "A `.debug_line` directory index is invalid."),
                InvalidLineBase => write!(f, "A `.debug_line` line base is invalid."),
                InvalidLineRef => write!(f, "A `.debug_line` reference is invalid."),
                InvalidUnitRef => write!(f, "A `.debug_info` unit entry reference is invalid."),
                InvalidDebugInfoRef => write!(f, "A `.debug_info` reference is invalid."),
                InvalidRangeRelativeAddress => {
                    write!(f, "Invalid relative address in a range list.")
                }
                UnsupportedCfiInstruction => {
                    write!(f, "Writing this CFI instruction is not implemented yet.")
                }
                UnsupportedIndirectAddress => {
                    write!(f, "Writing indirect pointers is not implemented yet.")
                }
                UnsupportedOperation => write!(
                    f,
                    "Writing this expression operation is not implemented yet."
                ),
                InvalidBranchTarget => write!(f, "Operation branch target is invalid."),
                UnsupportedUnitType => write!(f, "Writing this unit type is not supported yet."),
            }
        }
    }

    impl error::Error for ConvertError {}

    /// Wraps a read error, so that `?` can be applied to read results inside
    /// conversion code.
    impl From<read::Error> for ConvertError {
        fn from(e: read::Error) -> Self {
            ConvertError::Read(e)
        }
    }

    /// The result of a conversion.
    ///
    /// Generic over the success type; the error type is always
    /// [`ConvertError`].
    pub type ConvertResult<T> = result::Result<T, ConvertError>;
}
+#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct Expression { + operations: Vec, +} + +impl Expression { + /// Create an empty expression. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Create an expression from raw bytecode. + /// + /// This does not support operations that require references, such as `DW_OP_addr`. + #[inline] + pub fn raw(bytecode: Vec) -> Self { + Expression { + operations: vec![Operation::Raw(bytecode)], + } + } + + /// Add an operation to the expression. + /// + /// This should only be used for operations that have no explicit operands. + pub fn op(&mut self, opcode: DwOp) { + self.operations.push(Operation::Simple(opcode)); + } + + /// Add a `DW_OP_addr` operation to the expression. + pub fn op_addr(&mut self, address: Address) { + self.operations.push(Operation::Address(address)); + } + + /// Add a `DW_OP_constu` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_constu(&mut self, value: u64) { + self.operations.push(Operation::UnsignedConstant(value)); + } + + /// Add a `DW_OP_consts` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_consts(&mut self, value: i64) { + self.operations.push(Operation::SignedConstant(value)); + } + + /// Add a `DW_OP_const_type` or `DW_OP_GNU_const_type` operation to the expression. + pub fn op_const_type(&mut self, base: UnitEntryId, value: Box<[u8]>) { + self.operations.push(Operation::ConstantType(base, value)); + } + + /// Add a `DW_OP_fbreg` operation to the expression. + pub fn op_fbreg(&mut self, offset: i64) { + self.operations.push(Operation::FrameOffset(offset)); + } + + /// Add a `DW_OP_bregx` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. 
+ pub fn op_breg(&mut self, register: Register, offset: i64) { + self.operations + .push(Operation::RegisterOffset(register, offset)); + } + + /// Add a `DW_OP_regval_type` or `DW_OP_GNU_regval_type` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_regval_type(&mut self, register: Register, base: UnitEntryId) { + self.operations + .push(Operation::RegisterType(register, base)); + } + + /// Add a `DW_OP_pick` operation to the expression. + /// + /// This may be emitted as a `DW_OP_dup` or `DW_OP_over` operation. + pub fn op_pick(&mut self, index: u8) { + self.operations.push(Operation::Pick(index)); + } + + /// Add a `DW_OP_deref` operation to the expression. + pub fn op_deref(&mut self) { + self.operations.push(Operation::Deref { space: false }); + } + + /// Add a `DW_OP_xderef` operation to the expression. + pub fn op_xderef(&mut self) { + self.operations.push(Operation::Deref { space: true }); + } + + /// Add a `DW_OP_deref_size` operation to the expression. + pub fn op_deref_size(&mut self, size: u8) { + self.operations + .push(Operation::DerefSize { size, space: false }); + } + + /// Add a `DW_OP_xderef_size` operation to the expression. + pub fn op_xderef_size(&mut self, size: u8) { + self.operations + .push(Operation::DerefSize { size, space: true }); + } + + /// Add a `DW_OP_deref_type` or `DW_OP_GNU_deref_type` operation to the expression. + pub fn op_deref_type(&mut self, size: u8, base: UnitEntryId) { + self.operations.push(Operation::DerefType { + size, + base, + space: false, + }); + } + + /// Add a `DW_OP_xderef_type` operation to the expression. + pub fn op_xderef_type(&mut self, size: u8, base: UnitEntryId) { + self.operations.push(Operation::DerefType { + size, + base, + space: true, + }); + } + + /// Add a `DW_OP_plus_uconst` operation to the expression. 
+ pub fn op_plus_uconst(&mut self, value: u64) { + self.operations.push(Operation::PlusConstant(value)); + } + + /// Add a `DW_OP_skip` operation to the expression. + /// + /// Returns the index of the operation. The caller must call `set_target` with + /// this index to set the target of the branch. + pub fn op_skip(&mut self) -> usize { + let index = self.next_index(); + self.operations.push(Operation::Skip(!0)); + index + } + + /// Add a `DW_OP_bra` operation to the expression. + /// + /// Returns the index of the operation. The caller must call `set_target` with + /// this index to set the target of the branch. + pub fn op_bra(&mut self) -> usize { + let index = self.next_index(); + self.operations.push(Operation::Branch(!0)); + index + } + + /// Return the index that will be assigned to the next operation. + /// + /// This can be passed to `set_target`. + #[inline] + pub fn next_index(&self) -> usize { + self.operations.len() + } + + /// Set the target of a `DW_OP_skip` or `DW_OP_bra` operation . + pub fn set_target(&mut self, operation: usize, new_target: usize) { + debug_assert!(new_target <= self.next_index()); + debug_assert_ne!(operation, new_target); + match self.operations[operation] { + Operation::Skip(ref mut target) | Operation::Branch(ref mut target) => { + *target = new_target; + } + _ => unimplemented!(), + } + } + + /// Add a `DW_OP_call4` operation to the expression. + pub fn op_call(&mut self, entry: UnitEntryId) { + self.operations.push(Operation::Call(entry)); + } + + /// Add a `DW_OP_call_ref` operation to the expression. + pub fn op_call_ref(&mut self, entry: Reference) { + self.operations.push(Operation::CallRef(entry)); + } + + /// Add a `DW_OP_convert` or `DW_OP_GNU_convert` operation to the expression. + /// + /// `base` is the DIE of the base type, or `None` for the generic type. 
+ pub fn op_convert(&mut self, base: Option) { + self.operations.push(Operation::Convert(base)); + } + + /// Add a `DW_OP_reinterpret` or `DW_OP_GNU_reinterpret` operation to the expression. + /// + /// `base` is the DIE of the base type, or `None` for the generic type. + pub fn op_reinterpret(&mut self, base: Option) { + self.operations.push(Operation::Reinterpret(base)); + } + + /// Add a `DW_OP_entry_value` or `DW_OP_GNU_entry_value` operation to the expression. + pub fn op_entry_value(&mut self, expression: Expression) { + self.operations.push(Operation::EntryValue(expression)); + } + + /// Add a `DW_OP_regx` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_reg(&mut self, register: Register) { + self.operations.push(Operation::Register(register)); + } + + /// Add a `DW_OP_implicit_value` operation to the expression. + pub fn op_implicit_value(&mut self, data: Box<[u8]>) { + self.operations.push(Operation::ImplicitValue(data)); + } + + /// Add a `DW_OP_implicit_pointer` or `DW_OP_GNU_implicit_pointer` operation to the expression. + pub fn op_implicit_pointer(&mut self, entry: Reference, byte_offset: i64) { + self.operations + .push(Operation::ImplicitPointer { entry, byte_offset }); + } + + /// Add a `DW_OP_piece` operation to the expression. + pub fn op_piece(&mut self, size_in_bytes: u64) { + self.operations.push(Operation::Piece { size_in_bytes }); + } + + /// Add a `DW_OP_bit_piece` operation to the expression. + pub fn op_bit_piece(&mut self, size_in_bits: u64, bit_offset: u64) { + self.operations.push(Operation::BitPiece { + size_in_bits, + bit_offset, + }); + } + + /// Add a `DW_OP_GNU_parameter_ref` operation to the expression. + pub fn op_gnu_parameter_ref(&mut self, entry: UnitEntryId) { + self.operations.push(Operation::ParameterRef(entry)); + } + + /// Add a `DW_OP_WASM_location 0x0` operation to the expression. 
+ pub fn op_wasm_local(&mut self, index: u32) { + self.operations.push(Operation::WasmLocal(index)); + } + + /// Add a `DW_OP_WASM_location 0x1` operation to the expression. + pub fn op_wasm_global(&mut self, index: u32) { + self.operations.push(Operation::WasmGlobal(index)); + } + + /// Add a `DW_OP_WASM_location 0x2` operation to the expression. + pub fn op_wasm_stack(&mut self, index: u32) { + self.operations.push(Operation::WasmStack(index)); + } + + pub(crate) fn size(&self, encoding: Encoding, unit_offsets: Option<&UnitOffsets>) -> usize { + let mut size = 0; + for operation in &self.operations { + size += operation.size(encoding, unit_offsets); + } + size + } + + pub(crate) fn write( + &self, + w: &mut W, + mut refs: Option<&mut Vec>, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + ) -> Result<()> { + // TODO: only calculate offsets if needed? + let mut offsets = Vec::with_capacity(self.operations.len()); + let mut offset = w.len(); + for operation in &self.operations { + offsets.push(offset); + offset += operation.size(encoding, unit_offsets); + } + offsets.push(offset); + for (operation, offset) in self.operations.iter().zip(offsets.iter().copied()) { + debug_assert_eq!(w.len(), offset); + operation.write(w, refs.as_deref_mut(), encoding, unit_offsets, &offsets)?; + } + Ok(()) + } +} + +/// A single DWARF operation. +// +// This type is intentionally not public so that we can change the +// representation of expressions as needed. +// +// Variants are listed in the order they appear in Section 2.5. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum Operation { + /// Raw bytecode. + /// + /// Does not support references. + Raw(Vec), + /// An operation that has no explicit operands. 
+ /// + /// Represents: + /// - `DW_OP_drop`, `DW_OP_swap`, `DW_OP_rot` + /// - `DW_OP_push_object_address`, `DW_OP_form_tls_address`, `DW_OP_call_frame_cfa` + /// - `DW_OP_abs`, `DW_OP_and`, `DW_OP_div`, `DW_OP_minus`, `DW_OP_mod`, `DW_OP_mul`, + /// `DW_OP_neg`, `DW_OP_not`, `DW_OP_or`, `DW_OP_plus`, `DW_OP_shl`, `DW_OP_shr`, + /// `DW_OP_shra`, `DW_OP_xor` + /// - `DW_OP_le`, `DW_OP_ge`, `DW_OP_eq`, `DW_OP_lt`, `DW_OP_gt`, `DW_OP_ne` + /// - `DW_OP_nop` + /// - `DW_OP_stack_value` + Simple(DwOp), + /// Relocate the address if needed, and push it on the stack. + /// + /// Represents `DW_OP_addr`. + Address(Address), + /// Push an unsigned constant value on the stack. + /// + /// Represents `DW_OP_constu`. + UnsignedConstant(u64), + /// Push a signed constant value on the stack. + /// + /// Represents `DW_OP_consts`. + SignedConstant(i64), + /* TODO: requires .debug_addr write support + /// Read the address at the given index in `.debug_addr, relocate the address if needed, + /// and push it on the stack. + /// + /// Represents `DW_OP_addrx`. + AddressIndex(DebugAddrIndex), + /// Read the address at the given index in `.debug_addr, and push it on the stack. + /// Do not relocate the address. + /// + /// Represents `DW_OP_constx`. + ConstantIndex(DebugAddrIndex), + */ + /// Interpret the value bytes as a constant of a given type, and push it on the stack. + /// + /// Represents `DW_OP_const_type`. + ConstantType(UnitEntryId, Box<[u8]>), + /// Compute the frame base (using `DW_AT_frame_base`), add the + /// given offset, and then push the resulting sum on the stack. + /// + /// Represents `DW_OP_fbreg`. + FrameOffset(i64), + /// Find the contents of the given register, add the offset, and then + /// push the resulting sum on the stack. + /// + /// Represents `DW_OP_bregx`. + RegisterOffset(Register, i64), + /// Interpret the contents of the given register as a value of the given type, + /// and push it on the stack. + /// + /// Represents `DW_OP_regval_type`. 
+ RegisterType(Register, UnitEntryId), + /// Copy the item at a stack index and push it on top of the stack. + /// + /// Represents `DW_OP_pick`, `DW_OP_dup`, and `DW_OP_over`. + Pick(u8), + /// Pop the topmost value of the stack, dereference it, and push the + /// resulting value. + /// + /// Represents `DW_OP_deref` and `DW_OP_xderef`. + Deref { + /// True if the dereference operation takes an address space + /// argument from the stack; false otherwise. + space: bool, + }, + /// Pop the topmost value of the stack, dereference it to obtain a value + /// of the given size, and push the resulting value. + /// + /// Represents `DW_OP_deref_size` and `DW_OP_xderef_size`. + DerefSize { + /// True if the dereference operation takes an address space + /// argument from the stack; false otherwise. + space: bool, + /// The size of the data to dereference. + size: u8, + }, + /// Pop the topmost value of the stack, dereference it to obtain a value + /// of the given type, and push the resulting value. + /// + /// Represents `DW_OP_deref_type` and `DW_OP_xderef_type`. + DerefType { + /// True if the dereference operation takes an address space + /// argument from the stack; false otherwise. + space: bool, + /// The size of the data to dereference. + size: u8, + /// The DIE of the base type, or `None` for the generic type. + base: UnitEntryId, + }, + /// Add an unsigned constant to the topmost value on the stack. + /// + /// Represents `DW_OP_plus_uconst`. + PlusConstant(u64), + /// Unconditional branch to the target location. + /// + /// The value is the index within the expression of the operation to branch to. + /// This will be converted to a relative offset when writing. + /// + /// Represents `DW_OP_skip`. + Skip(usize), + /// Branch to the target location if the top of stack is nonzero. + /// + /// The value is the index within the expression of the operation to branch to. + /// This will be converted to a relative offset when writing. 
+ /// + /// Represents `DW_OP_bra`. + Branch(usize), + /// Evaluate a DWARF expression as a subroutine. + /// + /// The expression comes from the `DW_AT_location` attribute of the indicated DIE. + /// + /// Represents `DW_OP_call4`. + Call(UnitEntryId), + /// Evaluate an external DWARF expression as a subroutine. + /// + /// The expression comes from the `DW_AT_location` attribute of the indicated DIE, + /// which may be in another compilation unit or shared object. + /// + /// Represents `DW_OP_call_ref`. + CallRef(Reference), + /// Pop the top stack entry, convert it to a different type, and push it on the stack. + /// + /// Represents `DW_OP_convert`. + Convert(Option), + /// Pop the top stack entry, reinterpret the bits in its value as a different type, + /// and push it on the stack. + /// + /// Represents `DW_OP_reinterpret`. + Reinterpret(Option), + /// Evaluate an expression at the entry to the current subprogram, and push it on the stack. + /// + /// Represents `DW_OP_entry_value`. + EntryValue(Expression), + // FIXME: EntryRegister + /// Indicate that this piece's location is in the given register. + /// + /// Completes the piece or expression. + /// + /// Represents `DW_OP_regx`. + Register(Register), + /// The object has no location, but has a known constant value. + /// + /// Completes the piece or expression. + /// + /// Represents `DW_OP_implicit_value`. + ImplicitValue(Box<[u8]>), + /// The object is a pointer to a value which has no actual location, such as + /// an implicit value or a stack value. + /// + /// Completes the piece or expression. + /// + /// Represents `DW_OP_implicit_pointer`. + ImplicitPointer { + /// The DIE of the value that this is an implicit pointer into. + entry: Reference, + /// The byte offset into the value that the implicit pointer points to. + byte_offset: i64, + }, + /// Terminate a piece. + /// + /// Represents `DW_OP_piece`. + Piece { + /// The size of this piece in bytes. 
+ size_in_bytes: u64, + }, + /// Terminate a piece with a size in bits. + /// + /// Represents `DW_OP_bit_piece`. + BitPiece { + /// The size of this piece in bits. + size_in_bits: u64, + /// The bit offset of this piece. + bit_offset: u64, + }, + /// This represents a parameter that was optimized out. + /// + /// The entry is the definition of the parameter, and is matched to + /// the `DW_TAG_GNU_call_site_parameter` in the caller that also + /// points to the same definition of the parameter. + /// + /// Represents `DW_OP_GNU_parameter_ref`. + ParameterRef(UnitEntryId), + /// The index of a local in the currently executing function. + /// + /// Represents `DW_OP_WASM_location 0x00`. + WasmLocal(u32), + /// The index of a global. + /// + /// Represents `DW_OP_WASM_location 0x01`. + WasmGlobal(u32), + /// The index of an item on the operand stack. + /// + /// Represents `DW_OP_WASM_location 0x02`. + WasmStack(u32), +} + +impl Operation { + fn size(&self, encoding: Encoding, unit_offsets: Option<&UnitOffsets>) -> usize { + let base_size = |base| { + // Errors are handled during writes. 
+ match unit_offsets { + Some(offsets) => uleb128_size(offsets.unit_offset(base)), + None => 0, + } + }; + 1 + match *self { + Operation::Raw(ref bytecode) => return bytecode.len(), + Operation::Simple(_) => 0, + Operation::Address(_) => encoding.address_size as usize, + Operation::UnsignedConstant(value) => { + if value < 32 { + 0 + } else { + uleb128_size(value) + } + } + Operation::SignedConstant(value) => sleb128_size(value), + Operation::ConstantType(base, ref value) => base_size(base) + 1 + value.len(), + Operation::FrameOffset(offset) => sleb128_size(offset), + Operation::RegisterOffset(register, offset) => { + if register.0 < 32 { + sleb128_size(offset) + } else { + uleb128_size(register.0.into()) + sleb128_size(offset) + } + } + Operation::RegisterType(register, base) => { + uleb128_size(register.0.into()) + base_size(base) + } + Operation::Pick(index) => { + if index > 1 { + 1 + } else { + 0 + } + } + Operation::Deref { .. } => 0, + Operation::DerefSize { .. } => 1, + Operation::DerefType { base, .. } => 1 + base_size(base), + Operation::PlusConstant(value) => uleb128_size(value), + Operation::Skip(_) => 2, + Operation::Branch(_) => 2, + Operation::Call(_) => 4, + Operation::CallRef(_) => encoding.format.word_size() as usize, + Operation::Convert(base) => match base { + Some(base) => base_size(base), + None => 1, + }, + Operation::Reinterpret(base) => match base { + Some(base) => base_size(base), + None => 1, + }, + Operation::EntryValue(ref expression) => { + let length = expression.size(encoding, unit_offsets); + uleb128_size(length as u64) + length + } + Operation::Register(register) => { + if register.0 < 32 { + 0 + } else { + uleb128_size(register.0.into()) + } + } + Operation::ImplicitValue(ref data) => uleb128_size(data.len() as u64) + data.len(), + Operation::ImplicitPointer { byte_offset, .. 
} => { + let size = if encoding.version == 2 { + encoding.address_size + } else { + encoding.format.word_size() + }; + size as usize + sleb128_size(byte_offset) + } + Operation::Piece { size_in_bytes } => uleb128_size(size_in_bytes), + Operation::BitPiece { + size_in_bits, + bit_offset, + } => uleb128_size(size_in_bits) + uleb128_size(bit_offset), + Operation::ParameterRef(_) => 4, + Operation::WasmLocal(index) + | Operation::WasmGlobal(index) + | Operation::WasmStack(index) => 1 + uleb128_size(index.into()), + } + } + + pub(crate) fn write( + &self, + w: &mut W, + refs: Option<&mut Vec>, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + offsets: &[usize], + ) -> Result<()> { + let entry_offset = |entry| match unit_offsets { + Some(offsets) => { + let offset = offsets.unit_offset(entry); + if offset == 0 { + Err(Error::UnsupportedExpressionForwardReference) + } else { + Ok(offset) + } + } + None => Err(Error::UnsupportedCfiExpressionReference), + }; + match *self { + Operation::Raw(ref bytecode) => w.write(bytecode)?, + Operation::Simple(opcode) => w.write_u8(opcode.0)?, + Operation::Address(address) => { + w.write_u8(constants::DW_OP_addr.0)?; + w.write_address(address, encoding.address_size)?; + } + Operation::UnsignedConstant(value) => { + if value < 32 { + w.write_u8(constants::DW_OP_lit0.0 + value as u8)?; + } else { + w.write_u8(constants::DW_OP_constu.0)?; + w.write_uleb128(value)?; + } + } + Operation::SignedConstant(value) => { + w.write_u8(constants::DW_OP_consts.0)?; + w.write_sleb128(value)?; + } + Operation::ConstantType(base, ref value) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_const_type.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_const_type.0)?; + } + w.write_uleb128(entry_offset(base)?)?; + w.write_udata(value.len() as u64, 1)?; + w.write(value)?; + } + Operation::FrameOffset(offset) => { + w.write_u8(constants::DW_OP_fbreg.0)?; + w.write_sleb128(offset)?; + } + Operation::RegisterOffset(register, offset) => 
{ + if register.0 < 32 { + w.write_u8(constants::DW_OP_breg0.0 + register.0 as u8)?; + } else { + w.write_u8(constants::DW_OP_bregx.0)?; + w.write_uleb128(register.0.into())?; + } + w.write_sleb128(offset)?; + } + Operation::RegisterType(register, base) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_regval_type.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_regval_type.0)?; + } + w.write_uleb128(register.0.into())?; + w.write_uleb128(entry_offset(base)?)?; + } + Operation::Pick(index) => match index { + 0 => w.write_u8(constants::DW_OP_dup.0)?, + 1 => w.write_u8(constants::DW_OP_over.0)?, + _ => { + w.write_u8(constants::DW_OP_pick.0)?; + w.write_u8(index)?; + } + }, + Operation::Deref { space } => { + if space { + w.write_u8(constants::DW_OP_xderef.0)?; + } else { + w.write_u8(constants::DW_OP_deref.0)?; + } + } + Operation::DerefSize { space, size } => { + if space { + w.write_u8(constants::DW_OP_xderef_size.0)?; + } else { + w.write_u8(constants::DW_OP_deref_size.0)?; + } + w.write_u8(size)?; + } + Operation::DerefType { space, size, base } => { + if space { + w.write_u8(constants::DW_OP_xderef_type.0)?; + } else { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_deref_type.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_deref_type.0)?; + } + } + w.write_u8(size)?; + w.write_uleb128(entry_offset(base)?)?; + } + Operation::PlusConstant(value) => { + w.write_u8(constants::DW_OP_plus_uconst.0)?; + w.write_uleb128(value)?; + } + Operation::Skip(target) => { + w.write_u8(constants::DW_OP_skip.0)?; + let offset = offsets[target] as i64 - (w.len() as i64 + 2); + w.write_sdata(offset, 2)?; + } + Operation::Branch(target) => { + w.write_u8(constants::DW_OP_bra.0)?; + let offset = offsets[target] as i64 - (w.len() as i64 + 2); + w.write_sdata(offset, 2)?; + } + Operation::Call(entry) => { + w.write_u8(constants::DW_OP_call4.0)?; + // TODO: this probably won't work in practice, because we may + // only know the offsets of base type DIEs at 
this point. + w.write_udata(entry_offset(entry)?, 4)?; + } + Operation::CallRef(entry) => { + w.write_u8(constants::DW_OP_call_ref.0)?; + let size = encoding.format.word_size(); + match entry { + Reference::Symbol(symbol) => w.write_reference(symbol, size)?, + Reference::Entry(unit, entry) => { + let refs = refs.ok_or(Error::InvalidReference)?; + refs.push(DebugInfoReference { + offset: w.len(), + unit, + entry, + size, + }); + w.write_udata(0, size)?; + } + } + } + Operation::Convert(base) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_convert.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_convert.0)?; + } + match base { + Some(base) => w.write_uleb128(entry_offset(base)?)?, + None => w.write_u8(0)?, + } + } + Operation::Reinterpret(base) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_reinterpret.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_reinterpret.0)?; + } + match base { + Some(base) => w.write_uleb128(entry_offset(base)?)?, + None => w.write_u8(0)?, + } + } + Operation::EntryValue(ref expression) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_entry_value.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_entry_value.0)?; + } + let length = expression.size(encoding, unit_offsets); + w.write_uleb128(length as u64)?; + expression.write(w, refs, encoding, unit_offsets)?; + } + Operation::Register(register) => { + if register.0 < 32 { + w.write_u8(constants::DW_OP_reg0.0 + register.0 as u8)?; + } else { + w.write_u8(constants::DW_OP_regx.0)?; + w.write_uleb128(register.0.into())?; + } + } + Operation::ImplicitValue(ref data) => { + w.write_u8(constants::DW_OP_implicit_value.0)?; + w.write_uleb128(data.len() as u64)?; + w.write(data)?; + } + Operation::ImplicitPointer { entry, byte_offset } => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_implicit_pointer.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_implicit_pointer.0)?; + } + let size = if encoding.version == 2 { + encoding.address_size 
+ } else { + encoding.format.word_size() + }; + match entry { + Reference::Symbol(symbol) => { + w.write_reference(symbol, size)?; + } + Reference::Entry(unit, entry) => { + let refs = refs.ok_or(Error::InvalidReference)?; + refs.push(DebugInfoReference { + offset: w.len(), + unit, + entry, + size, + }); + w.write_udata(0, size)?; + } + } + w.write_sleb128(byte_offset)?; + } + Operation::Piece { size_in_bytes } => { + w.write_u8(constants::DW_OP_piece.0)?; + w.write_uleb128(size_in_bytes)?; + } + Operation::BitPiece { + size_in_bits, + bit_offset, + } => { + w.write_u8(constants::DW_OP_bit_piece.0)?; + w.write_uleb128(size_in_bits)?; + w.write_uleb128(bit_offset)?; + } + Operation::ParameterRef(entry) => { + w.write_u8(constants::DW_OP_GNU_parameter_ref.0)?; + w.write_udata(entry_offset(entry)?, 4)?; + } + Operation::WasmLocal(index) => { + w.write(&[constants::DW_OP_WASM_location.0, 0])?; + w.write_uleb128(index.into())?; + } + Operation::WasmGlobal(index) => { + w.write(&[constants::DW_OP_WASM_location.0, 1])?; + w.write_uleb128(index.into())?; + } + Operation::WasmStack(index) => { + w.write(&[constants::DW_OP_WASM_location.0, 2])?; + w.write_uleb128(index.into())?; + } + } + Ok(()) + } +} + +#[cfg(feature = "read")] +pub(crate) mod convert { + use super::*; + use crate::common::UnitSectionOffset; + use crate::read::{self, Reader}; + use crate::write::{ConvertError, ConvertResult, UnitId}; + use std::collections::HashMap; + + impl Expression { + /// Create an expression from the input expression. + pub fn from>( + from_expression: read::Expression, + encoding: Encoding, + dwarf: Option<&read::Dwarf>, + unit: Option<&read::Unit>, + entry_ids: Option<&HashMap>, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult { + let convert_unit_offset = |offset: read::UnitOffset| -> ConvertResult<_> { + let entry_ids = entry_ids.ok_or(ConvertError::UnsupportedOperation)?; + let unit = unit.ok_or(ConvertError::UnsupportedOperation)?; + let id = entry_ids + .get(&offset.to_unit_section_offset(unit)) + .ok_or(ConvertError::InvalidUnitRef)?; + Ok(id.1) + }; + let convert_debug_info_offset = |offset| -> ConvertResult<_> { + // TODO: support relocations + let entry_ids = entry_ids.ok_or(ConvertError::UnsupportedOperation)?; + let id = entry_ids + .get(&UnitSectionOffset::DebugInfoOffset(offset)) + .ok_or(ConvertError::InvalidDebugInfoRef)?; + Ok(Reference::Entry(id.0, id.1)) + }; + + // Calculate offsets for use in branch/skip operations. + let mut offsets = Vec::new(); + let mut offset = 0; + let mut from_operations = from_expression.clone().operations(encoding); + while from_operations.next()?.is_some() { + offsets.push(offset); + offset = from_operations.offset_from(&from_expression); + } + offsets.push(from_expression.0.len()); + + let mut from_operations = from_expression.clone().operations(encoding); + let mut operations = Vec::new(); + while let Some(from_operation) = from_operations.next()? 
{ + let operation = match from_operation { + read::Operation::Deref { + base_type, + size, + space, + } => { + if base_type.0 != 0 { + let base = convert_unit_offset(base_type)?; + Operation::DerefType { space, size, base } + } else if size != encoding.address_size { + Operation::DerefSize { space, size } + } else { + Operation::Deref { space } + } + } + read::Operation::Drop => Operation::Simple(constants::DW_OP_drop), + read::Operation::Pick { index } => Operation::Pick(index), + read::Operation::Swap => Operation::Simple(constants::DW_OP_swap), + read::Operation::Rot => Operation::Simple(constants::DW_OP_rot), + read::Operation::Abs => Operation::Simple(constants::DW_OP_abs), + read::Operation::And => Operation::Simple(constants::DW_OP_and), + read::Operation::Div => Operation::Simple(constants::DW_OP_div), + read::Operation::Minus => Operation::Simple(constants::DW_OP_minus), + read::Operation::Mod => Operation::Simple(constants::DW_OP_mod), + read::Operation::Mul => Operation::Simple(constants::DW_OP_mul), + read::Operation::Neg => Operation::Simple(constants::DW_OP_neg), + read::Operation::Not => Operation::Simple(constants::DW_OP_not), + read::Operation::Or => Operation::Simple(constants::DW_OP_or), + read::Operation::Plus => Operation::Simple(constants::DW_OP_plus), + read::Operation::PlusConstant { value } => Operation::PlusConstant(value), + read::Operation::Shl => Operation::Simple(constants::DW_OP_shl), + read::Operation::Shr => Operation::Simple(constants::DW_OP_shr), + read::Operation::Shra => Operation::Simple(constants::DW_OP_shra), + read::Operation::Xor => Operation::Simple(constants::DW_OP_xor), + read::Operation::Eq => Operation::Simple(constants::DW_OP_eq), + read::Operation::Ge => Operation::Simple(constants::DW_OP_ge), + read::Operation::Gt => Operation::Simple(constants::DW_OP_gt), + read::Operation::Le => Operation::Simple(constants::DW_OP_le), + read::Operation::Lt => Operation::Simple(constants::DW_OP_lt), + read::Operation::Ne => 
Operation::Simple(constants::DW_OP_ne), + read::Operation::Bra { target } => { + let offset = from_operations + .offset_from(&from_expression) + .wrapping_add(i64::from(target) as usize); + let index = offsets + .binary_search(&offset) + .map_err(|_| ConvertError::InvalidBranchTarget)?; + Operation::Branch(index) + } + read::Operation::Skip { target } => { + let offset = from_operations + .offset_from(&from_expression) + .wrapping_add(i64::from(target) as usize); + let index = offsets + .binary_search(&offset) + .map_err(|_| ConvertError::InvalidBranchTarget)?; + Operation::Skip(index) + } + read::Operation::UnsignedConstant { value } => { + Operation::UnsignedConstant(value) + } + read::Operation::SignedConstant { value } => Operation::SignedConstant(value), + read::Operation::Register { register } => Operation::Register(register), + read::Operation::RegisterOffset { + register, + offset, + base_type, + } => { + if base_type.0 != 0 { + Operation::RegisterType(register, convert_unit_offset(base_type)?) + } else { + Operation::RegisterOffset(register, offset) + } + } + read::Operation::FrameOffset { offset } => Operation::FrameOffset(offset), + read::Operation::Nop => Operation::Simple(constants::DW_OP_nop), + read::Operation::PushObjectAddress => { + Operation::Simple(constants::DW_OP_push_object_address) + } + read::Operation::Call { offset } => match offset { + read::DieReference::UnitRef(offset) => { + Operation::Call(convert_unit_offset(offset)?) + } + read::DieReference::DebugInfoRef(offset) => { + Operation::CallRef(convert_debug_info_offset(offset)?) 
+ } + }, + read::Operation::TLS => Operation::Simple(constants::DW_OP_form_tls_address), + read::Operation::CallFrameCFA => { + Operation::Simple(constants::DW_OP_call_frame_cfa) + } + read::Operation::Piece { + size_in_bits, + bit_offset: None, + } => Operation::Piece { + size_in_bytes: size_in_bits / 8, + }, + read::Operation::Piece { + size_in_bits, + bit_offset: Some(bit_offset), + } => Operation::BitPiece { + size_in_bits, + bit_offset, + }, + read::Operation::ImplicitValue { data } => { + Operation::ImplicitValue(data.to_slice()?.into_owned().into()) + } + read::Operation::StackValue => Operation::Simple(constants::DW_OP_stack_value), + read::Operation::ImplicitPointer { value, byte_offset } => { + let entry = convert_debug_info_offset(value)?; + Operation::ImplicitPointer { entry, byte_offset } + } + read::Operation::EntryValue { expression } => { + let expression = Expression::from( + read::Expression(expression), + encoding, + dwarf, + unit, + entry_ids, + convert_address, + )?; + Operation::EntryValue(expression) + } + read::Operation::ParameterRef { offset } => { + let entry = convert_unit_offset(offset)?; + Operation::ParameterRef(entry) + } + read::Operation::Address { address } => { + let address = + convert_address(address).ok_or(ConvertError::InvalidAddress)?; + Operation::Address(address) + } + read::Operation::AddressIndex { index } => { + let dwarf = dwarf.ok_or(ConvertError::UnsupportedOperation)?; + let unit = unit.ok_or(ConvertError::UnsupportedOperation)?; + let val = dwarf.address(unit, index)?; + let address = convert_address(val).ok_or(ConvertError::InvalidAddress)?; + Operation::Address(address) + } + read::Operation::ConstantIndex { index } => { + let dwarf = dwarf.ok_or(ConvertError::UnsupportedOperation)?; + let unit = unit.ok_or(ConvertError::UnsupportedOperation)?; + let val = dwarf.address(unit, index)?; + Operation::UnsignedConstant(val) + } + read::Operation::TypedLiteral { base_type, value } => { + let entry = 
convert_unit_offset(base_type)?; + Operation::ConstantType(entry, value.to_slice()?.into_owned().into()) + } + read::Operation::Convert { base_type } => { + if base_type.0 == 0 { + Operation::Convert(None) + } else { + let entry = convert_unit_offset(base_type)?; + Operation::Convert(Some(entry)) + } + } + read::Operation::Reinterpret { base_type } => { + if base_type.0 == 0 { + Operation::Reinterpret(None) + } else { + let entry = convert_unit_offset(base_type)?; + Operation::Reinterpret(Some(entry)) + } + } + read::Operation::WasmLocal { index } => Operation::WasmLocal(index), + read::Operation::WasmGlobal { index } => Operation::WasmGlobal(index), + read::Operation::WasmStack { index } => Operation::WasmStack(index), + }; + operations.push(operation); + } + Ok(Expression { operations }) + } + } +} + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::common::{ + DebugAbbrevOffset, DebugAddrBase, DebugInfoOffset, DebugLocListsBase, DebugRngListsBase, + DebugStrOffsetsBase, Format, SectionId, + }; + use crate::read; + use crate::write::{ + DebugLineStrOffsets, DebugStrOffsets, EndianVec, LineProgram, Sections, Unit, UnitTable, + }; + use crate::LittleEndian; + use std::collections::HashMap; + use std::sync::Arc; + + #[test] + #[allow(clippy::type_complexity)] + fn test_operation() { + for version in [2, 3, 4, 5] { + for address_size in [4, 8] { + for format in [Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + + let mut units = UnitTable::default(); + let unit_id = units.add(Unit::new(encoding, LineProgram::none())); + let unit = units.get_mut(unit_id); + let entry_id = unit.add(unit.root(), constants::DW_TAG_base_type); + let reference = Reference::Entry(unit_id, entry_id); + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + let debug_info_offsets = 
units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + let unit_offsets = debug_info_offsets.unit_offsets(unit_id); + let debug_info_offset = unit_offsets.debug_info_offset(entry_id); + let entry_offset = + read::UnitOffset(unit_offsets.unit_offset(entry_id) as usize); + + let mut reg_expression = Expression::new(); + reg_expression.op_reg(Register(23)); + + let operations: &[(&dyn Fn(&mut Expression), Operation, read::Operation<_>)] = + &[ + ( + &|x| x.op_deref(), + Operation::Deref { space: false }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: address_size, + space: false, + }, + ), + ( + &|x| x.op_xderef(), + Operation::Deref { space: true }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: address_size, + space: true, + }, + ), + ( + &|x| x.op_deref_size(2), + Operation::DerefSize { + space: false, + size: 2, + }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: 2, + space: false, + }, + ), + ( + &|x| x.op_xderef_size(2), + Operation::DerefSize { + space: true, + size: 2, + }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: 2, + space: true, + }, + ), + ( + &|x| x.op_deref_type(2, entry_id), + Operation::DerefType { + space: false, + size: 2, + base: entry_id, + }, + read::Operation::Deref { + base_type: entry_offset, + size: 2, + space: false, + }, + ), + ( + &|x| x.op_xderef_type(2, entry_id), + Operation::DerefType { + space: true, + size: 2, + base: entry_id, + }, + read::Operation::Deref { + base_type: entry_offset, + size: 2, + space: true, + }, + ), + ( + &|x| x.op(constants::DW_OP_drop), + Operation::Simple(constants::DW_OP_drop), + read::Operation::Drop, + ), + ( + &|x| x.op_pick(0), + Operation::Pick(0), + read::Operation::Pick { index: 0 }, + ), + ( + &|x| x.op_pick(1), + Operation::Pick(1), + read::Operation::Pick { index: 1 }, + ), + ( + &|x| x.op_pick(2), + Operation::Pick(2), + read::Operation::Pick { index: 2 }, + ), + ( + &|x| 
x.op(constants::DW_OP_swap), + Operation::Simple(constants::DW_OP_swap), + read::Operation::Swap, + ), + ( + &|x| x.op(constants::DW_OP_rot), + Operation::Simple(constants::DW_OP_rot), + read::Operation::Rot, + ), + ( + &|x| x.op(constants::DW_OP_abs), + Operation::Simple(constants::DW_OP_abs), + read::Operation::Abs, + ), + ( + &|x| x.op(constants::DW_OP_and), + Operation::Simple(constants::DW_OP_and), + read::Operation::And, + ), + ( + &|x| x.op(constants::DW_OP_div), + Operation::Simple(constants::DW_OP_div), + read::Operation::Div, + ), + ( + &|x| x.op(constants::DW_OP_minus), + Operation::Simple(constants::DW_OP_minus), + read::Operation::Minus, + ), + ( + &|x| x.op(constants::DW_OP_mod), + Operation::Simple(constants::DW_OP_mod), + read::Operation::Mod, + ), + ( + &|x| x.op(constants::DW_OP_mul), + Operation::Simple(constants::DW_OP_mul), + read::Operation::Mul, + ), + ( + &|x| x.op(constants::DW_OP_neg), + Operation::Simple(constants::DW_OP_neg), + read::Operation::Neg, + ), + ( + &|x| x.op(constants::DW_OP_not), + Operation::Simple(constants::DW_OP_not), + read::Operation::Not, + ), + ( + &|x| x.op(constants::DW_OP_or), + Operation::Simple(constants::DW_OP_or), + read::Operation::Or, + ), + ( + &|x| x.op(constants::DW_OP_plus), + Operation::Simple(constants::DW_OP_plus), + read::Operation::Plus, + ), + ( + &|x| x.op_plus_uconst(23), + Operation::PlusConstant(23), + read::Operation::PlusConstant { value: 23 }, + ), + ( + &|x| x.op(constants::DW_OP_shl), + Operation::Simple(constants::DW_OP_shl), + read::Operation::Shl, + ), + ( + &|x| x.op(constants::DW_OP_shr), + Operation::Simple(constants::DW_OP_shr), + read::Operation::Shr, + ), + ( + &|x| x.op(constants::DW_OP_shra), + Operation::Simple(constants::DW_OP_shra), + read::Operation::Shra, + ), + ( + &|x| x.op(constants::DW_OP_xor), + Operation::Simple(constants::DW_OP_xor), + read::Operation::Xor, + ), + ( + &|x| x.op(constants::DW_OP_eq), + Operation::Simple(constants::DW_OP_eq), + read::Operation::Eq, + 
), + ( + &|x| x.op(constants::DW_OP_ge), + Operation::Simple(constants::DW_OP_ge), + read::Operation::Ge, + ), + ( + &|x| x.op(constants::DW_OP_gt), + Operation::Simple(constants::DW_OP_gt), + read::Operation::Gt, + ), + ( + &|x| x.op(constants::DW_OP_le), + Operation::Simple(constants::DW_OP_le), + read::Operation::Le, + ), + ( + &|x| x.op(constants::DW_OP_lt), + Operation::Simple(constants::DW_OP_lt), + read::Operation::Lt, + ), + ( + &|x| x.op(constants::DW_OP_ne), + Operation::Simple(constants::DW_OP_ne), + read::Operation::Ne, + ), + ( + &|x| x.op_constu(23), + Operation::UnsignedConstant(23), + read::Operation::UnsignedConstant { value: 23 }, + ), + ( + &|x| x.op_consts(-23), + Operation::SignedConstant(-23), + read::Operation::SignedConstant { value: -23 }, + ), + ( + &|x| x.op_reg(Register(23)), + Operation::Register(Register(23)), + read::Operation::Register { + register: Register(23), + }, + ), + ( + &|x| x.op_reg(Register(123)), + Operation::Register(Register(123)), + read::Operation::Register { + register: Register(123), + }, + ), + ( + &|x| x.op_breg(Register(23), 34), + Operation::RegisterOffset(Register(23), 34), + read::Operation::RegisterOffset { + register: Register(23), + offset: 34, + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_breg(Register(123), 34), + Operation::RegisterOffset(Register(123), 34), + read::Operation::RegisterOffset { + register: Register(123), + offset: 34, + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_regval_type(Register(23), entry_id), + Operation::RegisterType(Register(23), entry_id), + read::Operation::RegisterOffset { + register: Register(23), + offset: 0, + base_type: entry_offset, + }, + ), + ( + &|x| x.op_fbreg(34), + Operation::FrameOffset(34), + read::Operation::FrameOffset { offset: 34 }, + ), + ( + &|x| x.op(constants::DW_OP_nop), + Operation::Simple(constants::DW_OP_nop), + read::Operation::Nop, + ), + ( + &|x| x.op(constants::DW_OP_push_object_address), + 
Operation::Simple(constants::DW_OP_push_object_address), + read::Operation::PushObjectAddress, + ), + ( + &|x| x.op_call(entry_id), + Operation::Call(entry_id), + read::Operation::Call { + offset: read::DieReference::UnitRef(entry_offset), + }, + ), + ( + &|x| x.op_call_ref(reference), + Operation::CallRef(reference), + read::Operation::Call { + offset: read::DieReference::DebugInfoRef(debug_info_offset), + }, + ), + ( + &|x| x.op(constants::DW_OP_form_tls_address), + Operation::Simple(constants::DW_OP_form_tls_address), + read::Operation::TLS, + ), + ( + &|x| x.op(constants::DW_OP_call_frame_cfa), + Operation::Simple(constants::DW_OP_call_frame_cfa), + read::Operation::CallFrameCFA, + ), + ( + &|x| x.op_piece(23), + Operation::Piece { size_in_bytes: 23 }, + read::Operation::Piece { + size_in_bits: 23 * 8, + bit_offset: None, + }, + ), + ( + &|x| x.op_bit_piece(23, 34), + Operation::BitPiece { + size_in_bits: 23, + bit_offset: 34, + }, + read::Operation::Piece { + size_in_bits: 23, + bit_offset: Some(34), + }, + ), + ( + &|x| x.op_implicit_value(vec![23].into()), + Operation::ImplicitValue(vec![23].into()), + read::Operation::ImplicitValue { + data: read::EndianSlice::new(&[23], LittleEndian), + }, + ), + ( + &|x| x.op(constants::DW_OP_stack_value), + Operation::Simple(constants::DW_OP_stack_value), + read::Operation::StackValue, + ), + ( + &|x| x.op_implicit_pointer(reference, 23), + Operation::ImplicitPointer { + entry: reference, + byte_offset: 23, + }, + read::Operation::ImplicitPointer { + value: debug_info_offset, + byte_offset: 23, + }, + ), + ( + &|x| x.op_entry_value(reg_expression.clone()), + Operation::EntryValue(reg_expression.clone()), + read::Operation::EntryValue { + expression: read::EndianSlice::new( + &[constants::DW_OP_reg23.0], + LittleEndian, + ), + }, + ), + ( + &|x| x.op_gnu_parameter_ref(entry_id), + Operation::ParameterRef(entry_id), + read::Operation::ParameterRef { + offset: entry_offset, + }, + ), + ( + &|x| 
x.op_addr(Address::Constant(23)), + Operation::Address(Address::Constant(23)), + read::Operation::Address { address: 23 }, + ), + ( + &|x| x.op_const_type(entry_id, vec![23].into()), + Operation::ConstantType(entry_id, vec![23].into()), + read::Operation::TypedLiteral { + base_type: entry_offset, + value: read::EndianSlice::new(&[23], LittleEndian), + }, + ), + ( + &|x| x.op_convert(None), + Operation::Convert(None), + read::Operation::Convert { + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_convert(Some(entry_id)), + Operation::Convert(Some(entry_id)), + read::Operation::Convert { + base_type: entry_offset, + }, + ), + ( + &|x| x.op_reinterpret(None), + Operation::Reinterpret(None), + read::Operation::Reinterpret { + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_reinterpret(Some(entry_id)), + Operation::Reinterpret(Some(entry_id)), + read::Operation::Reinterpret { + base_type: entry_offset, + }, + ), + ( + &|x| x.op_wasm_local(1000), + Operation::WasmLocal(1000), + read::Operation::WasmLocal { index: 1000 }, + ), + ( + &|x| x.op_wasm_global(1000), + Operation::WasmGlobal(1000), + read::Operation::WasmGlobal { index: 1000 }, + ), + ( + &|x| x.op_wasm_stack(1000), + Operation::WasmStack(1000), + read::Operation::WasmStack { index: 1000 }, + ), + ]; + + let mut expression = Expression::new(); + let start_index = expression.next_index(); + for (f, o, _) in operations { + f(&mut expression); + assert_eq!(expression.operations.last(), Some(o)); + } + + let bra_index = expression.op_bra(); + let skip_index = expression.op_skip(); + expression.op(constants::DW_OP_nop); + let end_index = expression.next_index(); + expression.set_target(bra_index, start_index); + expression.set_target(skip_index, end_index); + + let mut w = EndianVec::new(LittleEndian); + let mut refs = Vec::new(); + expression + .write(&mut w, Some(&mut refs), encoding, Some(unit_offsets)) + .unwrap(); + for r in &refs { + assert_eq!(r.unit, unit_id); + assert_eq!(r.entry, 
entry_id); + w.write_offset_at( + r.offset, + debug_info_offset.0, + SectionId::DebugInfo, + r.size, + ) + .unwrap(); + } + + let read_expression = + read::Expression(read::EndianSlice::new(w.slice(), LittleEndian)); + let mut read_operations = read_expression.operations(encoding); + for (_, _, operation) in operations { + assert_eq!(read_operations.next(), Ok(Some(*operation))); + } + + // 4 = DW_OP_skip + i16 + DW_OP_nop + assert_eq!( + read_operations.next(), + Ok(Some(read::Operation::Bra { + target: -(w.len() as i16) + 4 + })) + ); + // 1 = DW_OP_nop + assert_eq!( + read_operations.next(), + Ok(Some(read::Operation::Skip { target: 1 })) + ); + assert_eq!(read_operations.next(), Ok(Some(read::Operation::Nop))); + assert_eq!(read_operations.next(), Ok(None)); + + // Fake the unit. + let unit = read::Unit { + header: read::UnitHeader::new( + encoding, + 0, + read::UnitType::Compilation, + DebugAbbrevOffset(0), + DebugInfoOffset(0).into(), + read::EndianSlice::new(&[], LittleEndian), + ), + abbreviations: Arc::new(read::Abbreviations::default()), + name: None, + comp_dir: None, + low_pc: 0, + str_offsets_base: DebugStrOffsetsBase(0), + addr_base: DebugAddrBase(0), + loclists_base: DebugLocListsBase(0), + rnglists_base: DebugRngListsBase(0), + line_program: None, + dwo_id: None, + }; + + let mut entry_ids = HashMap::new(); + entry_ids.insert(debug_info_offset.into(), (unit_id, entry_id)); + let convert_expression = Expression::from( + read_expression, + encoding, + None, /* dwarf */ + Some(&unit), + Some(&entry_ids), + &|address| Some(Address::Constant(address)), + ) + .unwrap(); + let mut convert_operations = convert_expression.operations.iter(); + for (_, operation, _) in operations { + assert_eq!(convert_operations.next(), Some(operation)); + } + assert_eq!( + convert_operations.next(), + Some(&Operation::Branch(start_index)) + ); + assert_eq!(convert_operations.next(), Some(&Operation::Skip(end_index))); + assert_eq!( + convert_operations.next(), + 
Some(&Operation::Simple(constants::DW_OP_nop)) + ); + } + } + } + } +} diff --git a/third_party/rust/gimli/src/write/range.rs b/third_party/rust/gimli/src/write/range.rs new file mode 100644 index 000000000000..602658b101e9 --- /dev/null +++ b/third_party/rust/gimli/src/write/range.rs @@ -0,0 +1,416 @@ +use alloc::vec::Vec; +use indexmap::IndexSet; +use std::ops::{Deref, DerefMut}; + +use crate::common::{Encoding, RangeListsOffset, SectionId}; +use crate::write::{Address, BaseId, Error, Result, Section, Sections, Writer}; + +define_section!( + DebugRanges, + RangeListsOffset, + "A writable `.debug_ranges` section." +); +define_section!( + DebugRngLists, + RangeListsOffset, + "A writable `.debug_rnglists` section." +); + +define_offsets!( + RangeListOffsets: RangeListId => RangeListsOffset, + "The section offsets of a series of range lists within the `.debug_ranges` or `.debug_rnglists` sections." +); + +define_id!( + RangeListId, + "An identifier for a range list in a `RangeListTable`." +); + +/// A table of range lists that will be stored in a `.debug_ranges` or `.debug_rnglists` section. +#[derive(Debug, Default)] +pub struct RangeListTable { + base_id: BaseId, + ranges: IndexSet, +} + +impl RangeListTable { + /// Add a range list to the table. + pub fn add(&mut self, range_list: RangeList) -> RangeListId { + let (index, _) = self.ranges.insert_full(range_list); + RangeListId::new(self.base_id, index) + } + + /// Write the range list table to the appropriate section for the given DWARF version. 
+ pub(crate) fn write( + &self, + sections: &mut Sections, + encoding: Encoding, + ) -> Result { + if self.ranges.is_empty() { + return Ok(RangeListOffsets::none()); + } + + match encoding.version { + 2..=4 => self.write_ranges(&mut sections.debug_ranges, encoding.address_size), + 5 => self.write_rnglists(&mut sections.debug_rnglists, encoding), + _ => Err(Error::UnsupportedVersion(encoding.version)), + } + } + + /// Write the range list table to the `.debug_ranges` section. + fn write_ranges( + &self, + w: &mut DebugRanges, + address_size: u8, + ) -> Result { + let mut offsets = Vec::new(); + for range_list in self.ranges.iter() { + offsets.push(w.offset()); + for range in &range_list.0 { + // Note that we must ensure none of the ranges have both begin == 0 and end == 0. + // We do this by ensuring that begin != end, which is a bit more restrictive + // than required, but still seems reasonable. + match *range { + Range::BaseAddress { address } => { + let marker = !0 >> (64 - address_size * 8); + w.write_udata(marker, address_size)?; + w.write_address(address, address_size)?; + } + Range::OffsetPair { begin, end } => { + if begin == end { + return Err(Error::InvalidRange); + } + w.write_udata(begin, address_size)?; + w.write_udata(end, address_size)?; + } + Range::StartEnd { begin, end } => { + if begin == end { + return Err(Error::InvalidRange); + } + w.write_address(begin, address_size)?; + w.write_address(end, address_size)?; + } + Range::StartLength { begin, length } => { + let end = match begin { + Address::Constant(begin) => Address::Constant(begin + length), + Address::Symbol { symbol, addend } => Address::Symbol { + symbol, + addend: addend + length as i64, + }, + }; + if begin == end { + return Err(Error::InvalidRange); + } + w.write_address(begin, address_size)?; + w.write_address(end, address_size)?; + } + } + } + w.write_udata(0, address_size)?; + w.write_udata(0, address_size)?; + } + Ok(RangeListOffsets { + base_id: self.base_id, + offsets, + }) + } 
+ + /// Write the range list table to the `.debug_rnglists` section. + fn write_rnglists( + &self, + w: &mut DebugRngLists, + encoding: Encoding, + ) -> Result { + let mut offsets = Vec::new(); + + if encoding.version != 5 { + return Err(Error::NeedVersion(5)); + } + + let length_offset = w.write_initial_length(encoding.format)?; + let length_base = w.len(); + + w.write_u16(encoding.version)?; + w.write_u8(encoding.address_size)?; + w.write_u8(0)?; // segment_selector_size + w.write_u32(0)?; // offset_entry_count (when set to zero DW_FORM_rnglistx can't be used, see section 7.28) + // FIXME implement DW_FORM_rnglistx writing and implement the offset entry list + + for range_list in self.ranges.iter() { + offsets.push(w.offset()); + for range in &range_list.0 { + match *range { + Range::BaseAddress { address } => { + w.write_u8(crate::constants::DW_RLE_base_address.0)?; + w.write_address(address, encoding.address_size)?; + } + Range::OffsetPair { begin, end } => { + w.write_u8(crate::constants::DW_RLE_offset_pair.0)?; + w.write_uleb128(begin)?; + w.write_uleb128(end)?; + } + Range::StartEnd { begin, end } => { + w.write_u8(crate::constants::DW_RLE_start_end.0)?; + w.write_address(begin, encoding.address_size)?; + w.write_address(end, encoding.address_size)?; + } + Range::StartLength { begin, length } => { + w.write_u8(crate::constants::DW_RLE_start_length.0)?; + w.write_address(begin, encoding.address_size)?; + w.write_uleb128(length)?; + } + } + } + + w.write_u8(crate::constants::DW_RLE_end_of_list.0)?; + } + + let length = (w.len() - length_base) as u64; + w.write_initial_length_at(length_offset, length, encoding.format)?; + + Ok(RangeListOffsets { + base_id: self.base_id, + offsets, + }) + } +} + +/// A range list that will be stored in a `.debug_ranges` or `.debug_rnglists` section. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct RangeList(pub Vec); + +/// A single range. 
+#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub enum Range { + /// DW_RLE_base_address + BaseAddress { + /// Base address. + address: Address, + }, + /// DW_RLE_offset_pair + OffsetPair { + /// Start of range relative to base address. + begin: u64, + /// End of range relative to base address. + end: u64, + }, + /// DW_RLE_start_end + StartEnd { + /// Start of range. + begin: Address, + /// End of range. + end: Address, + }, + /// DW_RLE_start_length + StartLength { + /// Start of range. + begin: Address, + /// Length of range. + length: u64, + }, +} + +#[cfg(feature = "read")] +mod convert { + use super::*; + + use crate::read::{self, Reader}; + use crate::write::{ConvertError, ConvertResult, ConvertUnitContext}; + + impl RangeList { + /// Create a range list by reading the data from the give range list iter. + pub(crate) fn from>( + mut from: read::RawRngListIter, + context: &ConvertUnitContext<'_, R>, + ) -> ConvertResult { + let mut have_base_address = context.base_address != Address::Constant(0); + let convert_address = + |x| (context.convert_address)(x).ok_or(ConvertError::InvalidAddress); + let mut ranges = Vec::new(); + while let Some(from_range) = from.next()? { + let range = match from_range { + read::RawRngListEntry::AddressOrOffsetPair { begin, end } => { + // These were parsed as addresses, even if they are offsets. + let begin = convert_address(begin)?; + let end = convert_address(end)?; + match (begin, end) { + (Address::Constant(begin_offset), Address::Constant(end_offset)) => { + if have_base_address { + Range::OffsetPair { + begin: begin_offset, + end: end_offset, + } + } else { + Range::StartEnd { begin, end } + } + } + _ => { + if have_base_address { + // At least one of begin/end is an address, but we also have + // a base address. Adding addresses is undefined. 
+ return Err(ConvertError::InvalidRangeRelativeAddress); + } + Range::StartEnd { begin, end } + } + } + } + read::RawRngListEntry::BaseAddress { addr } => { + have_base_address = true; + let address = convert_address(addr)?; + Range::BaseAddress { address } + } + read::RawRngListEntry::BaseAddressx { addr } => { + have_base_address = true; + let address = convert_address(context.dwarf.address(context.unit, addr)?)?; + Range::BaseAddress { address } + } + read::RawRngListEntry::StartxEndx { begin, end } => { + let begin = convert_address(context.dwarf.address(context.unit, begin)?)?; + let end = convert_address(context.dwarf.address(context.unit, end)?)?; + Range::StartEnd { begin, end } + } + read::RawRngListEntry::StartxLength { begin, length } => { + let begin = convert_address(context.dwarf.address(context.unit, begin)?)?; + Range::StartLength { begin, length } + } + read::RawRngListEntry::OffsetPair { begin, end } => { + Range::OffsetPair { begin, end } + } + read::RawRngListEntry::StartEnd { begin, end } => { + let begin = convert_address(begin)?; + let end = convert_address(end)?; + Range::StartEnd { begin, end } + } + read::RawRngListEntry::StartLength { begin, length } => { + let begin = convert_address(begin)?; + Range::StartLength { begin, length } + } + }; + // Filtering empty ranges out. + match range { + Range::StartLength { length: 0, .. } => continue, + Range::StartEnd { begin, end, .. } if begin == end => continue, + Range::OffsetPair { begin, end, .. 
} if begin == end => continue, + _ => (), + } + ranges.push(range); + } + Ok(RangeList(ranges)) + } + } +} + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::common::{ + DebugAbbrevOffset, DebugAddrBase, DebugInfoOffset, DebugLocListsBase, DebugRngListsBase, + DebugStrOffsetsBase, Format, + }; + use crate::read; + use crate::write::{ + ConvertUnitContext, EndianVec, LineStringTable, LocationListTable, Range, RangeListTable, + StringTable, + }; + use crate::LittleEndian; + use std::collections::HashMap; + use std::sync::Arc; + + #[test] + fn test_range() { + let mut line_strings = LineStringTable::default(); + let mut strings = StringTable::default(); + + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + + let mut range_list = RangeList(vec![ + Range::StartLength { + begin: Address::Constant(6666), + length: 7777, + }, + Range::StartEnd { + begin: Address::Constant(4444), + end: Address::Constant(5555), + }, + Range::BaseAddress { + address: Address::Constant(1111), + }, + Range::OffsetPair { + begin: 2222, + end: 3333, + }, + ]); + + let mut ranges = RangeListTable::default(); + let range_list_id = ranges.add(range_list.clone()); + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + let range_list_offsets = ranges.write(&mut sections, encoding).unwrap(); + + let read_debug_ranges = + read::DebugRanges::new(sections.debug_ranges.slice(), LittleEndian); + let read_debug_rnglists = + read::DebugRngLists::new(sections.debug_rnglists.slice(), LittleEndian); + let read_ranges = read::RangeLists::new(read_debug_ranges, read_debug_rnglists); + let offset = range_list_offsets.get(range_list_id); + let read_range_list = read_ranges.raw_ranges(offset, encoding).unwrap(); + + let dwarf = read::Dwarf { + ranges: read_ranges, + ..Default::default() + }; + let unit = read::Unit { + header: 
read::UnitHeader::new( + encoding, + 0, + read::UnitType::Compilation, + DebugAbbrevOffset(0), + DebugInfoOffset(0).into(), + read::EndianSlice::default(), + ), + abbreviations: Arc::new(read::Abbreviations::default()), + name: None, + comp_dir: None, + low_pc: 0, + str_offsets_base: DebugStrOffsetsBase(0), + addr_base: DebugAddrBase(0), + loclists_base: DebugLocListsBase(0), + rnglists_base: DebugRngListsBase(0), + line_program: None, + dwo_id: None, + }; + let context = ConvertUnitContext { + dwarf: &dwarf, + unit: &unit, + line_strings: &mut line_strings, + strings: &mut strings, + ranges: &mut ranges, + locations: &mut LocationListTable::default(), + convert_address: &|address| Some(Address::Constant(address)), + base_address: Address::Constant(0), + line_program_offset: None, + line_program_files: Vec::new(), + entry_ids: &HashMap::new(), + }; + let convert_range_list = RangeList::from(read_range_list, &context).unwrap(); + + if version <= 4 { + range_list.0[0] = Range::StartEnd { + begin: Address::Constant(6666), + end: Address::Constant(6666 + 7777), + }; + } + assert_eq!(range_list, convert_range_list); + } + } + } + } +} diff --git a/third_party/rust/gimli/src/write/relocate.rs b/third_party/rust/gimli/src/write/relocate.rs new file mode 100644 index 000000000000..ff8dde13b714 --- /dev/null +++ b/third_party/rust/gimli/src/write/relocate.rs @@ -0,0 +1,280 @@ +use crate::constants; +use crate::write::{Address, Error, Result, Writer}; +use crate::SectionId; + +/// A relocation to be applied to a section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Relocation { + /// The offset within the section where the relocation should be applied. + pub offset: usize, + /// The size of the value to be relocated. + pub size: u8, + /// The target of the relocation. + pub target: RelocationTarget, + /// The addend to be applied to the relocated value. + pub addend: i64, + /// The pointer encoding for relocations in unwind information. 
+ pub eh_pe: Option, +} + +/// The target of a relocation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RelocationTarget { + /// The relocation target is a symbol. + /// + /// The meaning of this value is decided by the writer, but + /// will typically be an index into a symbol table. + Symbol(usize), + /// The relocation target is a section. + Section(SectionId), +} + +/// A `Writer` which also records relocations. +pub trait RelocateWriter { + /// The type of the writer being used to write the section data. + type Writer: Writer; + + /// Get the writer being used to write the section data. + fn writer(&self) -> &Self::Writer; + + /// Get the writer being used to write the section data. + fn writer_mut(&mut self) -> &mut Self::Writer; + + /// Record a relocation. + fn relocate(&mut self, relocation: Relocation); +} + +impl Writer for T { + type Endian = <::Writer as Writer>::Endian; + + fn endian(&self) -> Self::Endian { + self.writer().endian() + } + + fn len(&self) -> usize { + self.writer().len() + } + + fn write(&mut self, bytes: &[u8]) -> Result<()> { + self.writer_mut().write(bytes) + } + + fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> { + self.writer_mut().write_at(offset, bytes) + } + + fn write_address(&mut self, address: Address, size: u8) -> Result<()> { + match address { + Address::Constant(val) => self.writer_mut().write_udata(val, size), + Address::Symbol { symbol, addend } => { + self.relocate(Relocation { + offset: self.len(), + size, + target: RelocationTarget::Symbol(symbol), + addend, + eh_pe: None, + }); + self.writer_mut().write_udata(0, size) + } + } + } + + fn write_offset(&mut self, val: usize, section: SectionId, size: u8) -> Result<()> { + self.relocate(Relocation { + offset: self.len(), + size, + target: RelocationTarget::Section(section), + addend: val as i64, + eh_pe: None, + }); + self.writer_mut().write_udata(0, size) + } + + fn write_offset_at( + &mut self, + offset: usize, + val: usize, + section: 
SectionId, + size: u8, + ) -> Result<()> { + self.relocate(Relocation { + offset, + size, + target: RelocationTarget::Section(section), + addend: val as i64, + eh_pe: None, + }); + self.writer_mut().write_udata_at(offset, 0, size) + } + + fn write_eh_pointer( + &mut self, + address: Address, + eh_pe: constants::DwEhPe, + size: u8, + ) -> Result<()> { + match address { + Address::Constant(_) => self.writer_mut().write_eh_pointer(address, eh_pe, size), + Address::Symbol { symbol, addend } => { + let size = match eh_pe.format() { + constants::DW_EH_PE_absptr => size, + constants::DW_EH_PE_udata2 => 2, + constants::DW_EH_PE_udata4 => 4, + constants::DW_EH_PE_udata8 => 8, + constants::DW_EH_PE_sdata2 => 2, + constants::DW_EH_PE_sdata4 => 4, + constants::DW_EH_PE_sdata8 => 8, + _ => return Err(Error::UnsupportedPointerEncoding(eh_pe)), + }; + self.relocate(Relocation { + offset: self.len(), + size, + target: RelocationTarget::Symbol(symbol), + addend, + eh_pe: Some(eh_pe), + }); + self.writer_mut().write_udata(0, size) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::write::EndianVec; + use crate::{LittleEndian, SectionId}; + use alloc::vec::Vec; + + struct Section { + writer: EndianVec, + relocations: Vec, + } + + impl RelocateWriter for Section { + type Writer = EndianVec; + + fn writer(&self) -> &Self::Writer { + &self.writer + } + + fn writer_mut(&mut self) -> &mut Self::Writer { + &mut self.writer + } + + fn relocate(&mut self, relocation: Relocation) { + self.relocations.push(relocation); + } + } + + #[test] + fn test_relocate_writer() { + let mut expected_data = Vec::new(); + let mut expected_relocations = Vec::new(); + + let mut section = Section { + writer: EndianVec::new(LittleEndian), + relocations: Vec::new(), + }; + + // No relocation for plain data. + section.write_udata(0x12345678, 4).unwrap(); + expected_data.extend_from_slice(&0x12345678u32.to_le_bytes()); + + // No relocation for a constant address. 
+ section + .write_address(Address::Constant(0x87654321), 4) + .unwrap(); + expected_data.extend_from_slice(&0x87654321u32.to_le_bytes()); + + // Relocation for a symbol address. + let offset = section.len(); + section + .write_address( + Address::Symbol { + symbol: 1, + addend: 0x12345678, + }, + 4, + ) + .unwrap(); + expected_data.extend_from_slice(&[0; 4]); + expected_relocations.push(Relocation { + offset, + size: 4, + target: RelocationTarget::Symbol(1), + addend: 0x12345678, + eh_pe: None, + }); + + // Relocation for a section offset. + let offset = section.len(); + section + .write_offset(0x12345678, SectionId::DebugAbbrev, 4) + .unwrap(); + expected_data.extend_from_slice(&[0; 4]); + expected_relocations.push(Relocation { + offset, + size: 4, + target: RelocationTarget::Section(SectionId::DebugAbbrev), + addend: 0x12345678, + eh_pe: None, + }); + + // Relocation for a section offset at a specific offset. + let offset = section.len(); + section.write_udata(0x12345678, 4).unwrap(); + section + .write_offset_at(offset, 0x12345678, SectionId::DebugStr, 4) + .unwrap(); + expected_data.extend_from_slice(&[0; 4]); + expected_relocations.push(Relocation { + offset, + size: 4, + target: RelocationTarget::Section(SectionId::DebugStr), + addend: 0x12345678, + eh_pe: None, + }); + + // No relocation for a constant in unwind information. + section + .write_eh_pointer(Address::Constant(0x87654321), constants::DW_EH_PE_absptr, 8) + .unwrap(); + expected_data.extend_from_slice(&0x87654321u64.to_le_bytes()); + + // No relocation for a relative constant in unwind information. + let offset = section.len(); + section + .write_eh_pointer( + Address::Constant(offset as u64 - 8), + constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4, + 8, + ) + .unwrap(); + expected_data.extend_from_slice(&(-8i32).to_le_bytes()); + + // Relocation for a symbol in unwind information. 
+ let offset = section.len(); + section + .write_eh_pointer( + Address::Symbol { + symbol: 2, + addend: 0x12345678, + }, + constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4, + 8, + ) + .unwrap(); + expected_data.extend_from_slice(&[0; 4]); + expected_relocations.push(Relocation { + offset, + size: 4, + target: RelocationTarget::Symbol(2), + addend: 0x12345678, + eh_pe: Some(constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4), + }); + + assert_eq!(section.writer.into_vec(), expected_data); + assert_eq!(section.relocations, expected_relocations); + } +} diff --git a/third_party/rust/gimli/src/write/section.rs b/third_party/rust/gimli/src/write/section.rs new file mode 100644 index 000000000000..22a44345486a --- /dev/null +++ b/third_party/rust/gimli/src/write/section.rs @@ -0,0 +1,208 @@ +use std::ops::DerefMut; +use std::result; +use std::vec::Vec; + +use crate::common::SectionId; +use crate::write::{ + DebugAbbrev, DebugFrame, DebugInfo, DebugInfoReference, DebugLine, DebugLineStr, DebugLoc, + DebugLocLists, DebugRanges, DebugRngLists, DebugStr, EhFrame, Writer, +}; + +macro_rules! define_section { + ($name:ident, $offset:ident, $docs:expr) => { + #[doc=$docs] + #[derive(Debug, Default)] + pub struct $name(pub W); + + impl $name { + /// Return the offset of the next write. + pub fn offset(&self) -> $offset { + $offset(self.len()) + } + } + + impl From for $name { + #[inline] + fn from(w: W) -> Self { + $name(w) + } + } + + impl Deref for $name { + type Target = W; + + #[inline] + fn deref(&self) -> &W { + &self.0 + } + } + + impl DerefMut for $name { + #[inline] + fn deref_mut(&mut self) -> &mut W { + &mut self.0 + } + } + + impl Section for $name { + #[inline] + fn id(&self) -> SectionId { + SectionId::$name + } + } + }; +} + +/// Functionality common to all writable DWARF sections. +pub trait Section: DerefMut { + /// Returns the DWARF section kind for this type. + fn id(&self) -> SectionId; + + /// Returns the ELF section name for this type. 
+ fn name(&self) -> &'static str { + self.id().name() + } +} + +/// All of the writable DWARF sections. +#[derive(Debug, Default)] +pub struct Sections { + /// The `.debug_abbrev` section. + pub debug_abbrev: DebugAbbrev, + /// The `.debug_info` section. + pub debug_info: DebugInfo, + /// The `.debug_line` section. + pub debug_line: DebugLine, + /// The `.debug_line_str` section. + pub debug_line_str: DebugLineStr, + /// The `.debug_ranges` section. + pub debug_ranges: DebugRanges, + /// The `.debug_rnglists` section. + pub debug_rnglists: DebugRngLists, + /// The `.debug_loc` section. + pub debug_loc: DebugLoc, + /// The `.debug_loclists` section. + pub debug_loclists: DebugLocLists, + /// The `.debug_str` section. + pub debug_str: DebugStr, + /// The `.debug_frame` section. + pub debug_frame: DebugFrame, + /// The `.eh_frame` section. + pub eh_frame: EhFrame, + /// Unresolved references in the `.debug_info` section. + pub(crate) debug_info_refs: Vec, + /// Unresolved references in the `.debug_loc` section. + pub(crate) debug_loc_refs: Vec, + /// Unresolved references in the `.debug_loclists` section. + pub(crate) debug_loclists_refs: Vec, +} + +impl Sections { + /// Create a new `Sections` using clones of the given `section`. + pub fn new(section: W) -> Self { + Sections { + debug_abbrev: DebugAbbrev(section.clone()), + debug_info: DebugInfo(section.clone()), + debug_line: DebugLine(section.clone()), + debug_line_str: DebugLineStr(section.clone()), + debug_ranges: DebugRanges(section.clone()), + debug_rnglists: DebugRngLists(section.clone()), + debug_loc: DebugLoc(section.clone()), + debug_loclists: DebugLocLists(section.clone()), + debug_str: DebugStr(section.clone()), + debug_frame: DebugFrame(section.clone()), + eh_frame: EhFrame(section), + debug_info_refs: Vec::new(), + debug_loc_refs: Vec::new(), + debug_loclists_refs: Vec::new(), + } + } +} + +impl Sections { + /// Get the section with the given `id`. 
+ pub fn get(&self, id: SectionId) -> Option<&W> { + match id { + SectionId::DebugAbbrev => Some(&self.debug_abbrev.0), + SectionId::DebugInfo => Some(&self.debug_info.0), + SectionId::DebugLine => Some(&self.debug_line.0), + SectionId::DebugLineStr => Some(&self.debug_line_str.0), + SectionId::DebugRanges => Some(&self.debug_ranges.0), + SectionId::DebugRngLists => Some(&self.debug_rnglists.0), + SectionId::DebugLoc => Some(&self.debug_loc.0), + SectionId::DebugLocLists => Some(&self.debug_loclists.0), + SectionId::DebugStr => Some(&self.debug_str.0), + SectionId::DebugFrame => Some(&self.debug_frame.0), + SectionId::EhFrame => Some(&self.eh_frame.0), + _ => None, + } + } + + /// Get the section with the given `id`. + pub fn get_mut(&mut self, id: SectionId) -> Option<&mut W> { + match id { + SectionId::DebugAbbrev => Some(&mut self.debug_abbrev.0), + SectionId::DebugInfo => Some(&mut self.debug_info.0), + SectionId::DebugLine => Some(&mut self.debug_line.0), + SectionId::DebugLineStr => Some(&mut self.debug_line_str.0), + SectionId::DebugRanges => Some(&mut self.debug_ranges.0), + SectionId::DebugRngLists => Some(&mut self.debug_rnglists.0), + SectionId::DebugLoc => Some(&mut self.debug_loc.0), + SectionId::DebugLocLists => Some(&mut self.debug_loclists.0), + SectionId::DebugStr => Some(&mut self.debug_str.0), + SectionId::DebugFrame => Some(&mut self.debug_frame.0), + SectionId::EhFrame => Some(&mut self.eh_frame.0), + _ => None, + } + } + + /// For each section, call `f` once with a shared reference. + pub fn for_each<'a, F, E>(&'a self, mut f: F) -> result::Result<(), E> + where + F: FnMut(SectionId, &'a W) -> result::Result<(), E>, + { + macro_rules! f { + ($s:expr) => { + f($s.id(), &$s) + }; + } + // Ordered so that earlier sections do not reference later sections. 
+ f!(self.debug_abbrev)?; + f!(self.debug_str)?; + f!(self.debug_line_str)?; + f!(self.debug_line)?; + f!(self.debug_ranges)?; + f!(self.debug_rnglists)?; + f!(self.debug_loc)?; + f!(self.debug_loclists)?; + f!(self.debug_info)?; + f!(self.debug_frame)?; + f!(self.eh_frame)?; + Ok(()) + } + + /// For each section, call `f` once with a mutable reference. + pub fn for_each_mut<'a, F, E>(&'a mut self, mut f: F) -> result::Result<(), E> + where + F: FnMut(SectionId, &'a mut W) -> result::Result<(), E>, + { + macro_rules! f { + ($s:expr) => { + f($s.id(), &mut $s) + }; + } + // Ordered so that earlier sections do not reference later sections. + f!(self.debug_abbrev)?; + f!(self.debug_str)?; + f!(self.debug_line_str)?; + f!(self.debug_line)?; + f!(self.debug_ranges)?; + f!(self.debug_rnglists)?; + f!(self.debug_loc)?; + f!(self.debug_loclists)?; + f!(self.debug_info)?; + f!(self.debug_frame)?; + f!(self.eh_frame)?; + Ok(()) + } +} diff --git a/third_party/rust/gimli/src/write/str.rs b/third_party/rust/gimli/src/write/str.rs new file mode 100644 index 000000000000..83285c035f45 --- /dev/null +++ b/third_party/rust/gimli/src/write/str.rs @@ -0,0 +1,172 @@ +use alloc::vec::Vec; +use indexmap::IndexSet; +use std::ops::{Deref, DerefMut}; + +use crate::common::{DebugLineStrOffset, DebugStrOffset, SectionId}; +use crate::write::{BaseId, Result, Section, Writer}; + +// Requirements: +// - values are `[u8]`, null bytes are not allowed +// - insertion returns a fixed id +// - inserting a duplicate returns the id of the existing value +// - able to convert an id to a section offset +// Optional? +// - able to get an existing value given an id +// +// Limitations of current implementation (using IndexSet): +// - inserting requires either an allocation for duplicates, +// or a double lookup for non-duplicates +// - doesn't preserve offsets when updating an existing `.debug_str` section +// +// Possible changes: +// - calculate offsets as we add values, and use that as the id. 
+// This would avoid the need for DebugStrOffsets but would make it +// hard to implement `get`. +macro_rules! define_string_table { + ($name:ident, $id:ident, $section:ident, $offsets:ident, $docs:expr) => { + #[doc=$docs] + #[derive(Debug, Default)] + pub struct $name { + base_id: BaseId, + strings: IndexSet>, + } + + impl $name { + /// Add a string to the string table and return its id. + /// + /// If the string already exists, then return the id of the existing string. + /// + /// # Panics + /// + /// Panics if `bytes` contains a null byte. + pub fn add(&mut self, bytes: T) -> $id + where + T: Into>, + { + let bytes = bytes.into(); + assert!(!bytes.contains(&0)); + let (index, _) = self.strings.insert_full(bytes); + $id::new(self.base_id, index) + } + + /// Return the number of strings in the table. + #[inline] + pub fn count(&self) -> usize { + self.strings.len() + } + + /// Get a reference to a string in the table. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + pub fn get(&self, id: $id) -> &[u8] { + debug_assert_eq!(self.base_id, id.base_id); + self.strings.get_index(id.index).map(Vec::as_slice).unwrap() + } + + /// Write the string table to the `.debug_str` section. + /// + /// Returns the offsets at which the strings are written. + pub fn write(&self, w: &mut $section) -> Result<$offsets> { + let mut offsets = Vec::new(); + for bytes in self.strings.iter() { + offsets.push(w.offset()); + w.write(bytes)?; + w.write_u8(0)?; + } + + Ok($offsets { + base_id: self.base_id, + offsets, + }) + } + } + }; +} + +define_id!(StringId, "An identifier for a string in a `StringTable`."); + +define_string_table!( + StringTable, + StringId, + DebugStr, + DebugStrOffsets, + "A table of strings that will be stored in a `.debug_str` section." +); + +define_section!(DebugStr, DebugStrOffset, "A writable `.debug_str` section."); + +define_offsets!( + DebugStrOffsets: StringId => DebugStrOffset, + "The section offsets of all strings within a `.debug_str` section." 
+); + +define_id!( + LineStringId, + "An identifier for a string in a `LineStringTable`." +); + +define_string_table!( + LineStringTable, + LineStringId, + DebugLineStr, + DebugLineStrOffsets, + "A table of strings that will be stored in a `.debug_line_str` section." +); + +define_section!( + DebugLineStr, + DebugLineStrOffset, + "A writable `.debug_line_str` section." +); + +define_offsets!( + DebugLineStrOffsets: LineStringId => DebugLineStrOffset, + "The section offsets of all strings within a `.debug_line_str` section." +); + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::read; + use crate::write::EndianVec; + use crate::LittleEndian; + + #[test] + fn test_string_table() { + let mut strings = StringTable::default(); + assert_eq!(strings.count(), 0); + let id1 = strings.add(&b"one"[..]); + let id2 = strings.add(&b"two"[..]); + assert_eq!(strings.add(&b"one"[..]), id1); + assert_eq!(strings.add(&b"two"[..]), id2); + assert_eq!(strings.get(id1), &b"one"[..]); + assert_eq!(strings.get(id2), &b"two"[..]); + assert_eq!(strings.count(), 2); + + let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); + let offsets = strings.write(&mut debug_str).unwrap(); + assert_eq!(debug_str.slice(), b"one\0two\0"); + assert_eq!(offsets.get(id1), DebugStrOffset(0)); + assert_eq!(offsets.get(id2), DebugStrOffset(4)); + assert_eq!(offsets.count(), 2); + } + + #[test] + fn test_string_table_read() { + let mut strings = StringTable::default(); + let id1 = strings.add(&b"one"[..]); + let id2 = strings.add(&b"two"[..]); + + let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); + let offsets = strings.write(&mut debug_str).unwrap(); + + let read_debug_str = read::DebugStr::new(debug_str.slice(), LittleEndian); + let str1 = read_debug_str.get_str(offsets.get(id1)).unwrap(); + let str2 = read_debug_str.get_str(offsets.get(id2)).unwrap(); + assert_eq!(str1.slice(), &b"one"[..]); + assert_eq!(str2.slice(), &b"two"[..]); + } +} diff --git 
a/third_party/rust/gimli/src/write/unit.rs b/third_party/rust/gimli/src/write/unit.rs new file mode 100644 index 000000000000..fe63ec9144e1 --- /dev/null +++ b/third_party/rust/gimli/src/write/unit.rs @@ -0,0 +1,3152 @@ +use alloc::vec::Vec; +use std::ops::{Deref, DerefMut}; +use std::{slice, usize}; + +use crate::common::{ + DebugAbbrevOffset, DebugInfoOffset, DebugLineOffset, DebugMacinfoOffset, DebugMacroOffset, + DebugStrOffset, DebugTypeSignature, Encoding, Format, SectionId, +}; +use crate::constants; +use crate::leb128::write::{sleb128_size, uleb128_size}; +use crate::write::{ + Abbreviation, AbbreviationTable, Address, AttributeSpecification, BaseId, DebugLineStrOffsets, + DebugStrOffsets, Error, Expression, FileId, LineProgram, LineStringId, LocationListId, + LocationListOffsets, LocationListTable, RangeListId, RangeListOffsets, RangeListTable, + Reference, Result, Section, Sections, StringId, Writer, +}; + +define_id!(UnitId, "An identifier for a unit in a `UnitTable`."); + +define_id!(UnitEntryId, "An identifier for an entry in a `Unit`."); + +/// A table of units that will be stored in the `.debug_info` section. +#[derive(Debug, Default)] +pub struct UnitTable { + base_id: BaseId, + units: Vec, +} + +impl UnitTable { + /// Create a new unit and add it to the table. + /// + /// `address_size` must be in bytes. + /// + /// Returns the `UnitId` of the new unit. + #[inline] + pub fn add(&mut self, unit: Unit) -> UnitId { + let id = UnitId::new(self.base_id, self.units.len()); + self.units.push(unit); + id + } + + /// Return the number of units. + #[inline] + pub fn count(&self) -> usize { + self.units.len() + } + + /// Return the id of a unit. + /// + /// # Panics + /// + /// Panics if `index >= self.count()`. + #[inline] + pub fn id(&self, index: usize) -> UnitId { + assert!(index < self.count()); + UnitId::new(self.base_id, index) + } + + /// Get a reference to a unit. + /// + /// # Panics + /// + /// Panics if `id` is invalid. 
+ #[inline] + pub fn get(&self, id: UnitId) -> &Unit { + debug_assert_eq!(self.base_id, id.base_id); + &self.units[id.index] + } + + /// Get a mutable reference to a unit. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + #[inline] + pub fn get_mut(&mut self, id: UnitId) -> &mut Unit { + debug_assert_eq!(self.base_id, id.base_id); + &mut self.units[id.index] + } + + /// Write the units to the given sections. + /// + /// `strings` must contain the `.debug_str` offsets of the corresponding + /// `StringTable`. + pub fn write( + &mut self, + sections: &mut Sections, + line_strings: &DebugLineStrOffsets, + strings: &DebugStrOffsets, + ) -> Result { + let mut offsets = DebugInfoOffsets { + base_id: self.base_id, + units: Vec::new(), + }; + for unit in &mut self.units { + // TODO: maybe share abbreviation tables + let abbrev_offset = sections.debug_abbrev.offset(); + let mut abbrevs = AbbreviationTable::default(); + + offsets.units.push(unit.write( + sections, + abbrev_offset, + &mut abbrevs, + line_strings, + strings, + )?); + + abbrevs.write(&mut sections.debug_abbrev)?; + } + + write_section_refs( + &mut sections.debug_info_refs, + &mut sections.debug_info.0, + &offsets, + )?; + write_section_refs( + &mut sections.debug_loc_refs, + &mut sections.debug_loc.0, + &offsets, + )?; + write_section_refs( + &mut sections.debug_loclists_refs, + &mut sections.debug_loclists.0, + &offsets, + )?; + + Ok(offsets) + } +} + +fn write_section_refs( + references: &mut Vec, + w: &mut W, + offsets: &DebugInfoOffsets, +) -> Result<()> { + for r in references.drain(..) { + let entry_offset = offsets.entry(r.unit, r.entry).0; + debug_assert_ne!(entry_offset, 0); + w.write_offset_at(r.offset, entry_offset, SectionId::DebugInfo, r.size)?; + } + Ok(()) +} + +/// A unit's debugging information. +#[derive(Debug)] +pub struct Unit { + base_id: BaseId, + /// The encoding parameters for this unit. + encoding: Encoding, + /// The line number program for this unit. 
+ pub line_program: LineProgram, + /// A table of range lists used by this unit. + pub ranges: RangeListTable, + /// A table of location lists used by this unit. + pub locations: LocationListTable, + /// All entries in this unit. The order is unrelated to the tree order. + // Requirements: + // - entries form a tree + // - entries can be added in any order + // - entries have a fixed id + // - able to quickly lookup an entry from its id + // Limitations of current implementation: + // - mutable iteration of children is messy due to borrow checker + entries: Vec, + /// The index of the root entry in entries. + root: UnitEntryId, +} + +impl Unit { + /// Create a new `Unit`. + pub fn new(encoding: Encoding, line_program: LineProgram) -> Self { + let base_id = BaseId::default(); + let ranges = RangeListTable::default(); + let locations = LocationListTable::default(); + let mut entries = Vec::new(); + let root = DebuggingInformationEntry::new( + base_id, + &mut entries, + None, + constants::DW_TAG_compile_unit, + ); + Unit { + base_id, + encoding, + line_program, + ranges, + locations, + entries, + root, + } + } + + /// Return the encoding parameters for this unit. + #[inline] + pub fn encoding(&self) -> Encoding { + self.encoding + } + + /// Return the DWARF version for this unit. + #[inline] + pub fn version(&self) -> u16 { + self.encoding.version + } + + /// Return the address size in bytes for this unit. + #[inline] + pub fn address_size(&self) -> u8 { + self.encoding.address_size + } + + /// Return the DWARF format for this unit. + #[inline] + pub fn format(&self) -> Format { + self.encoding.format + } + + /// Return the number of `DebuggingInformationEntry`s created for this unit. + /// + /// This includes entries that no longer have a parent. + #[inline] + pub fn count(&self) -> usize { + self.entries.len() + } + + /// Return the id of the root entry. 
+ #[inline] + pub fn root(&self) -> UnitEntryId { + self.root + } + + /// Add a new `DebuggingInformationEntry` to this unit and return its id. + /// + /// The `parent` must be within the same unit. + /// + /// # Panics + /// + /// Panics if `parent` is invalid. + #[inline] + pub fn add(&mut self, parent: UnitEntryId, tag: constants::DwTag) -> UnitEntryId { + debug_assert_eq!(self.base_id, parent.base_id); + DebuggingInformationEntry::new(self.base_id, &mut self.entries, Some(parent), tag) + } + + /// Get a reference to an entry. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + #[inline] + pub fn get(&self, id: UnitEntryId) -> &DebuggingInformationEntry { + debug_assert_eq!(self.base_id, id.base_id); + &self.entries[id.index] + } + + /// Get a mutable reference to an entry. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + #[inline] + pub fn get_mut(&mut self, id: UnitEntryId) -> &mut DebuggingInformationEntry { + debug_assert_eq!(self.base_id, id.base_id); + &mut self.entries[id.index] + } + + /// Return true if `self.line_program` is used by a DIE. + fn line_program_in_use(&self) -> bool { + if self.line_program.is_none() { + return false; + } + if !self.line_program.is_empty() { + return true; + } + + for entry in &self.entries { + for attr in &entry.attrs { + if let AttributeValue::FileIndex(Some(_)) = attr.value { + return true; + } + } + } + + false + } + + /// Write the unit to the given sections. + pub(crate) fn write( + &mut self, + sections: &mut Sections, + abbrev_offset: DebugAbbrevOffset, + abbrevs: &mut AbbreviationTable, + line_strings: &DebugLineStrOffsets, + strings: &DebugStrOffsets, + ) -> Result { + let line_program = if self.line_program_in_use() { + self.entries[self.root.index] + .set(constants::DW_AT_stmt_list, AttributeValue::LineProgramRef); + Some(self.line_program.write( + &mut sections.debug_line, + self.encoding, + line_strings, + strings, + )?) 
+ } else { + self.entries[self.root.index].delete(constants::DW_AT_stmt_list); + None + }; + + // TODO: use .debug_types for type units in DWARF v4. + let w = &mut sections.debug_info; + + let mut offsets = UnitOffsets { + base_id: self.base_id, + unit: w.offset(), + // Entries can be written in any order, so create the complete vec now. + entries: vec![EntryOffset::none(); self.entries.len()], + }; + + let length_offset = w.write_initial_length(self.format())?; + let length_base = w.len(); + + w.write_u16(self.version())?; + if 2 <= self.version() && self.version() <= 4 { + w.write_offset( + abbrev_offset.0, + SectionId::DebugAbbrev, + self.format().word_size(), + )?; + w.write_u8(self.address_size())?; + } else if self.version() == 5 { + w.write_u8(constants::DW_UT_compile.0)?; + w.write_u8(self.address_size())?; + w.write_offset( + abbrev_offset.0, + SectionId::DebugAbbrev, + self.format().word_size(), + )?; + } else { + return Err(Error::UnsupportedVersion(self.version())); + } + + // Calculate all DIE offsets, so that we are able to output references to them. + // However, references to base types in expressions use ULEB128, so base types + // must be moved to the front before we can calculate offsets. + self.reorder_base_types(); + let mut offset = w.len(); + self.entries[self.root.index].calculate_offsets( + self, + &mut offset, + &mut offsets, + abbrevs, + )?; + + let range_lists = self.ranges.write(sections, self.encoding)?; + // Location lists can't be written until we have DIE offsets. 
+ let loc_lists = self + .locations + .write(sections, self.encoding, Some(&offsets))?; + + let w = &mut sections.debug_info; + let mut unit_refs = Vec::new(); + self.entries[self.root.index].write( + w, + &mut sections.debug_info_refs, + &mut unit_refs, + self, + &mut offsets, + line_program, + line_strings, + strings, + &range_lists, + &loc_lists, + )?; + + let length = (w.len() - length_base) as u64; + w.write_initial_length_at(length_offset, length, self.format())?; + + for (offset, entry) in unit_refs { + // This does not need relocation. + w.write_udata_at( + offset.0, + offsets.unit_offset(entry), + self.format().word_size(), + )?; + } + + Ok(offsets) + } + + /// Reorder base types to come first so that typed stack operations + /// can get their offset. + fn reorder_base_types(&mut self) { + let root = &self.entries[self.root.index]; + let mut root_children = Vec::with_capacity(root.children.len()); + for entry in &root.children { + if self.entries[entry.index].tag == constants::DW_TAG_base_type { + root_children.push(*entry); + } + } + for entry in &root.children { + if self.entries[entry.index].tag != constants::DW_TAG_base_type { + root_children.push(*entry); + } + } + self.entries[self.root.index].children = root_children; + } +} + +/// A Debugging Information Entry (DIE). +/// +/// DIEs have a set of attributes and optionally have children DIEs as well. +/// +/// DIEs form a tree without any cycles. This is enforced by specifying the +/// parent when creating a DIE, and disallowing changes of parent. +#[derive(Debug)] +pub struct DebuggingInformationEntry { + id: UnitEntryId, + parent: Option, + tag: constants::DwTag, + /// Whether to emit `DW_AT_sibling`. + sibling: bool, + attrs: Vec, + children: Vec, +} + +impl DebuggingInformationEntry { + /// Create a new `DebuggingInformationEntry`. + /// + /// # Panics + /// + /// Panics if `parent` is invalid. 
+ #[allow(clippy::new_ret_no_self)] + fn new( + base_id: BaseId, + entries: &mut Vec, + parent: Option, + tag: constants::DwTag, + ) -> UnitEntryId { + let id = UnitEntryId::new(base_id, entries.len()); + entries.push(DebuggingInformationEntry { + id, + parent, + tag, + sibling: false, + attrs: Vec::new(), + children: Vec::new(), + }); + if let Some(parent) = parent { + debug_assert_eq!(base_id, parent.base_id); + assert_ne!(parent, id); + entries[parent.index].children.push(id); + } + id + } + + /// Return the id of this entry. + #[inline] + pub fn id(&self) -> UnitEntryId { + self.id + } + + /// Return the parent of this entry. + #[inline] + pub fn parent(&self) -> Option { + self.parent + } + + /// Return the tag of this entry. + #[inline] + pub fn tag(&self) -> constants::DwTag { + self.tag + } + + /// Return `true` if a `DW_AT_sibling` attribute will be emitted. + #[inline] + pub fn sibling(&self) -> bool { + self.sibling + } + + /// Set whether a `DW_AT_sibling` attribute will be emitted. + /// + /// The attribute will only be emitted if the DIE has children. + #[inline] + pub fn set_sibling(&mut self, sibling: bool) { + self.sibling = sibling; + } + + /// Iterate over the attributes of this entry. + #[inline] + pub fn attrs(&self) -> slice::Iter<'_, Attribute> { + self.attrs.iter() + } + + /// Iterate over the attributes of this entry for modification. + #[inline] + pub fn attrs_mut(&mut self) -> slice::IterMut<'_, Attribute> { + self.attrs.iter_mut() + } + + /// Get an attribute. + pub fn get(&self, name: constants::DwAt) -> Option<&AttributeValue> { + self.attrs + .iter() + .find(|attr| attr.name == name) + .map(|attr| &attr.value) + } + + /// Get an attribute for modification. + pub fn get_mut(&mut self, name: constants::DwAt) -> Option<&mut AttributeValue> { + self.attrs + .iter_mut() + .find(|attr| attr.name == name) + .map(|attr| &mut attr.value) + } + + /// Set an attribute. + /// + /// Replaces any existing attribute with the same name. 
+ /// + /// # Panics + /// + /// Panics if `name` is `DW_AT_sibling`. Use `set_sibling` instead. + pub fn set(&mut self, name: constants::DwAt, value: AttributeValue) { + assert_ne!(name, constants::DW_AT_sibling); + if let Some(attr) = self.attrs.iter_mut().find(|attr| attr.name == name) { + attr.value = value; + return; + } + self.attrs.push(Attribute { name, value }); + } + + /// Delete an attribute. + /// + /// Replaces any existing attribute with the same name. + pub fn delete(&mut self, name: constants::DwAt) { + self.attrs.retain(|x| x.name != name); + } + + /// Iterate over the children of this entry. + /// + /// Note: use `Unit::add` to add a new child to this entry. + #[inline] + pub fn children(&self) -> slice::Iter<'_, UnitEntryId> { + self.children.iter() + } + + /// Delete a child entry and all of its children. + pub fn delete_child(&mut self, id: UnitEntryId) { + self.children.retain(|&child| child != id); + } + + /// Return the type abbreviation for this DIE. + fn abbreviation(&self, encoding: Encoding) -> Result { + let mut attrs = Vec::new(); + + if self.sibling && !self.children.is_empty() { + let form = match encoding.format { + Format::Dwarf32 => constants::DW_FORM_ref4, + Format::Dwarf64 => constants::DW_FORM_ref8, + }; + attrs.push(AttributeSpecification::new(constants::DW_AT_sibling, form)); + } + + for attr in &self.attrs { + attrs.push(attr.specification(encoding)?); + } + + Ok(Abbreviation::new( + self.tag, + !self.children.is_empty(), + attrs, + )) + } + + fn calculate_offsets( + &self, + unit: &Unit, + offset: &mut usize, + offsets: &mut UnitOffsets, + abbrevs: &mut AbbreviationTable, + ) -> Result<()> { + offsets.entries[self.id.index].offset = DebugInfoOffset(*offset); + offsets.entries[self.id.index].abbrev = abbrevs.add(self.abbreviation(unit.encoding())?); + *offset += self.size(unit, offsets); + if !self.children.is_empty() { + for child in &self.children { + unit.entries[child.index].calculate_offsets(unit, offset, offsets, 
abbrevs)?; + } + // Null child + *offset += 1; + } + Ok(()) + } + + fn size(&self, unit: &Unit, offsets: &UnitOffsets) -> usize { + let mut size = uleb128_size(offsets.abbrev(self.id)); + if self.sibling && !self.children.is_empty() { + size += unit.format().word_size() as usize; + } + for attr in &self.attrs { + size += attr.value.size(unit, offsets); + } + size + } + + /// Write the entry to the given sections. + fn write( + &self, + w: &mut DebugInfo, + debug_info_refs: &mut Vec, + unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, + unit: &Unit, + offsets: &mut UnitOffsets, + line_program: Option, + line_strings: &DebugLineStrOffsets, + strings: &DebugStrOffsets, + range_lists: &RangeListOffsets, + loc_lists: &LocationListOffsets, + ) -> Result<()> { + debug_assert_eq!(offsets.debug_info_offset(self.id), w.offset()); + w.write_uleb128(offsets.abbrev(self.id))?; + + let sibling_offset = if self.sibling && !self.children.is_empty() { + let offset = w.offset(); + w.write_udata(0, unit.format().word_size())?; + Some(offset) + } else { + None + }; + + for attr in &self.attrs { + attr.value.write( + w, + debug_info_refs, + unit_refs, + unit, + offsets, + line_program, + line_strings, + strings, + range_lists, + loc_lists, + )?; + } + + if !self.children.is_empty() { + for child in &self.children { + unit.entries[child.index].write( + w, + debug_info_refs, + unit_refs, + unit, + offsets, + line_program, + line_strings, + strings, + range_lists, + loc_lists, + )?; + } + // Null child + w.write_u8(0)?; + } + + if let Some(offset) = sibling_offset { + let next_offset = (w.offset().0 - offsets.unit.0) as u64; + // This does not need relocation. + w.write_udata_at(offset.0, next_offset, unit.format().word_size())?; + } + Ok(()) + } +} + +/// An attribute in a `DebuggingInformationEntry`, consisting of a name and +/// associated value. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Attribute { + name: constants::DwAt, + value: AttributeValue, +} + +impl Attribute { + /// Get the name of this attribute. + #[inline] + pub fn name(&self) -> constants::DwAt { + self.name + } + + /// Get the value of this attribute. + #[inline] + pub fn get(&self) -> &AttributeValue { + &self.value + } + + /// Set the value of this attribute. + #[inline] + pub fn set(&mut self, value: AttributeValue) { + self.value = value; + } + + /// Return the type specification for this attribute. + fn specification(&self, encoding: Encoding) -> Result { + Ok(AttributeSpecification::new( + self.name, + self.value.form(encoding)?, + )) + } +} + +/// The value of an attribute in a `DebuggingInformationEntry`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AttributeValue { + /// "Refers to some location in the address space of the described program." + Address(Address), + + /// A slice of an arbitrary number of bytes. + Block(Vec), + + /// A one byte constant data value. How to interpret the byte depends on context. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." + Data1(u8), + + /// A two byte constant data value. How to interpret the bytes depends on context. + /// + /// This value will be converted to the target endian before writing. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." + Data2(u16), + + /// A four byte constant data value. How to interpret the bytes depends on context. + /// + /// This value will be converted to the target endian before writing. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." 
+ Data4(u32), + + /// An eight byte constant data value. How to interpret the bytes depends on context. + /// + /// This value will be converted to the target endian before writing. + /// + /// From section 7 of the standard: "Depending on context, it may be a + /// signed integer, an unsigned integer, a floating-point constant, or + /// anything else." + Data8(u64), + + /// A signed integer constant. + Sdata(i64), + + /// An unsigned integer constant. + Udata(u64), + + /// "The information bytes contain a DWARF expression (see Section 2.5) or + /// location description (see Section 2.6)." + Exprloc(Expression), + + /// A boolean that indicates presence or absence of the attribute. + Flag(bool), + + /// An attribute that is always present. + FlagPresent, + + /// A reference to a `DebuggingInformationEntry` in this unit. + UnitRef(UnitEntryId), + + /// A reference to a `DebuggingInformationEntry` in a potentially different unit. + DebugInfoRef(Reference), + + /// An offset into the `.debug_info` section of the supplementary object file. + /// + /// The API does not currently assist with generating this offset. + /// This variant will be removed from the API once support for writing + /// supplementary object files is implemented. + DebugInfoRefSup(DebugInfoOffset), + + /// A reference to a line number program. + LineProgramRef, + + /// A reference to a location list. + LocationListRef(LocationListId), + + /// An offset into the `.debug_macinfo` section. + /// + /// The API does not currently assist with generating this offset. + /// This variant will be removed from the API once support for writing + /// `.debug_macinfo` sections is implemented. + DebugMacinfoRef(DebugMacinfoOffset), + + /// An offset into the `.debug_macro` section. + /// + /// The API does not currently assist with generating this offset. + /// This variant will be removed from the API once support for writing + /// `.debug_macro` sections is implemented. 
+ DebugMacroRef(DebugMacroOffset), + + /// A reference to a range list. + RangeListRef(RangeListId), + + /// A type signature. + /// + /// The API does not currently assist with generating this signature. + /// This variant will be removed from the API once support for writing + /// `.debug_types` sections is implemented. + DebugTypesRef(DebugTypeSignature), + + /// A reference to a string in the `.debug_str` section. + StringRef(StringId), + + /// An offset into the `.debug_str` section of the supplementary object file. + /// + /// The API does not currently assist with generating this offset. + /// This variant will be removed from the API once support for writing + /// supplementary object files is implemented. + DebugStrRefSup(DebugStrOffset), + + /// A reference to a string in the `.debug_line_str` section. + LineStringRef(LineStringId), + + /// A slice of bytes representing a string. Must not include null bytes. + /// Not guaranteed to be UTF-8 or anything like that. + String(Vec), + + /// The value of a `DW_AT_encoding` attribute. + Encoding(constants::DwAte), + + /// The value of a `DW_AT_decimal_sign` attribute. + DecimalSign(constants::DwDs), + + /// The value of a `DW_AT_endianity` attribute. + Endianity(constants::DwEnd), + + /// The value of a `DW_AT_accessibility` attribute. + Accessibility(constants::DwAccess), + + /// The value of a `DW_AT_visibility` attribute. + Visibility(constants::DwVis), + + /// The value of a `DW_AT_virtuality` attribute. + Virtuality(constants::DwVirtuality), + + /// The value of a `DW_AT_language` attribute. + Language(constants::DwLang), + + /// The value of a `DW_AT_address_class` attribute. + AddressClass(constants::DwAddr), + + /// The value of a `DW_AT_identifier_case` attribute. + IdentifierCase(constants::DwId), + + /// The value of a `DW_AT_calling_convention` attribute. + CallingConvention(constants::DwCc), + + /// The value of a `DW_AT_inline` attribute. 
+ Inline(constants::DwInl), + + /// The value of a `DW_AT_ordering` attribute. + Ordering(constants::DwOrd), + + /// An index into the filename entries from the line number information + /// table for the unit containing this value. + FileIndex(Option), +} + +impl AttributeValue { + /// Return the form that will be used to encode this value. + pub fn form(&self, encoding: Encoding) -> Result { + // TODO: missing forms: + // - DW_FORM_indirect + // - DW_FORM_implicit_const + // - FW_FORM_block1/block2/block4 + // - DW_FORM_str/strx1/strx2/strx3/strx4 + // - DW_FORM_addrx/addrx1/addrx2/addrx3/addrx4 + // - DW_FORM_data16 + // - DW_FORM_line_strp + // - DW_FORM_loclistx + // - DW_FORM_rnglistx + let form = match *self { + AttributeValue::Address(_) => constants::DW_FORM_addr, + AttributeValue::Block(_) => constants::DW_FORM_block, + AttributeValue::Data1(_) => constants::DW_FORM_data1, + AttributeValue::Data2(_) => constants::DW_FORM_data2, + AttributeValue::Data4(_) => constants::DW_FORM_data4, + AttributeValue::Data8(_) => constants::DW_FORM_data8, + AttributeValue::Exprloc(_) => constants::DW_FORM_exprloc, + AttributeValue::Flag(_) => constants::DW_FORM_flag, + AttributeValue::FlagPresent => constants::DW_FORM_flag_present, + AttributeValue::UnitRef(_) => { + // Using a fixed size format lets us write a placeholder before we know + // the value. + match encoding.format { + Format::Dwarf32 => constants::DW_FORM_ref4, + Format::Dwarf64 => constants::DW_FORM_ref8, + } + } + AttributeValue::DebugInfoRef(_) => constants::DW_FORM_ref_addr, + AttributeValue::DebugInfoRefSup(_) => { + // TODO: should this depend on the size of supplementary section? 
+ match encoding.format { + Format::Dwarf32 => constants::DW_FORM_ref_sup4, + Format::Dwarf64 => constants::DW_FORM_ref_sup8, + } + } + AttributeValue::LineProgramRef + | AttributeValue::LocationListRef(_) + | AttributeValue::DebugMacinfoRef(_) + | AttributeValue::DebugMacroRef(_) + | AttributeValue::RangeListRef(_) => { + if encoding.version == 2 || encoding.version == 3 { + match encoding.format { + Format::Dwarf32 => constants::DW_FORM_data4, + Format::Dwarf64 => constants::DW_FORM_data8, + } + } else { + constants::DW_FORM_sec_offset + } + } + AttributeValue::DebugTypesRef(_) => constants::DW_FORM_ref_sig8, + AttributeValue::StringRef(_) => constants::DW_FORM_strp, + AttributeValue::DebugStrRefSup(_) => constants::DW_FORM_strp_sup, + AttributeValue::LineStringRef(_) => constants::DW_FORM_line_strp, + AttributeValue::String(_) => constants::DW_FORM_string, + AttributeValue::Encoding(_) + | AttributeValue::DecimalSign(_) + | AttributeValue::Endianity(_) + | AttributeValue::Accessibility(_) + | AttributeValue::Visibility(_) + | AttributeValue::Virtuality(_) + | AttributeValue::Language(_) + | AttributeValue::AddressClass(_) + | AttributeValue::IdentifierCase(_) + | AttributeValue::CallingConvention(_) + | AttributeValue::Inline(_) + | AttributeValue::Ordering(_) + | AttributeValue::FileIndex(_) + | AttributeValue::Udata(_) => constants::DW_FORM_udata, + AttributeValue::Sdata(_) => constants::DW_FORM_sdata, + }; + Ok(form) + } + + fn size(&self, unit: &Unit, offsets: &UnitOffsets) -> usize { + macro_rules! 
debug_assert_form { + ($form:expr) => { + debug_assert_eq!(self.form(unit.encoding()).unwrap(), $form) + }; + } + match *self { + AttributeValue::Address(_) => { + debug_assert_form!(constants::DW_FORM_addr); + unit.address_size() as usize + } + AttributeValue::Block(ref val) => { + debug_assert_form!(constants::DW_FORM_block); + uleb128_size(val.len() as u64) + val.len() + } + AttributeValue::Data1(_) => { + debug_assert_form!(constants::DW_FORM_data1); + 1 + } + AttributeValue::Data2(_) => { + debug_assert_form!(constants::DW_FORM_data2); + 2 + } + AttributeValue::Data4(_) => { + debug_assert_form!(constants::DW_FORM_data4); + 4 + } + AttributeValue::Data8(_) => { + debug_assert_form!(constants::DW_FORM_data8); + 8 + } + AttributeValue::Sdata(val) => { + debug_assert_form!(constants::DW_FORM_sdata); + sleb128_size(val) + } + AttributeValue::Udata(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val) + } + AttributeValue::Exprloc(ref val) => { + debug_assert_form!(constants::DW_FORM_exprloc); + let size = val.size(unit.encoding(), Some(offsets)); + uleb128_size(size as u64) + size + } + AttributeValue::Flag(_) => { + debug_assert_form!(constants::DW_FORM_flag); + 1 + } + AttributeValue::FlagPresent => { + debug_assert_form!(constants::DW_FORM_flag_present); + 0 + } + AttributeValue::UnitRef(_) => { + match unit.format() { + Format::Dwarf32 => debug_assert_form!(constants::DW_FORM_ref4), + Format::Dwarf64 => debug_assert_form!(constants::DW_FORM_ref8), + } + unit.format().word_size() as usize + } + AttributeValue::DebugInfoRef(_) => { + debug_assert_form!(constants::DW_FORM_ref_addr); + if unit.version() == 2 { + unit.address_size() as usize + } else { + unit.format().word_size() as usize + } + } + AttributeValue::DebugInfoRefSup(_) => { + match unit.format() { + Format::Dwarf32 => debug_assert_form!(constants::DW_FORM_ref_sup4), + Format::Dwarf64 => debug_assert_form!(constants::DW_FORM_ref_sup8), + } + unit.format().word_size() as usize + 
} + AttributeValue::LineProgramRef => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + unit.format().word_size() as usize + } + AttributeValue::LocationListRef(_) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + unit.format().word_size() as usize + } + AttributeValue::DebugMacinfoRef(_) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + unit.format().word_size() as usize + } + AttributeValue::DebugMacroRef(_) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + unit.format().word_size() as usize + } + AttributeValue::RangeListRef(_) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + unit.format().word_size() as usize + } + AttributeValue::DebugTypesRef(_) => { + debug_assert_form!(constants::DW_FORM_ref_sig8); + 8 + } + AttributeValue::StringRef(_) => { + debug_assert_form!(constants::DW_FORM_strp); + unit.format().word_size() as usize + } + AttributeValue::DebugStrRefSup(_) => { + debug_assert_form!(constants::DW_FORM_strp_sup); + unit.format().word_size() as usize + } + AttributeValue::LineStringRef(_) => { + debug_assert_form!(constants::DW_FORM_line_strp); + unit.format().word_size() as usize + } + AttributeValue::String(ref val) => { + debug_assert_form!(constants::DW_FORM_string); + val.len() + 1 + } + AttributeValue::Encoding(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::DecimalSign(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::Endianity(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::Accessibility(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::Visibility(val) => { + debug_assert_form!(constants::DW_FORM_udata); + 
uleb128_size(val.0 as u64) + } + AttributeValue::Virtuality(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::Language(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::AddressClass(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0) + } + AttributeValue::IdentifierCase(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::CallingConvention(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::Inline(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::Ordering(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.0 as u64) + } + AttributeValue::FileIndex(val) => { + debug_assert_form!(constants::DW_FORM_udata); + uleb128_size(val.map(FileId::raw).unwrap_or(0)) + } + } + } + + /// Write the attribute value to the given sections. + fn write( + &self, + w: &mut DebugInfo, + debug_info_refs: &mut Vec, + unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, + unit: &Unit, + offsets: &UnitOffsets, + line_program: Option, + line_strings: &DebugLineStrOffsets, + strings: &DebugStrOffsets, + range_lists: &RangeListOffsets, + loc_lists: &LocationListOffsets, + ) -> Result<()> { + macro_rules! 
debug_assert_form { + ($form:expr) => { + debug_assert_eq!(self.form(unit.encoding()).unwrap(), $form) + }; + } + match *self { + AttributeValue::Address(val) => { + debug_assert_form!(constants::DW_FORM_addr); + w.write_address(val, unit.address_size())?; + } + AttributeValue::Block(ref val) => { + debug_assert_form!(constants::DW_FORM_block); + w.write_uleb128(val.len() as u64)?; + w.write(val)?; + } + AttributeValue::Data1(val) => { + debug_assert_form!(constants::DW_FORM_data1); + w.write_u8(val)?; + } + AttributeValue::Data2(val) => { + debug_assert_form!(constants::DW_FORM_data2); + w.write_u16(val)?; + } + AttributeValue::Data4(val) => { + debug_assert_form!(constants::DW_FORM_data4); + w.write_u32(val)?; + } + AttributeValue::Data8(val) => { + debug_assert_form!(constants::DW_FORM_data8); + w.write_u64(val)?; + } + AttributeValue::Sdata(val) => { + debug_assert_form!(constants::DW_FORM_sdata); + w.write_sleb128(val)?; + } + AttributeValue::Udata(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(val)?; + } + AttributeValue::Exprloc(ref val) => { + debug_assert_form!(constants::DW_FORM_exprloc); + w.write_uleb128(val.size(unit.encoding(), Some(offsets)) as u64)?; + val.write( + &mut w.0, + Some(debug_info_refs), + unit.encoding(), + Some(offsets), + )?; + } + AttributeValue::Flag(val) => { + debug_assert_form!(constants::DW_FORM_flag); + w.write_u8(val as u8)?; + } + AttributeValue::FlagPresent => { + debug_assert_form!(constants::DW_FORM_flag_present); + } + AttributeValue::UnitRef(id) => { + match unit.format() { + Format::Dwarf32 => debug_assert_form!(constants::DW_FORM_ref4), + Format::Dwarf64 => debug_assert_form!(constants::DW_FORM_ref8), + } + unit_refs.push((w.offset(), id)); + w.write_udata(0, unit.format().word_size())?; + } + AttributeValue::DebugInfoRef(reference) => { + debug_assert_form!(constants::DW_FORM_ref_addr); + let size = if unit.version() == 2 { + unit.address_size() + } else { + unit.format().word_size() + }; 
+ match reference { + Reference::Symbol(symbol) => w.write_reference(symbol, size)?, + Reference::Entry(unit, entry) => { + debug_info_refs.push(DebugInfoReference { + offset: w.len(), + unit, + entry, + size, + }); + w.write_udata(0, size)?; + } + } + } + AttributeValue::DebugInfoRefSup(val) => { + match unit.format() { + Format::Dwarf32 => debug_assert_form!(constants::DW_FORM_ref_sup4), + Format::Dwarf64 => debug_assert_form!(constants::DW_FORM_ref_sup8), + } + w.write_udata(val.0 as u64, unit.format().word_size())?; + } + AttributeValue::LineProgramRef => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + match line_program { + Some(line_program) => { + w.write_offset( + line_program.0, + SectionId::DebugLine, + unit.format().word_size(), + )?; + } + None => return Err(Error::InvalidAttributeValue), + } + } + AttributeValue::LocationListRef(val) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + let section = if unit.version() <= 4 { + SectionId::DebugLoc + } else { + SectionId::DebugLocLists + }; + w.write_offset(loc_lists.get(val).0, section, unit.format().word_size())?; + } + AttributeValue::DebugMacinfoRef(val) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + w.write_offset(val.0, SectionId::DebugMacinfo, unit.format().word_size())?; + } + AttributeValue::DebugMacroRef(val) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + w.write_offset(val.0, SectionId::DebugMacro, unit.format().word_size())?; + } + AttributeValue::RangeListRef(val) => { + if unit.version() >= 4 { + debug_assert_form!(constants::DW_FORM_sec_offset); + } + let section = if unit.version() <= 4 { + SectionId::DebugRanges + } else { + SectionId::DebugRngLists + }; + w.write_offset(range_lists.get(val).0, section, unit.format().word_size())?; + } + AttributeValue::DebugTypesRef(val) => { + 
debug_assert_form!(constants::DW_FORM_ref_sig8); + w.write_u64(val.0)?; + } + AttributeValue::StringRef(val) => { + debug_assert_form!(constants::DW_FORM_strp); + w.write_offset( + strings.get(val).0, + SectionId::DebugStr, + unit.format().word_size(), + )?; + } + AttributeValue::DebugStrRefSup(val) => { + debug_assert_form!(constants::DW_FORM_strp_sup); + w.write_udata(val.0 as u64, unit.format().word_size())?; + } + AttributeValue::LineStringRef(val) => { + debug_assert_form!(constants::DW_FORM_line_strp); + w.write_offset( + line_strings.get(val).0, + SectionId::DebugLineStr, + unit.format().word_size(), + )?; + } + AttributeValue::String(ref val) => { + debug_assert_form!(constants::DW_FORM_string); + w.write(val)?; + w.write_u8(0)?; + } + AttributeValue::Encoding(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::DecimalSign(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::Endianity(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::Accessibility(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::Visibility(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::Virtuality(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::Language(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::AddressClass(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(val.0)?; + } + AttributeValue::IdentifierCase(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::CallingConvention(val) => { + 
debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::Inline(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::Ordering(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(u64::from(val.0))?; + } + AttributeValue::FileIndex(val) => { + debug_assert_form!(constants::DW_FORM_udata); + w.write_uleb128(val.map(FileId::raw).unwrap_or(0))?; + } + } + Ok(()) + } +} + +define_section!( + DebugInfo, + DebugInfoOffset, + "A writable `.debug_info` section." +); + +/// The section offsets of all elements within a `.debug_info` section. +#[derive(Debug, Default)] +pub struct DebugInfoOffsets { + base_id: BaseId, + units: Vec, +} + +impl DebugInfoOffsets { + #[cfg(test)] + #[cfg(feature = "read")] + pub(crate) fn unit_offsets(&self, unit: UnitId) -> &UnitOffsets { + debug_assert_eq!(self.base_id, unit.base_id); + &self.units[unit.index] + } + + /// Get the `.debug_info` section offset for the given unit. + #[inline] + pub fn unit(&self, unit: UnitId) -> DebugInfoOffset { + debug_assert_eq!(self.base_id, unit.base_id); + self.units[unit.index].unit + } + + /// Get the `.debug_info` section offset for the given entry. + #[inline] + pub fn entry(&self, unit: UnitId, entry: UnitEntryId) -> DebugInfoOffset { + debug_assert_eq!(self.base_id, unit.base_id); + self.units[unit.index].debug_info_offset(entry) + } +} + +/// The section offsets of all elements of a unit within a `.debug_info` section. +#[derive(Debug)] +pub(crate) struct UnitOffsets { + base_id: BaseId, + unit: DebugInfoOffset, + entries: Vec, +} + +impl UnitOffsets { + #[cfg(test)] + #[cfg(feature = "read")] + fn none() -> Self { + UnitOffsets { + base_id: BaseId::default(), + unit: DebugInfoOffset(0), + entries: Vec::new(), + } + } + + /// Get the .debug_info offset for the given entry. 
+ #[inline] + pub(crate) fn debug_info_offset(&self, entry: UnitEntryId) -> DebugInfoOffset { + debug_assert_eq!(self.base_id, entry.base_id); + let offset = self.entries[entry.index].offset; + debug_assert_ne!(offset.0, 0); + offset + } + + /// Get the unit offset for the given entry. + #[inline] + pub(crate) fn unit_offset(&self, entry: UnitEntryId) -> u64 { + let offset = self.debug_info_offset(entry); + (offset.0 - self.unit.0) as u64 + } + + /// Get the abbreviation code for the given entry. + #[inline] + pub(crate) fn abbrev(&self, entry: UnitEntryId) -> u64 { + debug_assert_eq!(self.base_id, entry.base_id); + self.entries[entry.index].abbrev + } +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct EntryOffset { + offset: DebugInfoOffset, + abbrev: u64, +} + +impl EntryOffset { + fn none() -> Self { + EntryOffset { + offset: DebugInfoOffset(0), + abbrev: 0, + } + } +} + +/// A reference to a `.debug_info` entry that has yet to be resolved. +#[derive(Debug, Clone, Copy)] +pub(crate) struct DebugInfoReference { + /// The offset within the section of the reference. + pub offset: usize, + /// The size of the reference. + pub size: u8, + /// The unit containing the entry. + pub unit: UnitId, + /// The entry being referenced. 
+ pub entry: UnitEntryId, +} + +#[cfg(feature = "read")] +pub(crate) mod convert { + use super::*; + use crate::common::{DwoId, UnitSectionOffset}; + use crate::read::{self, Reader}; + use crate::write::{self, ConvertError, ConvertResult, LocationList, RangeList}; + use std::collections::HashMap; + + pub(crate) struct ConvertUnit> { + from_unit: read::Unit, + base_id: BaseId, + encoding: Encoding, + entries: Vec, + entry_offsets: Vec, + root: UnitEntryId, + } + + pub(crate) struct ConvertUnitContext<'a, R: Reader> { + pub dwarf: &'a read::Dwarf, + pub unit: &'a read::Unit, + pub line_strings: &'a mut write::LineStringTable, + pub strings: &'a mut write::StringTable, + pub ranges: &'a mut write::RangeListTable, + pub locations: &'a mut write::LocationListTable, + pub convert_address: &'a dyn Fn(u64) -> Option
, + pub base_address: Address, + pub line_program_offset: Option, + pub line_program_files: Vec, + pub entry_ids: &'a HashMap, + } + + impl UnitTable { + /// Create a unit table by reading the data in the given sections. + /// + /// This also updates the given tables with the values that are referenced from + /// attributes in this section. + /// + /// `convert_address` is a function to convert read addresses into the `Address` + /// type. For non-relocatable addresses, this function may simply return + /// `Address::Constant(address)`. For relocatable addresses, it is the caller's + /// responsibility to determine the symbol and addend corresponding to the address + /// and return `Address::Symbol { symbol, addend }`. + pub fn from>( + dwarf: &read::Dwarf, + line_strings: &mut write::LineStringTable, + strings: &mut write::StringTable, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult { + let base_id = BaseId::default(); + let mut unit_entries = Vec::new(); + let mut entry_ids = HashMap::new(); + + let mut from_units = dwarf.units(); + while let Some(from_unit) = from_units.next()? { + let unit_id = UnitId::new(base_id, unit_entries.len()); + unit_entries.push(Unit::convert_entries( + from_unit, + unit_id, + &mut entry_ids, + dwarf, + )?); + } + + // Attributes must be converted in a separate pass so that we can handle + // references to other compilation units. + let mut units = Vec::new(); + for unit_entries in unit_entries.drain(..) { + units.push(Unit::convert_attributes( + unit_entries, + &entry_ids, + dwarf, + line_strings, + strings, + convert_address, + )?); + } + + Ok(UnitTable { base_id, units }) + } + } + + impl Unit { + /// Create a unit by reading the data in the input sections. + /// + /// Does not add entry attributes. + pub(crate) fn convert_entries>( + from_header: read::UnitHeader, + unit_id: UnitId, + entry_ids: &mut HashMap, + dwarf: &read::Dwarf, + ) -> ConvertResult> { + match from_header.type_() { + read::UnitType::Compilation => (), + _ => return Err(ConvertError::UnsupportedUnitType), + } + let base_id = BaseId::default(); + + let from_unit = dwarf.unit(from_header)?; + let encoding = from_unit.encoding(); + + let mut entries = Vec::new(); + let mut entry_offsets = Vec::new(); + + let mut from_tree = from_unit.entries_tree(None)?; + let from_root = from_tree.root()?; + let root = DebuggingInformationEntry::convert_entry( + from_root, + &from_unit, + base_id, + &mut entries, + &mut entry_offsets, + entry_ids, + None, + unit_id, + )?; + + Ok(ConvertUnit { + from_unit, + base_id, + encoding, + entries, + entry_offsets, + root, + }) + } + + /// Create entry attributes by reading the data in the input sections. 
+ fn convert_attributes>( + unit: ConvertUnit, + entry_ids: &HashMap, + dwarf: &read::Dwarf, + line_strings: &mut write::LineStringTable, + strings: &mut write::StringTable, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult { + let from_unit = unit.from_unit; + let base_address = + convert_address(from_unit.low_pc).ok_or(ConvertError::InvalidAddress)?; + + let (line_program_offset, line_program, line_program_files) = + match from_unit.line_program { + Some(ref from_program) => { + let from_program = from_program.clone(); + let line_program_offset = from_program.header().offset(); + let (line_program, line_program_files) = LineProgram::from( + from_program, + dwarf, + line_strings, + strings, + convert_address, + )?; + (Some(line_program_offset), line_program, line_program_files) + } + None => (None, LineProgram::none(), Vec::new()), + }; + + let mut ranges = RangeListTable::default(); + let mut locations = LocationListTable::default(); + + let mut context = ConvertUnitContext { + entry_ids, + dwarf, + unit: &from_unit, + line_strings, + strings, + ranges: &mut ranges, + locations: &mut locations, + convert_address, + base_address, + line_program_offset, + line_program_files, + }; + + let mut entries = unit.entries; + for entry in &mut entries { + entry.convert_attributes(&mut context, &unit.entry_offsets)?; + } + + Ok(Unit { + base_id: unit.base_id, + encoding: unit.encoding, + line_program, + ranges, + locations, + entries, + root: unit.root, + }) + } + } + + impl DebuggingInformationEntry { + /// Create an entry by reading the data in the input sections. + /// + /// Does not add the entry attributes. 
+ fn convert_entry>( + from: read::EntriesTreeNode<'_, '_, '_, R>, + from_unit: &read::Unit, + base_id: BaseId, + entries: &mut Vec, + entry_offsets: &mut Vec, + entry_ids: &mut HashMap, + parent: Option, + unit_id: UnitId, + ) -> ConvertResult { + let from_entry = from.entry(); + let id = DebuggingInformationEntry::new(base_id, entries, parent, from_entry.tag()); + let offset = from_entry.offset(); + entry_offsets.push(offset); + entry_ids.insert(offset.to_unit_section_offset(from_unit), (unit_id, id)); + + let mut from_children = from.children(); + while let Some(from_child) = from_children.next()? { + DebuggingInformationEntry::convert_entry( + from_child, + from_unit, + base_id, + entries, + entry_offsets, + entry_ids, + Some(id), + unit_id, + )?; + } + Ok(id) + } + + /// Create an entry's attributes by reading the data in the input sections. + fn convert_attributes>( + &mut self, + context: &mut ConvertUnitContext<'_, R>, + entry_offsets: &[read::UnitOffset], + ) -> ConvertResult<()> { + let offset = entry_offsets[self.id.index]; + let from = context.unit.entry(offset)?; + let mut from_attrs = from.attrs(); + while let Some(from_attr) = from_attrs.next()? { + if from_attr.name() == constants::DW_AT_sibling { + // This may point to a null entry, so we have to treat it differently. + self.set_sibling(true); + } else if let Some(attr) = Attribute::from(context, &from_attr)? { + self.set(attr.name, attr.value); + } + } + Ok(()) + } + } + + impl Attribute { + /// Create an attribute by reading the data in the given sections. + pub(crate) fn from>( + context: &mut ConvertUnitContext<'_, R>, + from: &read::Attribute, + ) -> ConvertResult> { + let value = AttributeValue::from(context, from.value())?; + Ok(value.map(|value| Attribute { + name: from.name(), + value, + })) + } + } + + impl AttributeValue { + /// Create an attribute value by reading the data in the given sections. 
+ pub(crate) fn from>( + context: &mut ConvertUnitContext<'_, R>, + from: read::AttributeValue, + ) -> ConvertResult> { + let to = match from { + read::AttributeValue::Addr(val) => match (context.convert_address)(val) { + Some(val) => AttributeValue::Address(val), + None => return Err(ConvertError::InvalidAddress), + }, + read::AttributeValue::Block(r) => AttributeValue::Block(r.to_slice()?.into()), + read::AttributeValue::Data1(val) => AttributeValue::Data1(val), + read::AttributeValue::Data2(val) => AttributeValue::Data2(val), + read::AttributeValue::Data4(val) => AttributeValue::Data4(val), + read::AttributeValue::Data8(val) => AttributeValue::Data8(val), + read::AttributeValue::Sdata(val) => AttributeValue::Sdata(val), + read::AttributeValue::Udata(val) => AttributeValue::Udata(val), + read::AttributeValue::Exprloc(expression) => { + let expression = Expression::from( + expression, + context.unit.encoding(), + Some(context.dwarf), + Some(context.unit), + Some(context.entry_ids), + context.convert_address, + )?; + AttributeValue::Exprloc(expression) + } + // TODO: it would be nice to preserve the flag form. + read::AttributeValue::Flag(val) => AttributeValue::Flag(val), + read::AttributeValue::DebugAddrBase(_base) => { + // We convert all address indices to addresses, + // so this is unneeded. 
+ return Ok(None); + } + read::AttributeValue::DebugAddrIndex(index) => { + let val = context.dwarf.address(context.unit, index)?; + match (context.convert_address)(val) { + Some(val) => AttributeValue::Address(val), + None => return Err(ConvertError::InvalidAddress), + } + } + read::AttributeValue::UnitRef(val) => { + if !context.unit.header.is_valid_offset(val) { + return Err(ConvertError::InvalidUnitRef); + } + let id = context + .entry_ids + .get(&val.to_unit_section_offset(context.unit)) + .ok_or(ConvertError::InvalidUnitRef)?; + AttributeValue::UnitRef(id.1) + } + read::AttributeValue::DebugInfoRef(val) => { + // TODO: support relocation of this value + let id = context + .entry_ids + .get(&UnitSectionOffset::DebugInfoOffset(val)) + .ok_or(ConvertError::InvalidDebugInfoRef)?; + AttributeValue::DebugInfoRef(Reference::Entry(id.0, id.1)) + } + read::AttributeValue::DebugInfoRefSup(val) => AttributeValue::DebugInfoRefSup(val), + read::AttributeValue::DebugLineRef(val) => { + // There should only be the line program in the CU DIE which we've already + // converted, so check if it matches that. + if Some(val) == context.line_program_offset { + AttributeValue::LineProgramRef + } else { + return Err(ConvertError::InvalidLineRef); + } + } + read::AttributeValue::DebugMacinfoRef(val) => AttributeValue::DebugMacinfoRef(val), + read::AttributeValue::DebugMacroRef(val) => AttributeValue::DebugMacroRef(val), + read::AttributeValue::LocationListsRef(val) => { + let iter = context + .dwarf + .locations + .raw_locations(val, context.unit.encoding())?; + let loc_list = LocationList::from(iter, context)?; + let loc_id = context.locations.add(loc_list); + AttributeValue::LocationListRef(loc_id) + } + read::AttributeValue::DebugLocListsBase(_base) => { + // We convert all location list indices to offsets, + // so this is unneeded. 
+ return Ok(None); + } + read::AttributeValue::DebugLocListsIndex(index) => { + let offset = context.dwarf.locations_offset(context.unit, index)?; + let iter = context + .dwarf + .locations + .raw_locations(offset, context.unit.encoding())?; + let loc_list = LocationList::from(iter, context)?; + let loc_id = context.locations.add(loc_list); + AttributeValue::LocationListRef(loc_id) + } + read::AttributeValue::RangeListsRef(offset) => { + let offset = context.dwarf.ranges_offset_from_raw(context.unit, offset); + let iter = context.dwarf.raw_ranges(context.unit, offset)?; + let range_list = RangeList::from(iter, context)?; + let range_id = context.ranges.add(range_list); + AttributeValue::RangeListRef(range_id) + } + read::AttributeValue::DebugRngListsBase(_base) => { + // We convert all range list indices to offsets, + // so this is unneeded. + return Ok(None); + } + read::AttributeValue::DebugRngListsIndex(index) => { + let offset = context.dwarf.ranges_offset(context.unit, index)?; + let iter = context + .dwarf + .ranges + .raw_ranges(offset, context.unit.encoding())?; + let range_list = RangeList::from(iter, context)?; + let range_id = context.ranges.add(range_list); + AttributeValue::RangeListRef(range_id) + } + read::AttributeValue::DebugTypesRef(val) => AttributeValue::DebugTypesRef(val), + read::AttributeValue::DebugStrRef(offset) => { + let r = context.dwarf.string(offset)?; + let id = context.strings.add(r.to_slice()?); + AttributeValue::StringRef(id) + } + read::AttributeValue::DebugStrRefSup(val) => AttributeValue::DebugStrRefSup(val), + read::AttributeValue::DebugStrOffsetsBase(_base) => { + // We convert all string offsets to `.debug_str` references, + // so this is unneeded. 
+ return Ok(None); + } + read::AttributeValue::DebugStrOffsetsIndex(index) => { + let offset = context.dwarf.string_offset(context.unit, index)?; + let r = context.dwarf.string(offset)?; + let id = context.strings.add(r.to_slice()?); + AttributeValue::StringRef(id) + } + read::AttributeValue::DebugLineStrRef(offset) => { + let r = context.dwarf.line_string(offset)?; + let id = context.line_strings.add(r.to_slice()?); + AttributeValue::LineStringRef(id) + } + read::AttributeValue::String(r) => AttributeValue::String(r.to_slice()?.into()), + read::AttributeValue::Encoding(val) => AttributeValue::Encoding(val), + read::AttributeValue::DecimalSign(val) => AttributeValue::DecimalSign(val), + read::AttributeValue::Endianity(val) => AttributeValue::Endianity(val), + read::AttributeValue::Accessibility(val) => AttributeValue::Accessibility(val), + read::AttributeValue::Visibility(val) => AttributeValue::Visibility(val), + read::AttributeValue::Virtuality(val) => AttributeValue::Virtuality(val), + read::AttributeValue::Language(val) => AttributeValue::Language(val), + read::AttributeValue::AddressClass(val) => AttributeValue::AddressClass(val), + read::AttributeValue::IdentifierCase(val) => AttributeValue::IdentifierCase(val), + read::AttributeValue::CallingConvention(val) => { + AttributeValue::CallingConvention(val) + } + read::AttributeValue::Inline(val) => AttributeValue::Inline(val), + read::AttributeValue::Ordering(val) => AttributeValue::Ordering(val), + read::AttributeValue::FileIndex(val) => { + if val == 0 { + // 0 means not specified, even for version 5. + AttributeValue::FileIndex(None) + } else { + match context.line_program_files.get(val as usize) { + Some(id) => AttributeValue::FileIndex(Some(*id)), + None => return Err(ConvertError::InvalidFileIndex), + } + } + } + // Should always be a more specific section reference. 
+ read::AttributeValue::SecOffset(_) => { + return Err(ConvertError::InvalidAttributeValue); + } + read::AttributeValue::DwoId(DwoId(val)) => AttributeValue::Udata(val), + }; + Ok(Some(to)) + } + } +} + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::common::{ + DebugAddrBase, DebugLocListsBase, DebugRngListsBase, DebugStrOffsetsBase, LineEncoding, + }; + use crate::constants; + use crate::read; + use crate::write::{ + DebugLine, DebugLineStr, DebugStr, DwarfUnit, EndianVec, LineString, LineStringTable, + Location, LocationList, LocationListTable, Range, RangeList, RangeListOffsets, + RangeListTable, StringTable, + }; + use crate::LittleEndian; + use std::collections::HashMap; + use std::mem; + use std::sync::Arc; + + #[test] + fn test_unit_table() { + let mut strings = StringTable::default(); + + let mut units = UnitTable::default(); + let unit_id1 = units.add(Unit::new( + Encoding { + version: 4, + address_size: 8, + format: Format::Dwarf32, + }, + LineProgram::none(), + )); + let unit2 = units.add(Unit::new( + Encoding { + version: 2, + address_size: 4, + format: Format::Dwarf64, + }, + LineProgram::none(), + )); + let unit3 = units.add(Unit::new( + Encoding { + version: 5, + address_size: 4, + format: Format::Dwarf32, + }, + LineProgram::none(), + )); + assert_eq!(units.count(), 3); + { + let unit1 = units.get_mut(unit_id1); + assert_eq!(unit1.version(), 4); + assert_eq!(unit1.address_size(), 8); + assert_eq!(unit1.format(), Format::Dwarf32); + assert_eq!(unit1.count(), 1); + + let root_id = unit1.root(); + assert_eq!(root_id, UnitEntryId::new(unit1.base_id, 0)); + { + let root = unit1.get_mut(root_id); + assert_eq!(root.id(), root_id); + assert!(root.parent().is_none()); + assert_eq!(root.tag(), constants::DW_TAG_compile_unit); + + // Test get/get_mut + assert!(root.get(constants::DW_AT_producer).is_none()); + assert!(root.get_mut(constants::DW_AT_producer).is_none()); + let mut producer = 
AttributeValue::String(b"root"[..].into()); + root.set(constants::DW_AT_producer, producer.clone()); + assert_eq!(root.get(constants::DW_AT_producer), Some(&producer)); + assert_eq!(root.get_mut(constants::DW_AT_producer), Some(&mut producer)); + + // Test attrs + let mut attrs = root.attrs(); + let attr = attrs.next().unwrap(); + assert_eq!(attr.name(), constants::DW_AT_producer); + assert_eq!(attr.get(), &producer); + assert!(attrs.next().is_none()); + } + + let child1 = unit1.add(root_id, constants::DW_TAG_subprogram); + assert_eq!(child1, UnitEntryId::new(unit1.base_id, 1)); + { + let child1 = unit1.get_mut(child1); + assert_eq!(child1.parent(), Some(root_id)); + + let tmp = AttributeValue::String(b"tmp"[..].into()); + child1.set(constants::DW_AT_name, tmp.clone()); + assert_eq!(child1.get(constants::DW_AT_name), Some(&tmp)); + + // Test attrs_mut + let name = AttributeValue::StringRef(strings.add(&b"child1"[..])); + { + let attr = child1.attrs_mut().next().unwrap(); + assert_eq!(attr.name(), constants::DW_AT_name); + attr.set(name.clone()); + } + assert_eq!(child1.get(constants::DW_AT_name), Some(&name)); + } + + let child2 = unit1.add(root_id, constants::DW_TAG_subprogram); + assert_eq!(child2, UnitEntryId::new(unit1.base_id, 2)); + { + let child2 = unit1.get_mut(child2); + assert_eq!(child2.parent(), Some(root_id)); + + let tmp = AttributeValue::String(b"tmp"[..].into()); + child2.set(constants::DW_AT_name, tmp.clone()); + assert_eq!(child2.get(constants::DW_AT_name), Some(&tmp)); + + // Test replace + let name = AttributeValue::StringRef(strings.add(&b"child2"[..])); + child2.set(constants::DW_AT_name, name.clone()); + assert_eq!(child2.get(constants::DW_AT_name), Some(&name)); + } + + { + let root = unit1.get(root_id); + assert_eq!( + root.children().cloned().collect::>(), + vec![child1, child2] + ); + } + } + { + let unit2 = units.get(unit2); + assert_eq!(unit2.version(), 2); + assert_eq!(unit2.address_size(), 4); + assert_eq!(unit2.format(), 
Format::Dwarf64); + assert_eq!(unit2.count(), 1); + + let root = unit2.root(); + assert_eq!(root, UnitEntryId::new(unit2.base_id, 0)); + let root = unit2.get(root); + assert_eq!(root.id(), UnitEntryId::new(unit2.base_id, 0)); + assert!(root.parent().is_none()); + assert_eq!(root.tag(), constants::DW_TAG_compile_unit); + } + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = strings.write(&mut sections.debug_str).unwrap(); + units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + + println!("{:?}", sections.debug_str); + println!("{:?}", sections.debug_info); + println!("{:?}", sections.debug_abbrev); + + let dwarf = read::Dwarf { + debug_abbrev: read::DebugAbbrev::new(sections.debug_abbrev.slice(), LittleEndian), + debug_info: read::DebugInfo::new(sections.debug_info.slice(), LittleEndian), + debug_str: read::DebugStr::new(sections.debug_str.slice(), LittleEndian), + ..Default::default() + }; + let mut read_units = dwarf.units(); + + { + let read_unit1 = read_units.next().unwrap().unwrap(); + let unit1 = units.get(unit_id1); + assert_eq!(unit1.version(), read_unit1.version()); + assert_eq!(unit1.address_size(), read_unit1.address_size()); + assert_eq!(unit1.format(), read_unit1.format()); + + let read_unit1 = dwarf.unit(read_unit1).unwrap(); + let mut read_entries = read_unit1.entries(); + + let root = unit1.get(unit1.root()); + { + let (depth, read_root) = read_entries.next_dfs().unwrap().unwrap(); + assert_eq!(depth, 0); + assert_eq!(root.tag(), read_root.tag()); + assert!(read_root.has_children()); + + let producer = match root.get(constants::DW_AT_producer).unwrap() { + AttributeValue::String(ref producer) => &**producer, + otherwise => panic!("unexpected {:?}", otherwise), + }; + assert_eq!(producer, b"root"); + let read_producer = read_root + .attr_value(constants::DW_AT_producer) + .unwrap() + .unwrap(); + assert_eq!( + dwarf 
+ .attr_string(&read_unit1, read_producer) + .unwrap() + .slice(), + producer + ); + } + + let mut children = root.children().cloned(); + + { + let child = children.next().unwrap(); + assert_eq!(child, UnitEntryId::new(unit1.base_id, 1)); + let child = unit1.get(child); + let (depth, read_child) = read_entries.next_dfs().unwrap().unwrap(); + assert_eq!(depth, 1); + assert_eq!(child.tag(), read_child.tag()); + assert!(!read_child.has_children()); + + let name = match child.get(constants::DW_AT_name).unwrap() { + AttributeValue::StringRef(name) => *name, + otherwise => panic!("unexpected {:?}", otherwise), + }; + let name = strings.get(name); + assert_eq!(name, b"child1"); + let read_name = read_child + .attr_value(constants::DW_AT_name) + .unwrap() + .unwrap(); + assert_eq!( + dwarf.attr_string(&read_unit1, read_name).unwrap().slice(), + name + ); + } + + { + let child = children.next().unwrap(); + assert_eq!(child, UnitEntryId::new(unit1.base_id, 2)); + let child = unit1.get(child); + let (depth, read_child) = read_entries.next_dfs().unwrap().unwrap(); + assert_eq!(depth, 0); + assert_eq!(child.tag(), read_child.tag()); + assert!(!read_child.has_children()); + + let name = match child.get(constants::DW_AT_name).unwrap() { + AttributeValue::StringRef(name) => *name, + otherwise => panic!("unexpected {:?}", otherwise), + }; + let name = strings.get(name); + assert_eq!(name, b"child2"); + let read_name = read_child + .attr_value(constants::DW_AT_name) + .unwrap() + .unwrap(); + assert_eq!( + dwarf.attr_string(&read_unit1, read_name).unwrap().slice(), + name + ); + } + + assert!(read_entries.next_dfs().unwrap().is_none()); + } + + { + let read_unit2 = read_units.next().unwrap().unwrap(); + let unit2 = units.get(unit2); + assert_eq!(unit2.version(), read_unit2.version()); + assert_eq!(unit2.address_size(), read_unit2.address_size()); + assert_eq!(unit2.format(), read_unit2.format()); + + let abbrevs = dwarf.abbreviations(&read_unit2).unwrap(); + let mut read_entries = 
read_unit2.entries(&abbrevs); + + { + let root = unit2.get(unit2.root()); + let (depth, read_root) = read_entries.next_dfs().unwrap().unwrap(); + assert_eq!(depth, 0); + assert_eq!(root.tag(), read_root.tag()); + assert!(!read_root.has_children()); + } + + assert!(read_entries.next_dfs().unwrap().is_none()); + } + + { + let read_unit3 = read_units.next().unwrap().unwrap(); + let unit3 = units.get(unit3); + assert_eq!(unit3.version(), read_unit3.version()); + assert_eq!(unit3.address_size(), read_unit3.address_size()); + assert_eq!(unit3.format(), read_unit3.format()); + + let abbrevs = dwarf.abbreviations(&read_unit3).unwrap(); + let mut read_entries = read_unit3.entries(&abbrevs); + + { + let root = unit3.get(unit3.root()); + let (depth, read_root) = read_entries.next_dfs().unwrap().unwrap(); + assert_eq!(depth, 0); + assert_eq!(root.tag(), read_root.tag()); + assert!(!read_root.has_children()); + } + + assert!(read_entries.next_dfs().unwrap().is_none()); + } + + assert!(read_units.next().unwrap().is_none()); + + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let convert_units = UnitTable::from( + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + &|address| Some(Address::Constant(address)), + ) + .unwrap(); + assert_eq!(convert_units.count(), units.count()); + + for i in 0..convert_units.count() { + let unit_id = units.id(i); + let unit = units.get(unit_id); + let convert_unit_id = convert_units.id(i); + let convert_unit = convert_units.get(convert_unit_id); + assert_eq!(convert_unit.version(), unit.version()); + assert_eq!(convert_unit.address_size(), unit.address_size()); + assert_eq!(convert_unit.format(), unit.format()); + assert_eq!(convert_unit.count(), unit.count()); + + let root = unit.get(unit.root()); + let convert_root = convert_unit.get(convert_unit.root()); + assert_eq!(convert_root.tag(), root.tag()); + for (convert_attr, attr) in convert_root.attrs().zip(root.attrs()) { + 
assert_eq!(convert_attr, attr); + } + } + } + + #[test] + fn test_attribute_value() { + // Create a string table and a string with a non-zero id/offset. + let mut strings = StringTable::default(); + strings.add("string one"); + let string_id = strings.add("string two"); + let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); + let debug_str_offsets = strings.write(&mut debug_str).unwrap(); + let read_debug_str = read::DebugStr::new(debug_str.slice(), LittleEndian); + + let mut line_strings = LineStringTable::default(); + line_strings.add("line string one"); + let line_string_id = line_strings.add("line string two"); + let mut debug_line_str = DebugLineStr::from(EndianVec::new(LittleEndian)); + let debug_line_str_offsets = line_strings.write(&mut debug_line_str).unwrap(); + let read_debug_line_str = + read::DebugLineStr::from(read::EndianSlice::new(debug_line_str.slice(), LittleEndian)); + + let data = vec![1, 2, 3, 4]; + let read_data = read::EndianSlice::new(&[1, 2, 3, 4], LittleEndian); + + let mut expression = Expression::new(); + expression.op_constu(57); + let read_expression = read::Expression(read::EndianSlice::new( + &[constants::DW_OP_constu.0, 57], + LittleEndian, + )); + + let mut ranges = RangeListTable::default(); + let range_id = ranges.add(RangeList(vec![Range::StartEnd { + begin: Address::Constant(0x1234), + end: Address::Constant(0x2345), + }])); + + let mut locations = LocationListTable::default(); + let loc_id = locations.add(LocationList(vec![Location::StartEnd { + begin: Address::Constant(0x1234), + end: Address::Constant(0x2345), + data: expression.clone(), + }])); + + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + let range_list_offsets = ranges.write(&mut sections, encoding).unwrap(); + let loc_list_offsets = 
locations.write(&mut sections, encoding, None).unwrap(); + + let read_debug_ranges = + read::DebugRanges::new(sections.debug_ranges.slice(), LittleEndian); + let read_debug_rnglists = + read::DebugRngLists::new(sections.debug_rnglists.slice(), LittleEndian); + + let read_debug_loc = + read::DebugLoc::new(sections.debug_loc.slice(), LittleEndian); + let read_debug_loclists = + read::DebugLocLists::new(sections.debug_loclists.slice(), LittleEndian); + + let mut units = UnitTable::default(); + let unit = units.add(Unit::new(encoding, LineProgram::none())); + let unit = units.get(unit); + let encoding = Encoding { + format, + version, + address_size, + }; + let from_unit = read::UnitHeader::new( + encoding, + 0, + read::UnitType::Compilation, + DebugAbbrevOffset(0), + DebugInfoOffset(0).into(), + read::EndianSlice::new(&[], LittleEndian), + ); + + for (name, value, expect_value) in &[ + ( + constants::DW_AT_name, + AttributeValue::Address(Address::Constant(0x1234)), + read::AttributeValue::Addr(0x1234), + ), + ( + constants::DW_AT_name, + AttributeValue::Block(data.clone()), + read::AttributeValue::Block(read_data), + ), + ( + constants::DW_AT_name, + AttributeValue::Data1(0x12), + read::AttributeValue::Data1(0x12), + ), + ( + constants::DW_AT_name, + AttributeValue::Data2(0x1234), + read::AttributeValue::Data2(0x1234), + ), + ( + constants::DW_AT_name, + AttributeValue::Data4(0x1234), + read::AttributeValue::Data4(0x1234), + ), + ( + constants::DW_AT_name, + AttributeValue::Data8(0x1234), + read::AttributeValue::Data8(0x1234), + ), + ( + constants::DW_AT_name, + AttributeValue::Sdata(0x1234), + read::AttributeValue::Sdata(0x1234), + ), + ( + constants::DW_AT_name, + AttributeValue::Udata(0x1234), + read::AttributeValue::Udata(0x1234), + ), + ( + constants::DW_AT_name, + AttributeValue::Exprloc(expression.clone()), + read::AttributeValue::Exprloc(read_expression), + ), + ( + constants::DW_AT_name, + AttributeValue::Flag(false), + read::AttributeValue::Flag(false), + ), 
+ /* + ( + constants::DW_AT_name, + AttributeValue::FlagPresent, + read::AttributeValue::Flag(true), + ), + */ + ( + constants::DW_AT_name, + AttributeValue::DebugInfoRefSup(DebugInfoOffset(0x1234)), + read::AttributeValue::DebugInfoRefSup(DebugInfoOffset(0x1234)), + ), + ( + constants::DW_AT_location, + AttributeValue::LocationListRef(loc_id), + read::AttributeValue::SecOffset(loc_list_offsets.get(loc_id).0), + ), + ( + constants::DW_AT_macro_info, + AttributeValue::DebugMacinfoRef(DebugMacinfoOffset(0x1234)), + read::AttributeValue::SecOffset(0x1234), + ), + ( + constants::DW_AT_macros, + AttributeValue::DebugMacroRef(DebugMacroOffset(0x1234)), + read::AttributeValue::SecOffset(0x1234), + ), + ( + constants::DW_AT_ranges, + AttributeValue::RangeListRef(range_id), + read::AttributeValue::SecOffset(range_list_offsets.get(range_id).0), + ), + ( + constants::DW_AT_name, + AttributeValue::DebugTypesRef(DebugTypeSignature(0x1234)), + read::AttributeValue::DebugTypesRef(DebugTypeSignature(0x1234)), + ), + ( + constants::DW_AT_name, + AttributeValue::StringRef(string_id), + read::AttributeValue::DebugStrRef(debug_str_offsets.get(string_id)), + ), + ( + constants::DW_AT_name, + AttributeValue::DebugStrRefSup(DebugStrOffset(0x1234)), + read::AttributeValue::DebugStrRefSup(DebugStrOffset(0x1234)), + ), + ( + constants::DW_AT_name, + AttributeValue::LineStringRef(line_string_id), + read::AttributeValue::DebugLineStrRef( + debug_line_str_offsets.get(line_string_id), + ), + ), + ( + constants::DW_AT_name, + AttributeValue::String(data.clone()), + read::AttributeValue::String(read_data), + ), + ( + constants::DW_AT_encoding, + AttributeValue::Encoding(constants::DwAte(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_decimal_sign, + AttributeValue::DecimalSign(constants::DwDs(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_endianity, + AttributeValue::Endianity(constants::DwEnd(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( 
+ constants::DW_AT_accessibility, + AttributeValue::Accessibility(constants::DwAccess(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_visibility, + AttributeValue::Visibility(constants::DwVis(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_virtuality, + AttributeValue::Virtuality(constants::DwVirtuality(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_language, + AttributeValue::Language(constants::DwLang(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_address_class, + AttributeValue::AddressClass(constants::DwAddr(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_identifier_case, + AttributeValue::IdentifierCase(constants::DwId(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_calling_convention, + AttributeValue::CallingConvention(constants::DwCc(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_ordering, + AttributeValue::Ordering(constants::DwOrd(0x12)), + read::AttributeValue::Udata(0x12), + ), + ( + constants::DW_AT_inline, + AttributeValue::Inline(constants::DwInl(0x12)), + read::AttributeValue::Udata(0x12), + ), + ][..] 
+ { + let form = value.form(encoding).unwrap(); + let attr = Attribute { + name: *name, + value: value.clone(), + }; + + let offsets = UnitOffsets::none(); + let line_program_offset = None; + let mut debug_info_refs = Vec::new(); + let mut unit_refs = Vec::new(); + let mut debug_info = DebugInfo::from(EndianVec::new(LittleEndian)); + attr.value + .write( + &mut debug_info, + &mut debug_info_refs, + &mut unit_refs, + unit, + &offsets, + line_program_offset, + &debug_line_str_offsets, + &debug_str_offsets, + &range_list_offsets, + &loc_list_offsets, + ) + .unwrap(); + + let spec = read::AttributeSpecification::new(*name, form, None); + let mut r = read::EndianSlice::new(debug_info.slice(), LittleEndian); + let read_attr = read::parse_attribute(&mut r, encoding, spec).unwrap(); + let read_value = &read_attr.raw_value(); + // read::AttributeValue is invariant in the lifetime of R. + // The lifetimes here are all okay, so transmute it. + let read_value = unsafe { + mem::transmute::< + &read::AttributeValue>, + &read::AttributeValue>, + >(read_value) + }; + assert_eq!(read_value, expect_value); + + let dwarf = read::Dwarf { + debug_str: read_debug_str, + debug_line_str: read_debug_line_str, + ranges: read::RangeLists::new(read_debug_ranges, read_debug_rnglists), + locations: read::LocationLists::new( + read_debug_loc, + read_debug_loclists, + ), + ..Default::default() + }; + + let unit = read::Unit { + header: from_unit, + abbreviations: Arc::new(read::Abbreviations::default()), + name: None, + comp_dir: None, + low_pc: 0, + str_offsets_base: DebugStrOffsetsBase(0), + addr_base: DebugAddrBase(0), + loclists_base: DebugLocListsBase(0), + rnglists_base: DebugRngListsBase(0), + line_program: None, + dwo_id: None, + }; + + let mut context = convert::ConvertUnitContext { + dwarf: &dwarf, + unit: &unit, + line_strings: &mut line_strings, + strings: &mut strings, + ranges: &mut ranges, + locations: &mut locations, + convert_address: &|address| Some(Address::Constant(address)), 
+ base_address: Address::Constant(0), + line_program_offset: None, + line_program_files: Vec::new(), + entry_ids: &HashMap::new(), + }; + + let convert_attr = + Attribute::from(&mut context, &read_attr).unwrap().unwrap(); + assert_eq!(convert_attr, attr); + } + } + } + } + } + + #[test] + fn test_unit_ref() { + let mut units = UnitTable::default(); + let unit_id1 = units.add(Unit::new( + Encoding { + version: 4, + address_size: 8, + format: Format::Dwarf32, + }, + LineProgram::none(), + )); + assert_eq!(unit_id1, units.id(0)); + let unit_id2 = units.add(Unit::new( + Encoding { + version: 2, + address_size: 4, + format: Format::Dwarf64, + }, + LineProgram::none(), + )); + assert_eq!(unit_id2, units.id(1)); + let unit1_child1 = UnitEntryId::new(units.get(unit_id1).base_id, 1); + let unit1_child2 = UnitEntryId::new(units.get(unit_id1).base_id, 2); + let unit2_child1 = UnitEntryId::new(units.get(unit_id2).base_id, 1); + let unit2_child2 = UnitEntryId::new(units.get(unit_id2).base_id, 2); + { + let unit1 = units.get_mut(unit_id1); + let root = unit1.root(); + let child_id1 = unit1.add(root, constants::DW_TAG_subprogram); + assert_eq!(child_id1, unit1_child1); + let child_id2 = unit1.add(root, constants::DW_TAG_subprogram); + assert_eq!(child_id2, unit1_child2); + { + let child1 = unit1.get_mut(child_id1); + child1.set(constants::DW_AT_type, AttributeValue::UnitRef(child_id2)); + } + { + let child2 = unit1.get_mut(child_id2); + child2.set( + constants::DW_AT_type, + AttributeValue::DebugInfoRef(Reference::Entry(unit_id2, unit2_child1)), + ); + } + } + { + let unit2 = units.get_mut(unit_id2); + let root = unit2.root(); + let child_id1 = unit2.add(root, constants::DW_TAG_subprogram); + assert_eq!(child_id1, unit2_child1); + let child_id2 = unit2.add(root, constants::DW_TAG_subprogram); + assert_eq!(child_id2, unit2_child2); + { + let child1 = unit2.get_mut(child_id1); + child1.set(constants::DW_AT_type, AttributeValue::UnitRef(child_id2)); + } + { + let child2 = 
unit2.get_mut(child_id2); + child2.set( + constants::DW_AT_type, + AttributeValue::DebugInfoRef(Reference::Entry(unit_id1, unit1_child1)), + ); + } + } + + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + let debug_info_offsets = units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + + println!("{:?}", sections.debug_info); + println!("{:?}", sections.debug_abbrev); + + let dwarf = read::Dwarf { + debug_abbrev: read::DebugAbbrev::new(sections.debug_abbrev.slice(), LittleEndian), + debug_info: read::DebugInfo::new(sections.debug_info.slice(), LittleEndian), + ..Default::default() + }; + + let mut read_units = dwarf.units(); + { + let read_unit1 = read_units.next().unwrap().unwrap(); + assert_eq!( + read_unit1.offset(), + debug_info_offsets.unit(unit_id1).into() + ); + + let abbrevs = dwarf.abbreviations(&read_unit1).unwrap(); + let mut read_entries = read_unit1.entries(&abbrevs); + { + let (_, _read_root) = read_entries.next_dfs().unwrap().unwrap(); + } + { + let (_, read_child1) = read_entries.next_dfs().unwrap().unwrap(); + let offset = debug_info_offsets + .entry(unit_id1, unit1_child2) + .to_unit_offset(&read_unit1) + .unwrap(); + assert_eq!( + read_child1.attr_value(constants::DW_AT_type).unwrap(), + Some(read::AttributeValue::UnitRef(offset)) + ); + } + { + let (_, read_child2) = read_entries.next_dfs().unwrap().unwrap(); + let offset = debug_info_offsets.entry(unit_id2, unit2_child1); + assert_eq!( + read_child2.attr_value(constants::DW_AT_type).unwrap(), + Some(read::AttributeValue::DebugInfoRef(offset)) + ); + } + } + { + let read_unit2 = read_units.next().unwrap().unwrap(); + assert_eq!( + read_unit2.offset(), + debug_info_offsets.unit(unit_id2).into() + ); + + let abbrevs = dwarf.abbreviations(&read_unit2).unwrap(); + let mut read_entries = read_unit2.entries(&abbrevs); + { + let (_, _read_root) 
= read_entries.next_dfs().unwrap().unwrap(); + } + { + let (_, read_child1) = read_entries.next_dfs().unwrap().unwrap(); + let offset = debug_info_offsets + .entry(unit_id2, unit2_child2) + .to_unit_offset(&read_unit2) + .unwrap(); + assert_eq!( + read_child1.attr_value(constants::DW_AT_type).unwrap(), + Some(read::AttributeValue::UnitRef(offset)) + ); + } + { + let (_, read_child2) = read_entries.next_dfs().unwrap().unwrap(); + let offset = debug_info_offsets.entry(unit_id1, unit1_child1); + assert_eq!( + read_child2.attr_value(constants::DW_AT_type).unwrap(), + Some(read::AttributeValue::DebugInfoRef(offset)) + ); + } + } + + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let convert_units = UnitTable::from( + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + &|address| Some(Address::Constant(address)), + ) + .unwrap(); + assert_eq!(convert_units.count(), units.count()); + + for i in 0..convert_units.count() { + let unit = units.get(units.id(i)); + let convert_unit = convert_units.get(convert_units.id(i)); + assert_eq!(convert_unit.version(), unit.version()); + assert_eq!(convert_unit.address_size(), unit.address_size()); + assert_eq!(convert_unit.format(), unit.format()); + assert_eq!(convert_unit.count(), unit.count()); + + let root = unit.get(unit.root()); + let convert_root = convert_unit.get(convert_unit.root()); + assert_eq!(convert_root.tag(), root.tag()); + for (convert_attr, attr) in convert_root.attrs().zip(root.attrs()) { + assert_eq!(convert_attr, attr); + } + + let child1 = unit.get(UnitEntryId::new(unit.base_id, 1)); + let convert_child1 = convert_unit.get(UnitEntryId::new(convert_unit.base_id, 1)); + assert_eq!(convert_child1.tag(), child1.tag()); + for (convert_attr, attr) in convert_child1.attrs().zip(child1.attrs()) { + assert_eq!(convert_attr.name, attr.name); + match (convert_attr.value.clone(), attr.value.clone()) { + ( + 
AttributeValue::DebugInfoRef(Reference::Entry(convert_unit, convert_entry)), + AttributeValue::DebugInfoRef(Reference::Entry(unit, entry)), + ) => { + assert_eq!(convert_unit.index, unit.index); + assert_eq!(convert_entry.index, entry.index); + } + (AttributeValue::UnitRef(convert_id), AttributeValue::UnitRef(id)) => { + assert_eq!(convert_id.index, id.index); + } + (convert_value, value) => assert_eq!(convert_value, value), + } + } + + let child2 = unit.get(UnitEntryId::new(unit.base_id, 2)); + let convert_child2 = convert_unit.get(UnitEntryId::new(convert_unit.base_id, 2)); + assert_eq!(convert_child2.tag(), child2.tag()); + for (convert_attr, attr) in convert_child2.attrs().zip(child2.attrs()) { + assert_eq!(convert_attr.name, attr.name); + match (convert_attr.value.clone(), attr.value.clone()) { + ( + AttributeValue::DebugInfoRef(Reference::Entry(convert_unit, convert_entry)), + AttributeValue::DebugInfoRef(Reference::Entry(unit, entry)), + ) => { + assert_eq!(convert_unit.index, unit.index); + assert_eq!(convert_entry.index, entry.index); + } + (AttributeValue::UnitRef(convert_id), AttributeValue::UnitRef(id)) => { + assert_eq!(convert_id.index, id.index); + } + (convert_value, value) => assert_eq!(convert_value, value), + } + } + } + } + + #[test] + fn test_sibling() { + fn add_child( + unit: &mut Unit, + parent: UnitEntryId, + tag: constants::DwTag, + name: &str, + ) -> UnitEntryId { + let id = unit.add(parent, tag); + let child = unit.get_mut(id); + child.set(constants::DW_AT_name, AttributeValue::String(name.into())); + child.set_sibling(true); + id + } + + fn add_children(units: &mut UnitTable, unit_id: UnitId) { + let unit = units.get_mut(unit_id); + let root = unit.root(); + let child1 = add_child(unit, root, constants::DW_TAG_subprogram, "child1"); + add_child(unit, child1, constants::DW_TAG_variable, "grandchild1"); + add_child(unit, root, constants::DW_TAG_subprogram, "child2"); + add_child(unit, root, constants::DW_TAG_subprogram, "child3"); + } + + 
fn next_child>( + entries: &mut read::EntriesCursor<'_, '_, R>, + ) -> (read::UnitOffset, Option) { + let (_, entry) = entries.next_dfs().unwrap().unwrap(); + let offset = entry.offset(); + let sibling = + entry + .attr_value(constants::DW_AT_sibling) + .unwrap() + .map(|attr| match attr { + read::AttributeValue::UnitRef(offset) => offset, + _ => panic!("bad sibling value"), + }); + (offset, sibling) + } + + fn check_sibling>( + unit: &read::UnitHeader, + debug_abbrev: &read::DebugAbbrev, + ) { + let abbrevs = unit.abbreviations(debug_abbrev).unwrap(); + let mut entries = unit.entries(&abbrevs); + // root + entries.next_dfs().unwrap().unwrap(); + // child1 + let (_, sibling1) = next_child(&mut entries); + // grandchild1 + entries.next_dfs().unwrap().unwrap(); + // child2 + let (offset2, sibling2) = next_child(&mut entries); + // child3 + let (_, _) = next_child(&mut entries); + assert_eq!(sibling1, Some(offset2)); + assert_eq!(sibling2, None); + } + + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 8, + }; + let mut units = UnitTable::default(); + let unit_id1 = units.add(Unit::new(encoding, LineProgram::none())); + add_children(&mut units, unit_id1); + let unit_id2 = units.add(Unit::new(encoding, LineProgram::none())); + add_children(&mut units, unit_id2); + + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + + println!("{:?}", sections.debug_info); + println!("{:?}", sections.debug_abbrev); + + let read_debug_info = read::DebugInfo::new(sections.debug_info.slice(), LittleEndian); + let read_debug_abbrev = read::DebugAbbrev::new(sections.debug_abbrev.slice(), LittleEndian); + let mut read_units = read_debug_info.units(); + check_sibling(&read_units.next().unwrap().unwrap(), &read_debug_abbrev); + 
check_sibling(&read_units.next().unwrap().unwrap(), &read_debug_abbrev); + } + + #[test] + fn test_line_ref() { + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + + // The line program we'll be referencing. + let mut line_program = LineProgram::new( + encoding, + LineEncoding::default(), + LineString::String(b"comp_dir".to_vec()), + LineString::String(b"comp_name".to_vec()), + None, + ); + let dir = line_program.default_directory(); + let file1 = + line_program.add_file(LineString::String(b"file1".to_vec()), dir, None); + let file2 = + line_program.add_file(LineString::String(b"file2".to_vec()), dir, None); + + // Write, read, and convert the line program, so that we have the info + // required to convert the attributes. + let line_strings = DebugLineStrOffsets::none(); + let strings = DebugStrOffsets::none(); + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let line_program_offset = line_program + .write(&mut debug_line, encoding, &line_strings, &strings) + .unwrap(); + let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); + let read_line_program = read_debug_line + .program( + line_program_offset, + address_size, + Some(read::EndianSlice::new(b"comp_dir", LittleEndian)), + Some(read::EndianSlice::new(b"comp_name", LittleEndian)), + ) + .unwrap(); + let dwarf = read::Dwarf::default(); + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let (_, line_program_files) = LineProgram::from( + read_line_program, + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + &|address| Some(Address::Constant(address)), + ) + .unwrap(); + + // Fake the unit. 
+ let mut units = UnitTable::default(); + let unit = units.add(Unit::new(encoding, LineProgram::none())); + let unit = units.get(unit); + let from_unit = read::UnitHeader::new( + encoding, + 0, + read::UnitType::Compilation, + DebugAbbrevOffset(0), + DebugInfoOffset(0).into(), + read::EndianSlice::new(&[], LittleEndian), + ); + + for (name, value, expect_value) in &[ + ( + constants::DW_AT_stmt_list, + AttributeValue::LineProgramRef, + read::AttributeValue::SecOffset(line_program_offset.0), + ), + ( + constants::DW_AT_decl_file, + AttributeValue::FileIndex(Some(file1)), + read::AttributeValue::Udata(file1.raw()), + ), + ( + constants::DW_AT_decl_file, + AttributeValue::FileIndex(Some(file2)), + read::AttributeValue::Udata(file2.raw()), + ), + ][..] + { + let mut ranges = RangeListTable::default(); + let mut locations = LocationListTable::default(); + let mut strings = StringTable::default(); + let mut line_strings = LineStringTable::default(); + + let form = value.form(encoding).unwrap(); + let attr = Attribute { + name: *name, + value: value.clone(), + }; + + let mut debug_info_refs = Vec::new(); + let mut unit_refs = Vec::new(); + let mut debug_info = DebugInfo::from(EndianVec::new(LittleEndian)); + let offsets = UnitOffsets::none(); + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + let range_list_offsets = RangeListOffsets::none(); + let loc_list_offsets = LocationListOffsets::none(); + attr.value + .write( + &mut debug_info, + &mut debug_info_refs, + &mut unit_refs, + unit, + &offsets, + Some(line_program_offset), + &debug_line_str_offsets, + &debug_str_offsets, + &range_list_offsets, + &loc_list_offsets, + ) + .unwrap(); + + let spec = read::AttributeSpecification::new(*name, form, None); + let mut r = read::EndianSlice::new(debug_info.slice(), LittleEndian); + let read_attr = read::parse_attribute(&mut r, encoding, spec).unwrap(); + let read_value = &read_attr.raw_value(); + // 
read::AttributeValue is invariant in the lifetime of R. + // The lifetimes here are all okay, so transmute it. + let read_value = unsafe { + mem::transmute::< + &read::AttributeValue>, + &read::AttributeValue>, + >(read_value) + }; + assert_eq!(read_value, expect_value); + + let unit = read::Unit { + header: from_unit, + abbreviations: Arc::new(read::Abbreviations::default()), + name: None, + comp_dir: None, + low_pc: 0, + str_offsets_base: DebugStrOffsetsBase(0), + addr_base: DebugAddrBase(0), + loclists_base: DebugLocListsBase(0), + rnglists_base: DebugRngListsBase(0), + line_program: None, + dwo_id: None, + }; + + let mut context = convert::ConvertUnitContext { + dwarf: &dwarf, + unit: &unit, + line_strings: &mut line_strings, + strings: &mut strings, + ranges: &mut ranges, + locations: &mut locations, + convert_address: &|address| Some(Address::Constant(address)), + base_address: Address::Constant(0), + line_program_offset: Some(line_program_offset), + line_program_files: line_program_files.clone(), + entry_ids: &HashMap::new(), + }; + + let convert_attr = + Attribute::from(&mut context, &read_attr).unwrap().unwrap(); + assert_eq!(convert_attr, attr); + } + } + } + } + } + + #[test] + fn test_line_program_used() { + for used in [false, true] { + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 8, + }; + + let line_program = LineProgram::new( + encoding, + LineEncoding::default(), + LineString::String(b"comp_dir".to_vec()), + LineString::String(b"comp_name".to_vec()), + None, + ); + + let mut unit = Unit::new(encoding, line_program); + let file_id = if used { Some(FileId::new(0)) } else { None }; + let root = unit.root(); + unit.get_mut(root).set( + constants::DW_AT_decl_file, + AttributeValue::FileIndex(file_id), + ); + + let mut units = UnitTable::default(); + units.add(unit); + + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + let mut sections = 
Sections::new(EndianVec::new(LittleEndian)); + units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + assert_eq!(!used, sections.debug_line.slice().is_empty()); + } + } + + #[test] + fn test_delete_child() { + fn set_name(unit: &mut Unit, id: UnitEntryId, name: &str) { + let entry = unit.get_mut(id); + entry.set(constants::DW_AT_name, AttributeValue::String(name.into())); + } + fn check_name( + entry: &read::DebuggingInformationEntry<'_, '_, R>, + debug_str: &read::DebugStr, + name: &str, + ) { + let name_attr = entry.attr(constants::DW_AT_name).unwrap().unwrap(); + let entry_name = name_attr.string_value(debug_str).unwrap(); + let entry_name_str = entry_name.to_string().unwrap(); + assert_eq!(entry_name_str, name); + } + let encoding = Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 8, + }; + let mut dwarf = DwarfUnit::new(encoding); + let root = dwarf.unit.root(); + + // Add and delete entries in the root unit + let child1 = dwarf.unit.add(root, constants::DW_TAG_subprogram); + set_name(&mut dwarf.unit, child1, "child1"); + let grandchild1 = dwarf.unit.add(child1, constants::DW_TAG_variable); + set_name(&mut dwarf.unit, grandchild1, "grandchild1"); + let child2 = dwarf.unit.add(root, constants::DW_TAG_subprogram); + set_name(&mut dwarf.unit, child2, "child2"); + // This deletes both `child1` and its child `grandchild1` + dwarf.unit.get_mut(root).delete_child(child1); + let child3 = dwarf.unit.add(root, constants::DW_TAG_subprogram); + set_name(&mut dwarf.unit, child3, "child3"); + let child4 = dwarf.unit.add(root, constants::DW_TAG_subprogram); + set_name(&mut dwarf.unit, child4, "child4"); + let grandchild4 = dwarf.unit.add(child4, constants::DW_TAG_variable); + set_name(&mut dwarf.unit, grandchild4, "grandchild4"); + dwarf.unit.get_mut(child4).delete_child(grandchild4); + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + + // Write DWARF data which should only include `child2`, `child3` 
and `child4` + dwarf.write(&mut sections).unwrap(); + + let read_debug_info = read::DebugInfo::new(sections.debug_info.slice(), LittleEndian); + let read_debug_abbrev = read::DebugAbbrev::new(sections.debug_abbrev.slice(), LittleEndian); + let read_debug_str = read::DebugStr::new(sections.debug_str.slice(), LittleEndian); + let read_unit = read_debug_info.units().next().unwrap().unwrap(); + let abbrevs = read_unit.abbreviations(&read_debug_abbrev).unwrap(); + let mut entries = read_unit.entries(&abbrevs); + // root + entries.next_dfs().unwrap().unwrap(); + // child2 + let (_, read_child2) = entries.next_dfs().unwrap().unwrap(); + check_name(read_child2, &read_debug_str, "child2"); + // child3 + let (_, read_child3) = entries.next_dfs().unwrap().unwrap(); + check_name(read_child3, &read_debug_str, "child3"); + // child4 + let (_, read_child4) = entries.next_dfs().unwrap().unwrap(); + check_name(read_child4, &read_debug_str, "child4"); + // There should be no more entries + assert!(entries.next_dfs().unwrap().is_none()); + } +} diff --git a/third_party/rust/gimli/src/write/writer.rs b/third_party/rust/gimli/src/write/writer.rs new file mode 100644 index 000000000000..1ce3641fca5b --- /dev/null +++ b/third_party/rust/gimli/src/write/writer.rs @@ -0,0 +1,494 @@ +use crate::common::{Format, SectionId}; +use crate::constants; +use crate::endianity::Endianity; +use crate::leb128; +use crate::write::{Address, Error, Result}; + +/// A trait for writing the data to a DWARF section. +/// +/// All write operations append to the section unless otherwise specified. +#[allow(clippy::len_without_is_empty)] +pub trait Writer { + /// The endianity of bytes that are written. + type Endian: Endianity; + + /// Return the endianity of bytes that are written. + fn endian(&self) -> Self::Endian; + + /// Return the current section length. + /// + /// This may be used as an offset for future `write_at` calls. + fn len(&self) -> usize; + + /// Write a slice. 
+ fn write(&mut self, bytes: &[u8]) -> Result<()>; + + /// Write a slice at a given offset. + /// + /// The write must not extend past the current section length. + fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()>; + + /// Write an address. + /// + /// If the writer supports relocations, then it must provide its own implementation + /// of this method. + // TODO: use write_reference instead? + fn write_address(&mut self, address: Address, size: u8) -> Result<()> { + match address { + Address::Constant(val) => self.write_udata(val, size), + Address::Symbol { .. } => Err(Error::InvalidAddress), + } + } + + /// Write an address with a `.eh_frame` pointer encoding. + /// + /// The given size is only used for `DW_EH_PE_absptr` formats. + /// + /// If the writer supports relocations, then it must provide its own implementation + /// of this method. + fn write_eh_pointer( + &mut self, + address: Address, + eh_pe: constants::DwEhPe, + size: u8, + ) -> Result<()> { + match address { + Address::Constant(val) => { + // Indirect doesn't matter here. + let val = match eh_pe.application() { + constants::DW_EH_PE_absptr => val, + constants::DW_EH_PE_pcrel => { + // TODO: better handling of sign + let offset = self.len() as u64; + val.wrapping_sub(offset) + } + _ => { + return Err(Error::UnsupportedPointerEncoding(eh_pe)); + } + }; + self.write_eh_pointer_data(val, eh_pe.format(), size) + } + Address::Symbol { .. } => Err(Error::InvalidAddress), + } + } + + /// Write a value with a `.eh_frame` pointer format. + /// + /// The given size is only used for `DW_EH_PE_absptr` formats. + /// + /// This must not be used directly for values that may require relocation. 
+ fn write_eh_pointer_data( + &mut self, + val: u64, + format: constants::DwEhPe, + size: u8, + ) -> Result<()> { + match format { + constants::DW_EH_PE_absptr => self.write_udata(val, size), + constants::DW_EH_PE_uleb128 => self.write_uleb128(val), + constants::DW_EH_PE_udata2 => self.write_udata(val, 2), + constants::DW_EH_PE_udata4 => self.write_udata(val, 4), + constants::DW_EH_PE_udata8 => self.write_udata(val, 8), + constants::DW_EH_PE_sleb128 => self.write_sleb128(val as i64), + constants::DW_EH_PE_sdata2 => self.write_sdata(val as i64, 2), + constants::DW_EH_PE_sdata4 => self.write_sdata(val as i64, 4), + constants::DW_EH_PE_sdata8 => self.write_sdata(val as i64, 8), + _ => Err(Error::UnsupportedPointerEncoding(format)), + } + } + + /// Write an offset that is relative to the start of the given section. + /// + /// If the writer supports relocations, then it must provide its own implementation + /// of this method. + fn write_offset(&mut self, val: usize, _section: SectionId, size: u8) -> Result<()> { + self.write_udata(val as u64, size) + } + + /// Write, at the given `offset`, an offset value that is relative to the start of the given section. + /// + /// If the writer supports relocations, then it must provide its own implementation + /// of this method. + fn write_offset_at( + &mut self, + offset: usize, + val: usize, + _section: SectionId, + size: u8, + ) -> Result<()> { + self.write_udata_at(offset, val as u64, size) + } + + /// Write a reference to a symbol. + /// + /// If the writer supports symbols, then it must provide its own implementation + /// of this method. + fn write_reference(&mut self, _symbol: usize, _size: u8) -> Result<()> { + Err(Error::InvalidReference) + } + + /// Write a u8. + fn write_u8(&mut self, val: u8) -> Result<()> { + let bytes = [val]; + self.write(&bytes) + } + + /// Write a u16. + fn write_u16(&mut self, val: u16) -> Result<()> { + let mut bytes = [0; 2]; + self.endian().write_u16(&mut bytes, val); + self.write(&bytes) + } + + /// Write a u32.
+ fn write_u32(&mut self, val: u32) -> Result<()> { + let mut bytes = [0; 4]; + self.endian().write_u32(&mut bytes, val); + self.write(&bytes) + } + + /// Write a u64. + fn write_u64(&mut self, val: u64) -> Result<()> { + let mut bytes = [0; 8]; + self.endian().write_u64(&mut bytes, val); + self.write(&bytes) + } + + /// Write a u8 at the given offset. + fn write_u8_at(&mut self, offset: usize, val: u8) -> Result<()> { + let bytes = [val]; + self.write_at(offset, &bytes) + } + + /// Write a u16 at the given offset. + fn write_u16_at(&mut self, offset: usize, val: u16) -> Result<()> { + let mut bytes = [0; 2]; + self.endian().write_u16(&mut bytes, val); + self.write_at(offset, &bytes) + } + + /// Write a u32 at the given offset. + fn write_u32_at(&mut self, offset: usize, val: u32) -> Result<()> { + let mut bytes = [0; 4]; + self.endian().write_u32(&mut bytes, val); + self.write_at(offset, &bytes) + } + + /// Write a u64 at the given offset. + fn write_u64_at(&mut self, offset: usize, val: u64) -> Result<()> { + let mut bytes = [0; 8]; + self.endian().write_u64(&mut bytes, val); + self.write_at(offset, &bytes) + } + + /// Write unsigned data of the given size. + /// + /// Returns an error if the value is too large for the size. + /// This must not be used directly for values that may require relocation. + fn write_udata(&mut self, val: u64, size: u8) -> Result<()> { + match size { + 1 => { + let write_val = val as u8; + if val != u64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u8(write_val) + } + 2 => { + let write_val = val as u16; + if val != u64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u16(write_val) + } + 4 => { + let write_val = val as u32; + if val != u64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u32(write_val) + } + 8 => self.write_u64(val), + otherwise => Err(Error::UnsupportedWordSize(otherwise)), + } + } + + /// Write signed data of the given size. 
+ /// + /// Returns an error if the value is too large for the size. + /// This must not be used directly for values that may require relocation. + fn write_sdata(&mut self, val: i64, size: u8) -> Result<()> { + match size { + 1 => { + let write_val = val as i8; + if val != i64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u8(write_val as u8) + } + 2 => { + let write_val = val as i16; + if val != i64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u16(write_val as u16) + } + 4 => { + let write_val = val as i32; + if val != i64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u32(write_val as u32) + } + 8 => self.write_u64(val as u64), + otherwise => Err(Error::UnsupportedWordSize(otherwise)), + } + } + + /// Write a word of the given size at the given offset. + /// + /// Returns an error if the value is too large for the size. + /// This must not be used directly for values that may require relocation. + fn write_udata_at(&mut self, offset: usize, val: u64, size: u8) -> Result<()> { + match size { + 1 => { + let write_val = val as u8; + if val != u64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u8_at(offset, write_val) + } + 2 => { + let write_val = val as u16; + if val != u64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u16_at(offset, write_val) + } + 4 => { + let write_val = val as u32; + if val != u64::from(write_val) { + return Err(Error::ValueTooLarge); + } + self.write_u32_at(offset, write_val) + } + 8 => self.write_u64_at(offset, val), + otherwise => Err(Error::UnsupportedWordSize(otherwise)), + } + } + + /// Write an unsigned LEB128 encoded integer. + fn write_uleb128(&mut self, val: u64) -> Result<()> { + let mut bytes = [0u8; 10]; + // bytes is long enough so this will never fail. + let len = leb128::write::unsigned(&mut { &mut bytes[..] 
}, val).unwrap(); + self.write(&bytes[..len]) + } + + /// Write a signed LEB128 encoded integer. + fn write_sleb128(&mut self, val: i64) -> Result<()> { + let mut bytes = [0u8; 10]; + // bytes is long enough so this will never fail. + let len = leb128::write::signed(&mut { &mut bytes[..] }, val).unwrap(); + self.write(&bytes[..len]) + } + + /// Write an initial length according to the given DWARF format. + /// + /// This will only write a length of zero, since the length isn't + /// known yet, and a subsequent call to `write_initial_length_at` + /// will write the actual length. + fn write_initial_length(&mut self, format: Format) -> Result { + if format == Format::Dwarf64 { + self.write_u32(0xffff_ffff)?; + } + let offset = InitialLengthOffset(self.len()); + self.write_udata(0, format.word_size())?; + Ok(offset) + } + + /// Write an initial length at the given offset according to the given DWARF format. + /// + /// `write_initial_length` must have previously returned the offset. + fn write_initial_length_at( + &mut self, + offset: InitialLengthOffset, + length: u64, + format: Format, + ) -> Result<()> { + self.write_udata_at(offset.0, length, format.word_size()) + } +} + +/// The offset at which an initial length should be written.
+#[derive(Debug, Clone, Copy)] +pub struct InitialLengthOffset(usize); + +#[cfg(test)] +mod tests { + use super::*; + use crate::write; + use crate::{BigEndian, LittleEndian}; + use std::{i64, u64}; + + #[test] + fn test_writer() { + let mut w = write::EndianVec::new(LittleEndian); + w.write_address(Address::Constant(0x1122_3344), 4).unwrap(); + assert_eq!(w.slice(), &[0x44, 0x33, 0x22, 0x11]); + assert_eq!( + w.write_address( + Address::Symbol { + symbol: 0, + addend: 0 + }, + 4 + ), + Err(Error::InvalidAddress) + ); + + let mut w = write::EndianVec::new(LittleEndian); + w.write_offset(0x1122_3344, SectionId::DebugInfo, 4) + .unwrap(); + assert_eq!(w.slice(), &[0x44, 0x33, 0x22, 0x11]); + w.write_offset_at(1, 0x5566, SectionId::DebugInfo, 2) + .unwrap(); + assert_eq!(w.slice(), &[0x44, 0x66, 0x55, 0x11]); + + let mut w = write::EndianVec::new(LittleEndian); + w.write_u8(0x11).unwrap(); + w.write_u16(0x2233).unwrap(); + w.write_u32(0x4455_6677).unwrap(); + w.write_u64(0x8081_8283_8485_8687).unwrap(); + #[rustfmt::skip] + assert_eq!(w.slice(), &[ + 0x11, + 0x33, 0x22, + 0x77, 0x66, 0x55, 0x44, + 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, + ]); + w.write_u8_at(14, 0x11).unwrap(); + w.write_u16_at(12, 0x2233).unwrap(); + w.write_u32_at(8, 0x4455_6677).unwrap(); + w.write_u64_at(0, 0x8081_8283_8485_8687).unwrap(); + #[rustfmt::skip] + assert_eq!(w.slice(), &[ + 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, + 0x77, 0x66, 0x55, 0x44, + 0x33, 0x22, + 0x11, + ]); + + let mut w = write::EndianVec::new(BigEndian); + w.write_u8(0x11).unwrap(); + w.write_u16(0x2233).unwrap(); + w.write_u32(0x4455_6677).unwrap(); + w.write_u64(0x8081_8283_8485_8687).unwrap(); + #[rustfmt::skip] + assert_eq!(w.slice(), &[ + 0x11, + 0x22, 0x33, + 0x44, 0x55, 0x66, 0x77, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + ]); + w.write_u8_at(14, 0x11).unwrap(); + w.write_u16_at(12, 0x2233).unwrap(); + w.write_u32_at(8, 0x4455_6677).unwrap(); + w.write_u64_at(0, 
0x8081_8283_8485_8687).unwrap(); + #[rustfmt::skip] + assert_eq!(w.slice(), &[ + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x44, 0x55, 0x66, 0x77, + 0x22, 0x33, + 0x11, + ]); + + let mut w = write::EndianVec::new(LittleEndian); + w.write_udata(0x11, 1).unwrap(); + w.write_udata(0x2233, 2).unwrap(); + w.write_udata(0x4455_6677, 4).unwrap(); + w.write_udata(0x8081_8283_8485_8687, 8).unwrap(); + #[rustfmt::skip] + assert_eq!(w.slice(), &[ + 0x11, + 0x33, 0x22, + 0x77, 0x66, 0x55, 0x44, + 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, + ]); + assert_eq!(w.write_udata(0x100, 1), Err(Error::ValueTooLarge)); + assert_eq!(w.write_udata(0x1_0000, 2), Err(Error::ValueTooLarge)); + assert_eq!(w.write_udata(0x1_0000_0000, 4), Err(Error::ValueTooLarge)); + assert_eq!(w.write_udata(0x00, 3), Err(Error::UnsupportedWordSize(3))); + w.write_udata_at(14, 0x11, 1).unwrap(); + w.write_udata_at(12, 0x2233, 2).unwrap(); + w.write_udata_at(8, 0x4455_6677, 4).unwrap(); + w.write_udata_at(0, 0x8081_8283_8485_8687, 8).unwrap(); + #[rustfmt::skip] + assert_eq!(w.slice(), &[ + 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, + 0x77, 0x66, 0x55, 0x44, + 0x33, 0x22, + 0x11, + ]); + assert_eq!(w.write_udata_at(0, 0x100, 1), Err(Error::ValueTooLarge)); + assert_eq!(w.write_udata_at(0, 0x1_0000, 2), Err(Error::ValueTooLarge)); + assert_eq!( + w.write_udata_at(0, 0x1_0000_0000, 4), + Err(Error::ValueTooLarge) + ); + assert_eq!( + w.write_udata_at(0, 0x00, 3), + Err(Error::UnsupportedWordSize(3)) + ); + + let mut w = write::EndianVec::new(LittleEndian); + w.write_uleb128(0).unwrap(); + assert_eq!(w.slice(), &[0]); + + let mut w = write::EndianVec::new(LittleEndian); + w.write_uleb128(u64::MAX).unwrap(); + assert_eq!( + w.slice(), + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1] + ); + + let mut w = write::EndianVec::new(LittleEndian); + w.write_sleb128(0).unwrap(); + assert_eq!(w.slice(), &[0]); + + let mut w = write::EndianVec::new(LittleEndian); + 
w.write_sleb128(i64::MAX).unwrap(); + assert_eq!( + w.slice(), + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0] + ); + + let mut w = write::EndianVec::new(LittleEndian); + w.write_sleb128(i64::MIN).unwrap(); + assert_eq!( + w.slice(), + &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7f] + ); + + let mut w = write::EndianVec::new(LittleEndian); + let offset = w.write_initial_length(Format::Dwarf32).unwrap(); + assert_eq!(w.slice(), &[0, 0, 0, 0]); + w.write_initial_length_at(offset, 0x1122_3344, Format::Dwarf32) + .unwrap(); + assert_eq!(w.slice(), &[0x44, 0x33, 0x22, 0x11]); + assert_eq!( + w.write_initial_length_at(offset, 0x1_0000_0000, Format::Dwarf32), + Err(Error::ValueTooLarge) + ); + + let mut w = write::EndianVec::new(LittleEndian); + let offset = w.write_initial_length(Format::Dwarf64).unwrap(); + assert_eq!(w.slice(), &[0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0]); + w.write_initial_length_at(offset, 0x1122_3344_5566_7788, Format::Dwarf64) + .unwrap(); + assert_eq!( + w.slice(), + &[0xff, 0xff, 0xff, 0xff, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11] + ); + } +} diff --git a/third_party/rust/macho-unwind-info/.cargo-checksum.json b/third_party/rust/macho-unwind-info/.cargo-checksum.json new file mode 100644 index 000000000000..2686eda9fed5 --- /dev/null +++ b/third_party/rust/macho-unwind-info/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"Cargo.lock":"279c9a1d482eedf9fb1949feff63d0284b803cef11c7082ed7cf514f9824c32b","Cargo.toml":"44f68aba549a45a56c05db6e15a45318abc3f09006bb75b0c54b791af9cbe3b5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"9ec2734f45b0d65192b9fee2307e05176b805a19efa994a553dcc5b2d3219a1e","Readme.md":"9e20773485c5f973d4516737abfac515768b60771ff36a524458ac2badf1e3da","examples/unwindinfodump.rs":"16862a4a2b90b1be47d747fc75e54f6ff408c2fccf3a461239940ff5ed2134c9","examples/unwindinfolookup.rs":"16c9d24f16f01661300b6b4e4a2fd1c61086657e4a48c6cde8f20d445922ab6f","src/error.rs":"4b0d768256e13e88f76c1016ac9f7034829bbc9f2e6be85e18407a375c0c0f9a","src/lib.rs":"650be825a1592f33003a1d0effb93bd0cc49edca3a3a93825edf0e00f2724087","src/num_display.rs":"f71924d4a40a750e43bf3c8056b0d46a90ef247bf49623cbb792b58b9b7bf003","src/opcodes/arm64.rs":"9376ab0c69ac1acf9fb171a2985993c800aabb9ffca3f0705f57fa9164640319","src/opcodes/bitfield.rs":"923d1eacd2dd2155660593b27905bc035e37726f98433af30368743aa5536603","src/opcodes/mod.rs":"f24c84fbb763ac12105530df23a6f38547a9be0f93178589bb6e37a9797626b2","src/opcodes/permutation.rs":"51b58e66cd3dcad06b341bde245dad3cb0df385902698a8f5b89d00cb2cb9e75","src/opcodes/x86.rs":"49901dc76a2b8adcce51725260c0752c3dc67fd8a903497156172e2f58fe0535","src/opcodes/x86_64.rs":"9b1058048d3a8437f25eb2549af590480f2d76d595005aaa8544d774846c9dfc","src/raw/compressed_function.rs":"f783f2861dfc9c18008fcfe14c24bf30a65c4853a2bd5a9711b5392b4ee58bcf","src/raw/consts.rs":"caf544de186ca92fb9796d54e49f9aadfbd2426372eb8abb04f96ec55e142314","src/raw/format.rs":"5028b26d90abcc31916a3a0a567f76b56ddaa8c4d88eac8fc47be11d0dab2152","src/raw/impls.rs":"f6be417e9aec22d57ba7405f828891166e8fa4ae14b86ed3cba117619ed0be3d","src/raw/mod.rs":"aee86391ebd7c1471ce9258934dc956557e6dbebd4bd946f39f81c3150ed07a5","src/raw/unaligned.rs":"4dd83054b4b625949a21e3c79a503516d1cc00779959766be1baba5da77a42e9","src/reader.rs":"45482fbf9a68194cc92146010461ee09ee5e2f6cc5f2
1458daafd27887788962"},"package":"6b6086acc74bc23f56b60e88bb082d505e23849d68d6c0f12bb6a7ad5c60e03e"} \ No newline at end of file diff --git a/third_party/rust/macho-unwind-info/Cargo.lock b/third_party/rust/macho-unwind-info/Cargo.lock new file mode 100644 index 000000000000..781900da5f16 --- /dev/null +++ b/third_party/rust/macho-unwind-info/Cargo.lock @@ -0,0 +1,201 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "macho-unwind-info" +version = "0.4.0" +dependencies = [ + "object", + "thiserror", + "zerocopy", + "zerocopy-derive", +] + +[[package]] +name = "memchr" 
+version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "flate2", + "memchr", + "ruzstd", +] + +[[package]] +name = "proc-macro2" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ruzstd" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58c4eb8a81997cf040a091d1f7e1938aeab6749d3a0dfa73af43cdc32393483d" +dependencies = [ + "byteorder", + "derive_more", + "twox-hash", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] diff --git a/third_party/rust/macho-unwind-info/Cargo.toml b/third_party/rust/macho-unwind-info/Cargo.toml new file mode 100644 index 000000000000..9ed7a0ea734b --- /dev/null +++ b/third_party/rust/macho-unwind-info/Cargo.toml @@ -0,0 +1,51 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# 
"normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "macho-unwind-info" +version = "0.4.0" +authors = ["Markus Stange "] +exclude = [ + "/.github", + "/tests", + "/fixtures", +] +description = "A parser for Apple's Compact Unwinding Format, which is used in the __unwind_info section of mach-O binaries." +readme = "Readme.md" +keywords = [ + "unwinding", + "exception", + "apple", + "object", + "parser", +] +categories = ["development-tools::debugging"] +license = "MIT/Apache-2.0" +repository = "https://github.com/mstange/macho-unwind-info" + +[[example]] +name = "unwindinfodump" + +[[example]] +name = "unwindinfolookup" + +[dependencies.thiserror] +version = "1.0.56" + +[dependencies.zerocopy] +version = "0.7.32" + +[dependencies.zerocopy-derive] +version = "0.7.32" + +[dev-dependencies.object] +version = "0.32.2" diff --git a/third_party/rust/macho-unwind-info/LICENSE-APACHE b/third_party/rust/macho-unwind-info/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/third_party/rust/macho-unwind-info/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/third_party/rust/macho-unwind-info/LICENSE-MIT b/third_party/rust/macho-unwind-info/LICENSE-MIT new file mode 100644 index 000000000000..e9485b4d39f4 --- /dev/null +++ b/third_party/rust/macho-unwind-info/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Markus Stange + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/macho-unwind-info/Readme.md b/third_party/rust/macho-unwind-info/Readme.md new file mode 100644 index 000000000000..1b4e03dc2293 --- /dev/null +++ b/third_party/rust/macho-unwind-info/Readme.md @@ -0,0 +1,74 @@ +[![crates.io page](https://img.shields.io/crates/v/macho-unwind-info.svg)](https://crates.io/crates/macho-unwind-info) +[![docs.rs page](https://docs.rs/macho-unwind-info/badge.svg)](https://docs.rs/macho-unwind-info/) + +# macho-unwind-info + +A zero-copy parser for the contents of the `__unwind_info` section of a +mach-O binary. + +Quickly look up the unwinding opcode for an address. 
Then parse the opcode to find +out how to recover the return address and the caller frame's register values. + +This crate is intended to be fast enough to be used in a sampling profiler. +Re-parsing from scratch is cheap and can be done on every sample. + +For the full unwinding experience, both `__unwind_info` and `__eh_frame` may need +to be consulted. The two sections are complementary: `__unwind_info` handles the +easy cases, and refers to an `__eh_frame` FDE for the hard cases. Conversely, +`__eh_frame` only includes FDEs for functions whose unwinding info cannot be +represented in `__unwind_info`. + +On x86 and x86_64, `__unwind_info` can represent most functions regardless of +whether they were compiled with framepointers or without. + +On arm64, compiling without framepointers is strongly discouraged, and +`__unwind_info` can only represent functions which have framepointers or +which don't need to restore any registers. As a result, if you have an arm64 +binary without framepointers (rare!), then the `__unwind_info` basically just +acts as an index for `__eh_frame`, similarly to `.eh_frame_hdr` for ELF. + +In clang's default configuration for arm64, non-leaf functions have framepointers +and leaf functions without stored registers on the stack don't have framepointers. +For leaf functions, the return address is kept in the `lr` register for the entire +duration of the function. And the unwind info lets you discern between these two +types of functions ("frame-based" and "frameless"). + +## Example + +```rust +use macho_unwind_info::UnwindInfo; +use macho_unwind_info::opcodes::OpcodeX86_64; + +let unwind_info = UnwindInfo::parse(data)?; + +if let Some(function) = unwind_info.lookup(0x1234)? 
{ + println!("Found function entry covering the address 0x1234:"); + let opcode = OpcodeX86_64::parse(function.opcode); + println!("0x{:08x}..0x{:08x}: {}", function.start_address, function.end_address, opcode); +} +``` + +## Command-line usage + +This repository also contains two CLI executables. You can install them like so: + +``` +% cargo install --examples macho-unwind-info +``` + +## Acknowledgements + +Thanks a ton to [**@Gankra**](https://github.com/Gankra/) for documenting this format at https://gankra.github.io/blah/compact-unwinding/. + +## License + +Licensed under either of + + * Apache License, Version 2.0 ([`LICENSE-APACHE`](./LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([`LICENSE-MIT`](./LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. 
diff --git a/third_party/rust/macho-unwind-info/examples/unwindinfodump.rs b/third_party/rust/macho-unwind-info/examples/unwindinfodump.rs new file mode 100644 index 000000000000..f5dfe370489a --- /dev/null +++ b/third_party/rust/macho-unwind-info/examples/unwindinfodump.rs @@ -0,0 +1,52 @@ +use std::{fmt::Display, fs::File, io::Read}; + +use macho_unwind_info::opcodes::{OpcodeArm64, OpcodeX86, OpcodeX86_64}; +use macho_unwind_info::UnwindInfo; +use object::{Architecture, ObjectSection}; + +fn main() { + let mut args = std::env::args_os().skip(1); + if args.len() < 1 { + eprintln!("Usage: {} ", std::env::args().next().unwrap()); + std::process::exit(1); + } + let path = args.next().unwrap(); + + let mut data = Vec::new(); + let mut file = File::open(path).unwrap(); + file.read_to_end(&mut data).unwrap(); + let data = &data[..]; + + let file = object::File::parse(data).expect("Could not parse object file"); + use object::Object; + let unwind_info_data_section = file + .section_by_name_bytes(b"__unwind_info") + .expect("Could not find __unwind_info section"); + let data = unwind_info_data_section.data().unwrap(); + let arch = file.architecture(); + + let info = UnwindInfo::parse(data).unwrap(); + let address_range = info.address_range(); + println!( + "Unwind info for address range 0x{:08x}-0x{:08x}", + address_range.start, address_range.end + ); + println!(); + let mut function_iter = info.functions(); + while let Some(function) = function_iter.next().unwrap() { + print_entry(function.start_address, function.opcode, arch); + } +} + +fn print_entry(address: u32, opcode: u32, arch: Architecture) { + match arch { + Architecture::I386 => print_entry_impl(address, OpcodeX86::parse(opcode)), + Architecture::X86_64 => print_entry_impl(address, OpcodeX86_64::parse(opcode)), + Architecture::Aarch64 => print_entry_impl(address, OpcodeArm64::parse(opcode)), + _ => {} + } +} + +fn print_entry_impl(address: u32, opcode: impl Display) { + println!("0x{:08x}: {}", address, 
opcode); +} diff --git a/third_party/rust/macho-unwind-info/examples/unwindinfolookup.rs b/third_party/rust/macho-unwind-info/examples/unwindinfolookup.rs new file mode 100644 index 000000000000..9d0ea25d0de4 --- /dev/null +++ b/third_party/rust/macho-unwind-info/examples/unwindinfolookup.rs @@ -0,0 +1,66 @@ +use std::{fmt::Display, fs::File, io::Read}; + +use macho_unwind_info::opcodes::{OpcodeArm64, OpcodeX86, OpcodeX86_64}; +use macho_unwind_info::UnwindInfo; +use object::{Architecture, ObjectSection}; + +fn main() { + let mut args = std::env::args().skip(1); + if args.len() < 1 { + eprintln!("Usage: {} ", std::env::args().next().unwrap()); + std::process::exit(1); + } + let path = args.next().unwrap(); + let pc = args.next().unwrap(); + let pc: u32 = if let Some(hexstr) = pc.strip_prefix("0x") { + u32::from_str_radix(hexstr, 16).unwrap() + } else { + pc.parse().unwrap() + }; + + let mut data = Vec::new(); + let mut file = File::open(path).unwrap(); + file.read_to_end(&mut data).unwrap(); + let data = &data[..]; + + let file = object::File::parse(data).expect("Could not parse object file"); + use object::Object; + let unwind_info_data_section = file + .section_by_name_bytes(b"__unwind_info") + .expect("Could not find __unwind_info section"); + let data = unwind_info_data_section.data().unwrap(); + let arch = file.architecture(); + + let unwind_info = UnwindInfo::parse(data).unwrap(); + let function = match unwind_info.lookup(pc) { + Ok(Some(f)) => f, + Ok(None) => { + println!("No entry was found for address 0x{:x}", pc); + std::process::exit(1); + } + Err(e) => { + println!( + "There was an error when looking up address 0x{:x}: {}", + pc, e + ); + std::process::exit(1); + } + }; + print_entry(function.start_address, function.opcode, arch); +} + +fn print_entry(address: u32, opcode: u32, arch: Architecture) { + match arch { + Architecture::I386 => print_entry_impl(address, OpcodeX86::parse(opcode)), + Architecture::X86_64 => print_entry_impl(address, 
OpcodeX86_64::parse(opcode)), + Architecture::Aarch64 => print_entry_impl(address, OpcodeArm64::parse(opcode)), + _ => {} + } +} + +fn print_entry_impl(address: u32, opcode: impl Display) { + println!( + "Found entry with function address 0x{:08x} and opcode {}", + address, opcode + ); +} diff --git a/third_party/rust/macho-unwind-info/src/error.rs b/third_party/rust/macho-unwind-info/src/error.rs new file mode 100644 index 000000000000..c83a1fe51008 --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/error.rs @@ -0,0 +1,60 @@ +/// The error type used in this crate. +#[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] +pub enum Error { + /// The data slice was not big enough to read the struct, or we + /// were trying to follow an invalid offset to somewhere outside + /// of the data bounds. + #[error("Read error: {0}")] + ReadError(#[from] ReadError), + + /// Each page has a first_address which is supposed to match the + /// start address of its first function entry. If the two addresses + /// don't match, then the lookup will fail for addresses which fall + /// in the gap between the page start address and the page's first + /// function's start address. + #[error("The page entry's first_address didn't match the address of its first function")] + InvalidPageEntryFirstAddress, + + /// The page kind was set to an unrecognized value. + #[error("Invalid page kind")] + InvalidPageKind, + + /// There is only supposed to be one sentinel page, at the very end + /// of the pages list - its first_address gives the end address of + /// the unwind info address range. If a sentinel page is encountered + /// somewhere else, this error is thrown. + #[error("Unexpected sentinel page")] + UnexpectedSentinelPage, +} + +/// This error indicates that the data slice was not large enough to +/// read the respective item. 
+#[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] +pub enum ReadError { + #[error("Could not read CompactUnwindInfoHeader")] + Header, + + #[error("Could not read global opcodes")] + GlobalOpcodes, + + #[error("Could not read pages")] + Pages, + + #[error("Could not read RegularPage")] + RegularPage, + + #[error("Could not read RegularPage functions")] + RegularPageFunctions, + + #[error("Could not read CompressedPage")] + CompressedPage, + + #[error("Could not read CompressedPage functions")] + CompressedPageFunctions, + + #[error("Could not read local opcodes")] + LocalOpcodes, + + #[error("Could not read page kind")] + PageKind, +} diff --git a/third_party/rust/macho-unwind-info/src/lib.rs b/third_party/rust/macho-unwind-info/src/lib.rs new file mode 100644 index 000000000000..e463adbeadff --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/lib.rs @@ -0,0 +1,385 @@ +//! A zero-copy parser for the contents of the `__unwind_info` section of a +//! mach-O binary. +//! +//! Quickly look up the unwinding opcode for an address. Then parse the opcode to find +//! out how to recover the return address and the caller frame's register values. +//! +//! This crate is intended to be fast enough to be used in a sampling profiler. +//! Re-parsing from scratch is cheap and can be done on every sample. +//! +//! For the full unwinding experience, both `__unwind_info` and `__eh_frame` may need +//! to be consulted. The two sections are complementary: `__unwind_info` handles the +//! easy cases, and refers to an `__eh_frame` FDE for the hard cases. Conversely, +//! `__eh_frame` only includes FDEs for functions whose unwinding info cannot be +//! represented in `__unwind_info`. +//! +//! On x86 and x86_64, `__unwind_info` can represent most functions regardless of +//! whether they were compiled with framepointers or without. +//! +//! On arm64, compiling without framepointers is strongly discouraged, and +//! 
`__unwind_info` can only represent functions which have framepointers or +//! which don't need to restore any registers. As a result, if you have an arm64 +//! binary without framepointers (rare!), then the `__unwind_info` basically just +//! acts as an index for `__eh_frame`, similarly to `.eh_frame_hdr` for ELF. +//! +//! In clang's default configuration for arm64, non-leaf functions have framepointers +//! and leaf functions without stored registers on the stack don't have framepointers. +//! For leaf functions, the return address is kept in the `lr` register for the entire +//! duration of the function. And the unwind info lets you discern between these two +//! types of functions ("frame-based" and "frameless"). +//! +//! # Example +//! +//! ```rust +//! use macho_unwind_info::UnwindInfo; +//! use macho_unwind_info::opcodes::OpcodeX86_64; +//! +//! # fn example(data: &[u8]) -> Result<(), macho_unwind_info::Error> { +//! let unwind_info = UnwindInfo::parse(data)?; +//! +//! if let Some(function) = unwind_info.lookup(0x1234)? { +//! println!("Found function entry covering the address 0x1234:"); +//! let opcode = OpcodeX86_64::parse(function.opcode); +//! println!("0x{:08x}..0x{:08x}: {}", function.start_address, function.end_address, opcode); +//! } +//! # Ok(()) +//! # } +//! ``` + +mod error; +mod num_display; + +/// Provides architecture-specific opcode parsing. +pub mod opcodes; +/// Lower-level structs for interpreting the format data. Can be used if the convenience APIs are too limiting. +pub mod raw; + +mod reader; + +pub use error::*; +use raw::*; + +/// A parsed representation of the unwind info. +/// +/// The UnwindInfo contains a list of pages, each of which contain a list of +/// function entries. +pub struct UnwindInfo<'a> { + /// The full __unwind_info section data. + data: &'a [u8], + + /// The list of global opcodes. + global_opcodes: &'a [Opcode], + + /// The list of page entries in this UnwindInfo. 
+ pages: &'a [PageEntry], +} + +/// The information about a single function in the UnwindInfo. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct Function { + /// The address where this function starts. + pub start_address: u32, + + /// The address where this function ends. Includes the padding at the end of + /// the function. In reality, this is the address of the *next* function + /// entry, or for the last function this is the address of the sentinel page + /// entry. + pub end_address: u32, + + /// The opcode which describes the unwinding information for this function. + /// This opcode needs to be parsed in an architecture-specific manner. + /// See the [opcodes] module for the facilities to do so. + pub opcode: u32, +} + +impl<'a> UnwindInfo<'a> { + /// Create an [UnwindInfo] instance which wraps the raw bytes of a mach-O binary's + /// `__unwind_info` section. The data can have arbitrary alignment. The parsing done + /// in this function is minimal; it's basically just three bounds checks. + pub fn parse(data: &'a [u8]) -> Result { + let header = CompactUnwindInfoHeader::parse(data)?; + let global_opcodes = header.global_opcodes(data)?; + let pages = header.pages(data)?; + Ok(Self { + data, + global_opcodes, + pages, + }) + } + + /// Returns an iterator over all the functions in this UnwindInfo. + pub fn functions(&self) -> FunctionIter<'a> { + FunctionIter { + data: self.data, + global_opcodes: self.global_opcodes, + pages: self.pages, + cur_page: None, + } + } + + /// Returns the range of addresses covered by unwind information. + pub fn address_range(&self) -> core::ops::Range { + if self.pages.is_empty() { + return 0..0; + } + let first_page = self.pages.first().unwrap(); + let last_page = self.pages.last().unwrap(); + first_page.first_address()..last_page.first_address() + } + + /// Looks up the unwind information for the function that covers the given address. + /// Returns `Ok(Some(function))` if a function was found. 
+ /// Returns `Ok(None)` if the address was outside of the range of addresses covered + /// by the unwind info. + /// Returns `Err(error)` if there was a problem with the format of the `__unwind_info` + /// data. + /// + /// This lookup is architecture agnostic. The opcode is returned as a u32. + /// To actually perform unwinding, the opcode needs to be parsed in an + /// architecture-specific manner. + /// + /// The design of the compact unwinding format makes this lookup extremely cheap. + /// It's just two binary searches: First to find the right page, end then to find + /// the right function within a page. The search happens inside the wrapped data, + /// with no extra copies. + pub fn lookup(&self, pc: u32) -> Result, Error> { + let Self { + pages, + data, + global_opcodes, + } = self; + let page_index = match pages.binary_search_by_key(&pc, PageEntry::first_address) { + Ok(i) => i, + Err(insertion_index) => { + if insertion_index == 0 { + return Ok(None); + } + insertion_index - 1 + } + }; + if page_index == pages.len() - 1 { + // We found the sentinel last page, which just marks the end of the range. + // So the looked up address is at or after the end address, i.e. outside the + // range of addresses covered by this UnwindInfo. + return Ok(None); + } + let page_entry = &pages[page_index]; + let next_page_entry = &pages[page_index + 1]; + let page_offset = page_entry.page_offset(); + match page_entry.page_kind(data)? 
{ + consts::PAGE_KIND_REGULAR => { + let page = RegularPage::parse(data, page_offset.into())?; + let functions = page.functions(data, page_offset)?; + let function_index = + match functions.binary_search_by_key(&pc, RegularFunctionEntry::address) { + Ok(i) => i, + Err(insertion_index) => { + if insertion_index == 0 { + return Err(Error::InvalidPageEntryFirstAddress); + } + insertion_index - 1 + } + }; + let entry = &functions[function_index]; + let fun_address = entry.address(); + let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) { + next_entry.address() + } else { + next_page_entry.first_address() + }; + Ok(Some(Function { + start_address: fun_address, + end_address: next_fun_address, + opcode: entry.opcode(), + })) + } + consts::PAGE_KIND_COMPRESSED => { + let page = CompressedPage::parse(data, page_offset.into())?; + let functions = page.functions(data, page_offset)?; + let page_address = page_entry.first_address(); + let rel_pc = pc - page_address; + let function_index = match functions.binary_search_by_key(&rel_pc, |&entry| { + CompressedFunctionEntry::new(entry.into()).relative_address() + }) { + Ok(i) => i, + Err(insertion_index) => { + if insertion_index == 0 { + return Err(Error::InvalidPageEntryFirstAddress); + } + insertion_index - 1 + } + }; + + let entry = CompressedFunctionEntry::new(functions[function_index].into()); + let fun_address = page_address + entry.relative_address(); + let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) { + let next_entry = CompressedFunctionEntry::new((*next_entry).into()); + page_address + next_entry.relative_address() + } else { + next_page_entry.first_address() + }; + + let opcode_index: usize = entry.opcode_index().into(); + let opcode = if opcode_index < global_opcodes.len() { + global_opcodes[opcode_index].opcode() + } else { + let local_opcodes = page.local_opcodes(data, page_offset)?; + let local_index = opcode_index - global_opcodes.len(); + 
local_opcodes[local_index].opcode() + }; + Ok(Some(Function { + start_address: fun_address, + end_address: next_fun_address, + opcode, + })) + } + consts::PAGE_KIND_SENTINEL => { + // Only the last page should be a sentinel page, and we've already checked earlier + // that we're not in the last page. + Err(Error::UnexpectedSentinelPage) + } + _ => Err(Error::InvalidPageKind), + } + } +} + +/// An iterator over the functions in an UnwindInfo page. +pub struct FunctionIter<'a> { + /// The full __unwind_info section data. + data: &'a [u8], + + /// The list of global opcodes. + global_opcodes: &'a [Opcode], + + /// The slice of the remaining to-be-iterated-over pages. + pages: &'a [PageEntry], + + /// The page whose functions we're iterating over at the moment. + cur_page: Option>, +} + +/// The current page of the function iterator. +/// The functions field is the slice of the remaining to-be-iterated-over functions. +#[derive(Clone, Copy)] +enum PageWithPartialFunctions<'a> { + Regular { + next_page_address: u32, + functions: &'a [RegularFunctionEntry], + }, + Compressed { + page_address: u32, + next_page_address: u32, + local_opcodes: &'a [Opcode], + functions: &'a [U32], + }, +} + +impl<'a> FunctionIter<'a> { + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Result, Error> { + loop { + let cur_page = if let Some(cur_page) = self.cur_page.as_mut() { + cur_page + } else { + let cur_page = match self.next_page()? 
{ + Some(page) => page, + None => return Ok(None), + }; + self.cur_page.insert(cur_page) + }; + + match cur_page { + PageWithPartialFunctions::Regular { + next_page_address, + functions, + } => { + if let Some((entry, remainder)) = functions.split_first() { + *functions = remainder; + let start_address = entry.address(); + let end_address = remainder + .first() + .map(RegularFunctionEntry::address) + .unwrap_or(*next_page_address); + return Ok(Some(Function { + start_address, + end_address, + opcode: entry.opcode(), + })); + } + } + PageWithPartialFunctions::Compressed { + page_address, + functions, + next_page_address, + local_opcodes, + } => { + if let Some((entry, remainder)) = functions.split_first() { + *functions = remainder; + let entry = CompressedFunctionEntry::new((*entry).into()); + let start_address = *page_address + entry.relative_address(); + let end_address = match remainder.first() { + Some(next_entry) => { + let next_entry = CompressedFunctionEntry::new((*next_entry).into()); + *page_address + next_entry.relative_address() + } + None => *next_page_address, + }; + let opcode_index: usize = entry.opcode_index().into(); + let opcode = if opcode_index < self.global_opcodes.len() { + self.global_opcodes[opcode_index].opcode() + } else { + let local_index = opcode_index - self.global_opcodes.len(); + local_opcodes[local_index].opcode() + }; + return Ok(Some(Function { + start_address, + end_address, + opcode, + })); + } + } + } + self.cur_page = None; + } + } + + fn next_page(&mut self) -> Result>, Error> { + let (page_entry, remainder) = match self.pages.split_first() { + Some(split) => split, + None => return Ok(None), + }; + + self.pages = remainder; + + let next_page_entry = match remainder.first() { + Some(entry) => entry, + None => return Ok(None), + }; + + let page_offset = page_entry.page_offset(); + let page_address = page_entry.first_address(); + let next_page_address = next_page_entry.first_address(); + let data = self.data; + let cur_page = 
/// Wrapper whose `Debug` output is the wrapped number in lowercase hexadecimal.
///
/// The formatter is forwarded unchanged to `LowerHex`, so flags such as `#`
/// (adds the `0x` prefix), width and zero-padding given to `{:?}` are honored.
pub struct HexNum<N>(pub N);

impl<N: LowerHex> Debug for HexNum<N> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the hex formatter instead of the numeric Debug impl.
        LowerHex::fmt(&self.0, f)
    }
}

/// Wrapper whose `Debug` output is the wrapped number in binary.
///
/// The formatter is forwarded unchanged to `Binary`, so flags such as `#`
/// (adds the `0b` prefix), width and zero-padding given to `{:?}` are honored.
pub struct BinNum<N>(pub N);

impl<N: Binary> Debug for BinNum<N> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the binary formatter instead of the numeric Debug impl.
        Binary::fmt(&self.0, f)
    }
}
d10_and_d11_saved: bool, + d8_and_d9_saved: bool, + + x27_and_x28_saved: bool, + x25_and_x26_saved: bool, + x23_and_x24_saved: bool, + x21_and_x22_saved: bool, + x19_and_x20_saved: bool, + }, + UnrecognizedKind(u8), +} + +impl OpcodeArm64 { + pub fn parse(opcode: u32) -> Self { + match OpcodeBitfield::new(opcode).kind() { + OPCODE_KIND_NULL => OpcodeArm64::Null, + OPCODE_KIND_ARM64_FRAMELESS => OpcodeArm64::Frameless { + stack_size_in_bytes: (((opcode >> 12) & 0b1111_1111_1111) as u16) * 16, + }, + OPCODE_KIND_ARM64_DWARF => OpcodeArm64::Dwarf { + eh_frame_fde: (opcode & 0xffffff), + }, + OPCODE_KIND_ARM64_FRAMEBASED => { + let saved_reg_pair_count = (opcode & 0b1_1111_1111).count_ones() as u8; + OpcodeArm64::FrameBased { + saved_reg_pair_count, + d14_and_d15_saved: ((opcode >> 8) & 1) == 1, + d12_and_d13_saved: ((opcode >> 7) & 1) == 1, + d10_and_d11_saved: ((opcode >> 6) & 1) == 1, + d8_and_d9_saved: ((opcode >> 5) & 1) == 1, + x27_and_x28_saved: ((opcode >> 4) & 1) == 1, + x25_and_x26_saved: ((opcode >> 3) & 1) == 1, + x23_and_x24_saved: ((opcode >> 2) & 1) == 1, + x21_and_x22_saved: ((opcode >> 1) & 1) == 1, + x19_and_x20_saved: (opcode & 1) == 1, + } + } + kind => OpcodeArm64::UnrecognizedKind(kind), + } + } +} + +impl Display for OpcodeArm64 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OpcodeArm64::Null => { + write!(f, "(uncovered)")?; + } + OpcodeArm64::Frameless { + stack_size_in_bytes, + } => { + if *stack_size_in_bytes == 0 { + write!(f, "CFA=reg31")?; + } else { + write!(f, "CFA=reg31+{}", stack_size_in_bytes)?; + } + } + OpcodeArm64::Dwarf { eh_frame_fde } => { + write!(f, "(check eh_frame FDE 0x{:x})", eh_frame_fde)?; + } + OpcodeArm64::FrameBased { + d14_and_d15_saved, + d12_and_d13_saved, + d10_and_d11_saved, + d8_and_d9_saved, + x27_and_x28_saved, + x25_and_x26_saved, + x23_and_x24_saved, + x21_and_x22_saved, + x19_and_x20_saved, + .. 
+ } => { + write!(f, "CFA=reg29+16: reg29=[CFA-16], reg30=[CFA-8]")?; + let mut offset = 32; + let mut next_pair = |pair_saved, a, b| { + if pair_saved { + let r = write!(f, ", {}=[CFA-{}], {}=[CFA-{}]", a, offset, b, offset + 8); + offset += 16; + r + } else { + Ok(()) + } + }; + next_pair(*d14_and_d15_saved, "reg14", "reg15")?; + next_pair(*d12_and_d13_saved, "reg12", "reg13")?; + next_pair(*d10_and_d11_saved, "reg10", "reg11")?; + next_pair(*d8_and_d9_saved, "reg8", "reg9")?; + next_pair(*x27_and_x28_saved, "reg27", "reg28")?; + next_pair(*x25_and_x26_saved, "reg25", "reg26")?; + next_pair(*x23_and_x24_saved, "reg23", "reg24")?; + next_pair(*x21_and_x22_saved, "reg21", "reg22")?; + next_pair(*x19_and_x20_saved, "reg19", "reg20")?; + } + OpcodeArm64::UnrecognizedKind(kind) => { + write!(f, "!! Unrecognized kind {}", kind)?; + } + } + Ok(()) + } +} diff --git a/third_party/rust/macho-unwind-info/src/opcodes/bitfield.rs b/third_party/rust/macho-unwind-info/src/opcodes/bitfield.rs new file mode 100644 index 000000000000..4210100c90da --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/opcodes/bitfield.rs @@ -0,0 +1,55 @@ +use crate::num_display::BinNum; +use std::fmt::Debug; + +pub struct OpcodeBitfield(pub u32); + +impl OpcodeBitfield { + pub fn new(value: u32) -> Self { + Self(value) + } + + /// Whether this instruction is the start of a function. + pub fn is_function_start(&self) -> bool { + self.0 >> 31 == 1 + } + + /// Whether there is an lsda entry for this instruction. + pub fn has_lsda(&self) -> bool { + (self.0 >> 30) & 0b1 == 1 + } + + /// An index into the global personalities array + /// (TODO: ignore if has_lsda() == false?) + pub fn personality_index(&self) -> u8 { + ((self.0 >> 28) & 0b11) as u8 + } + + /// The architecture-specific kind of opcode this is, specifying how to + /// interpret the remaining 24 bits of the opcode. + pub fn kind(&self) -> u8 { + ((self.0 >> 24) & 0b1111) as u8 + } + + /// The architecture-specific remaining 24 bits. 
+ pub fn specific_bits(&self) -> u32 { + self.0 & 0xffffff + } +} + +impl From for OpcodeBitfield { + fn from(opcode: u32) -> OpcodeBitfield { + OpcodeBitfield::new(opcode) + } +} + +impl Debug for OpcodeBitfield { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Opcode") + .field("kind", &self.kind()) + .field("is_function_start", &self.is_function_start()) + .field("has_lsda", &self.has_lsda()) + .field("personality_index", &self.personality_index()) + .field("specific_bits", &BinNum(self.specific_bits())) + .finish() + } +} diff --git a/third_party/rust/macho-unwind-info/src/opcodes/mod.rs b/third_party/rust/macho-unwind-info/src/opcodes/mod.rs new file mode 100644 index 000000000000..8b6d83be7a8b --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/opcodes/mod.rs @@ -0,0 +1,10 @@ +mod arm64; +mod bitfield; +mod permutation; +mod x86; +mod x86_64; + +pub use arm64::*; +pub use bitfield::*; +pub use x86::*; +pub use x86_64::*; diff --git a/third_party/rust/macho-unwind-info/src/opcodes/permutation.rs b/third_party/rust/macho-unwind-info/src/opcodes/permutation.rs new file mode 100644 index 000000000000..09930c0947f0 --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/opcodes/permutation.rs @@ -0,0 +1,81 @@ +/// Magically unpack up to 6 values from 10 bits. +/// +/// Background: +/// +/// Let's start with a simpler example of packing a list of numbers. +/// Let's say you want to store 2 values a and b, which can each be 0, 1, or 2. +/// You can store this as x = a * 3 + b. Then you can get out (a, b) by doing a +/// division by 3 with remainder, because this has the form of n * 3 + (something less than 3) +/// +/// Similar, for four values, you can use: +/// +/// ```text +/// x = a * 27 + b * 9 + c * 3 + d. 
/// Unpacks the ordered list of saved registers from the permutation field of a
/// frameless compact-unwind opcode.
///
/// `count` is how many registers were saved (at most 6). `encoding` is a
/// mixed-radix number: the first register is one of 6 choices, the second one
/// of the remaining 5, the third one of the remaining 4, and so on. Each digit
/// is the zero-based index of the chosen register among the registers that
/// have not been picked yet. For three registers `(a, b, c)` the encoding is
/// `c + 4 * (b + 5 * a)`.
///
/// On success the result holds 1-based register numbers in its first `count`
/// slots; the remaining slots are `0`.
///
/// Returns `Err(())` if `count` is larger than 6, or if the most significant
/// digit does not name one of the 6 registers.
pub fn decode_permutation_6(count: u32, mut encoding: u32) -> std::result::Result<[u8; 6], ()> {
    if count > 6 {
        return Err(());
    }

    // Peel the digits off from least significant to most significant.
    // Slot `s` (1..=4) has radix `6 - s` and is only present when more than
    // `s` registers are encoded. Slot 5 never carries information: by then
    // only a single register is left to choose from.
    let mut digits = [0u32; 6];
    for slot in (1..5u32).rev() {
        if count > slot {
            let radix = 6 - slot;
            digits[slot as usize] = encoding % radix;
            encoding /= radix;
        }
    }
    // Whatever remains is the leading digit; it is the only one that is not
    // bounded by a modulo and therefore has to be range-checked.
    if count > 0 {
        digits[0] = encoding;
    }
    if digits[0] >= 6 {
        return Err(());
    }

    // Ascending list of register numbers that have not been handed out yet.
    // Only the first `available_len` entries are meaningful.
    let mut available: [u8; 6] = [1, 2, 3, 4, 5, 6];
    let mut available_len = available.len();
    let mut registers = [0u8; 6];
    for (slot, &digit) in digits.iter().enumerate().take(count as usize) {
        let pick = digit as usize;
        debug_assert!(pick < available_len);
        registers[slot] = *available[..available_len].get(pick).unwrap();
        // Close the gap so the remaining registers stay in ascending order.
        available.copy_within(pick + 1..available_len, pick);
        available_len -= 1;
    }
    Ok(registers)
}
/// The registers that a 32-bit x86 compact unwind opcode can refer to.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RegisterNameX86 {
    Ebx,
    Ecx,
    Edx,
    Edi,
    Esi,
    Ebp,
}

impl RegisterNameX86 {
    /// Maps the 1-based register number used inside an opcode to a register.
    ///
    /// Returns `None` for `0` (which the opcode parser uses for "no register
    /// in this slot") and for any value above `6`.
    pub fn parse(n: u8) -> Option<Self> {
        match n {
            1 => Some(RegisterNameX86::Ebx),
            2 => Some(RegisterNameX86::Ecx),
            3 => Some(RegisterNameX86::Edx),
            4 => Some(RegisterNameX86::Edi),
            5 => Some(RegisterNameX86::Esi),
            6 => Some(RegisterNameX86::Ebp),
            _ => None,
        }
    }

    /// The `regN` label under which this register is printed when an opcode
    /// is formatted.
    // NOTE(review): N presumably is the register's DWARF number — confirm against the ABI.
    pub fn dwarf_name(&self) -> &'static str {
        match self {
            RegisterNameX86::Ebx => "reg3",
            RegisterNameX86::Ecx => "reg1",
            RegisterNameX86::Edx => "reg2",
            RegisterNameX86::Edi => "reg7",
            RegisterNameX86::Esi => "reg6",
            RegisterNameX86::Ebp => "reg5",
        }
    }
}
For example, if the function begins with six push + /// instructions, followed by a sub instruction, then stack_adjust_in_bytes + /// is 28: 4 bytes for the return address + 6 * 4 for each pushed register. + stack_adjust_in_bytes: u8, + + /// The registers, in the order that they need to be popped in when + /// returning / unwinding from this function. (Reverse order from + /// function prologue!) + /// Can have leading `None`s. + saved_regs: [Option; 6], + }, + Dwarf { + eh_frame_fde: u32, + }, + InvalidFrameless, + UnrecognizedKind(u8), +} + +impl OpcodeX86 { + pub fn parse(opcode: u32) -> Self { + match OpcodeBitfield::new(opcode).kind() { + OPCODE_KIND_NULL => OpcodeX86::Null, + OPCODE_KIND_X86_FRAMEBASED => OpcodeX86::FrameBased { + stack_offset_in_bytes: (((opcode >> 16) & 0xff) as u16) * 4, + saved_regs: [ + RegisterNameX86::parse(((opcode >> 12) & 0b111) as u8), + RegisterNameX86::parse(((opcode >> 9) & 0b111) as u8), + RegisterNameX86::parse(((opcode >> 6) & 0b111) as u8), + RegisterNameX86::parse(((opcode >> 3) & 0b111) as u8), + RegisterNameX86::parse((opcode & 0b111) as u8), + ], + }, + OPCODE_KIND_X86_FRAMELESS_IMMEDIATE => { + let stack_size_in_bytes = (((opcode >> 16) & 0xff) as u16) * 4; + let register_count = (opcode >> 10) & 0b111; + let register_permutation = opcode & 0b11_1111_1111; + let saved_registers = + match decode_permutation_6(register_count, register_permutation) { + Ok(regs) => regs, + Err(_) => return OpcodeX86::InvalidFrameless, + }; + OpcodeX86::FramelessImmediate { + stack_size_in_bytes, + saved_regs: [ + RegisterNameX86::parse(saved_registers[0]), + RegisterNameX86::parse(saved_registers[1]), + RegisterNameX86::parse(saved_registers[2]), + RegisterNameX86::parse(saved_registers[3]), + RegisterNameX86::parse(saved_registers[4]), + RegisterNameX86::parse(saved_registers[5]), + ], + } + } + OPCODE_KIND_X86_FRAMELESS_INDIRECT => { + let immediate_offset_from_function_start = (opcode >> 16) as u8; + let stack_adjust_in_bytes = ((opcode 
>> 13) & 0b111) as u8 * 4; + let register_count = (opcode >> 10) & 0b111; + let register_permutation = opcode & 0b11_1111_1111; + let saved_registers = + match decode_permutation_6(register_count, register_permutation) { + Ok(regs) => regs, + Err(_) => return OpcodeX86::InvalidFrameless, + }; + OpcodeX86::FramelessIndirect { + immediate_offset_from_function_start, + stack_adjust_in_bytes, + saved_regs: [ + RegisterNameX86::parse(saved_registers[0]), + RegisterNameX86::parse(saved_registers[1]), + RegisterNameX86::parse(saved_registers[2]), + RegisterNameX86::parse(saved_registers[3]), + RegisterNameX86::parse(saved_registers[4]), + RegisterNameX86::parse(saved_registers[5]), + ], + } + } + OPCODE_KIND_X86_DWARF => OpcodeX86::Dwarf { + eh_frame_fde: (opcode & 0xffffff), + }, + kind => OpcodeX86::UnrecognizedKind(kind), + } + } +} + +impl Display for OpcodeX86 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OpcodeX86::Null => { + write!(f, "(uncovered)")?; + } + OpcodeX86::FrameBased { + stack_offset_in_bytes, + saved_regs, + } => { + // ebp was set to esp before the saved registers were pushed. + // The first pushed register is at ebp - 4 (== CFA - 12), the last at ebp - stack_offset_in_bytes. 
+ write!(f, "CFA=reg6+8: reg6=[CFA-8], reg16=[CFA-4]")?; + let max_count = (*stack_offset_in_bytes / 4) as usize; + let mut offset = *stack_offset_in_bytes + 8; // + 2 for rbp, return address + for reg in saved_regs.iter().rev().take(max_count) { + if let Some(reg) = reg { + write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; + } + offset -= 4; + } + } + OpcodeX86::FramelessImmediate { + stack_size_in_bytes, + saved_regs, + } => { + if *stack_size_in_bytes == 0 { + write!(f, "CFA=reg7:",)?; + } else { + write!(f, "CFA=reg7+{}:", *stack_size_in_bytes)?; + } + write!(f, " reg16=[CFA-4]")?; + let mut offset = 2 * 4; + for reg in saved_regs.iter().rev().flatten() { + write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; + offset += 4; + } + } + OpcodeX86::FramelessIndirect { + immediate_offset_from_function_start, + stack_adjust_in_bytes, + saved_regs, + } => { + write!( + f, + "CFA=[function_start+{}]+{}", + immediate_offset_from_function_start, stack_adjust_in_bytes + )?; + write!(f, " reg16=[CFA-4]")?; + let mut offset = 2 * 4; + for reg in saved_regs.iter().rev().flatten() { + write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; + offset += 4; + } + } + OpcodeX86::Dwarf { eh_frame_fde } => { + write!(f, "(check eh_frame FDE 0x{:x})", eh_frame_fde)?; + } + OpcodeX86::InvalidFrameless => { + write!( + f, + "!! frameless immediate or indirect with invalid permutation encoding" + )?; + } + OpcodeX86::UnrecognizedKind(kind) => { + write!(f, "!! 
/// The registers that an x86_64 compact unwind opcode can refer to.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RegisterNameX86_64 {
    Rbx,
    R12,
    R13,
    R14,
    R15,
    Rbp,
}

impl RegisterNameX86_64 {
    /// Maps the 1-based register number used inside an opcode to a register.
    ///
    /// Returns `None` for `0` (which the opcode parser uses for "no register
    /// in this slot") and for any value above `6`.
    pub fn parse(n: u8) -> Option<Self> {
        match n {
            1 => Some(RegisterNameX86_64::Rbx),
            2 => Some(RegisterNameX86_64::R12),
            3 => Some(RegisterNameX86_64::R13),
            4 => Some(RegisterNameX86_64::R14),
            5 => Some(RegisterNameX86_64::R15),
            6 => Some(RegisterNameX86_64::Rbp),
            _ => None,
        }
    }

    /// The `regN` label under which this register is printed when an opcode
    /// is formatted.
    // NOTE(review): N presumably is the register's DWARF number — confirm against the ABI.
    pub fn dwarf_name(&self) -> &'static str {
        match self {
            RegisterNameX86_64::Rbx => "reg3",
            RegisterNameX86_64::R12 => "reg12",
            RegisterNameX86_64::R13 => "reg13",
            RegisterNameX86_64::R14 => "reg14",
            RegisterNameX86_64::R15 => "reg15",
            RegisterNameX86_64::Rbp => "reg6",
        }
    }
}
is a u32 value with the offset we need. (NOTE: not divided by anything!) + /// Example: + /// - function_start is 0x1c20 + /// - immediate_offset_from_function_start is 13 (= 0xd), + /// - there's sub instruction at 0x1c2a: sub rsp, 0xc28. + /// This instruction is encoded as 48 81 EC 28 0C 00 00, with the 28 + /// byte at 0x1c2d (= 0x1c20 + 13). The immediate is 28 0C 00 00, + /// interpreted as a little-endian u32: 0xc28. + immediate_offset_from_function_start: u8, + + /// An offset to add to the loaded stack size. + /// This allows the stack size to differ slightly from the `sub`, to + /// compensate for any function prologue that pushes a bunch of + /// pointer-sized registers. This adjust value includes the return + /// address on the stack. For example, if the function begins with six push + /// instructions, followed by a sub instruction, then stack_adjust_in_bytes + /// is 56: 8 bytes for the return address + 6 * 8 for each pushed register. + stack_adjust_in_bytes: u8, + + /// The registers, in the order that they need to be popped in when + /// returning / unwinding from this function. (Reverse order from + /// function prologue!) + /// Can have leading `None`s. 
+ saved_regs: [Option; 6], + }, + Dwarf { + eh_frame_fde: u32, + }, + InvalidFrameless, + UnrecognizedKind(u8), +} + +impl OpcodeX86_64 { + pub fn parse(opcode: u32) -> Self { + match OpcodeBitfield::new(opcode).kind() { + OPCODE_KIND_NULL => OpcodeX86_64::Null, + OPCODE_KIND_X86_FRAMEBASED => OpcodeX86_64::FrameBased { + stack_offset_in_bytes: (((opcode >> 16) & 0xff) as u16) * 8, + saved_regs: [ + RegisterNameX86_64::parse(((opcode >> 12) & 0b111) as u8), + RegisterNameX86_64::parse(((opcode >> 9) & 0b111) as u8), + RegisterNameX86_64::parse(((opcode >> 6) & 0b111) as u8), + RegisterNameX86_64::parse(((opcode >> 3) & 0b111) as u8), + RegisterNameX86_64::parse((opcode & 0b111) as u8), + ], + }, + OPCODE_KIND_X86_FRAMELESS_IMMEDIATE => { + let stack_size_in_bytes = (((opcode >> 16) & 0xff) as u16) * 8; + let register_count = (opcode >> 10) & 0b111; + let register_permutation = opcode & 0b11_1111_1111; + let saved_registers = + match decode_permutation_6(register_count, register_permutation) { + Ok(regs) => regs, + Err(_) => return OpcodeX86_64::InvalidFrameless, + }; + OpcodeX86_64::FramelessImmediate { + stack_size_in_bytes, + saved_regs: [ + RegisterNameX86_64::parse(saved_registers[0]), + RegisterNameX86_64::parse(saved_registers[1]), + RegisterNameX86_64::parse(saved_registers[2]), + RegisterNameX86_64::parse(saved_registers[3]), + RegisterNameX86_64::parse(saved_registers[4]), + RegisterNameX86_64::parse(saved_registers[5]), + ], + } + } + OPCODE_KIND_X86_FRAMELESS_INDIRECT => { + let immediate_offset_from_function_start = (opcode >> 16) as u8; + let stack_adjust_in_bytes = ((opcode >> 13) & 0b111) as u8 * 8; + let register_count = (opcode >> 10) & 0b111; + let register_permutation = opcode & 0b11_1111_1111; + let saved_registers = + match decode_permutation_6(register_count, register_permutation) { + Ok(regs) => regs, + Err(_) => return OpcodeX86_64::InvalidFrameless, + }; + OpcodeX86_64::FramelessIndirect { + immediate_offset_from_function_start, + 
stack_adjust_in_bytes, + saved_regs: [ + RegisterNameX86_64::parse(saved_registers[0]), + RegisterNameX86_64::parse(saved_registers[1]), + RegisterNameX86_64::parse(saved_registers[2]), + RegisterNameX86_64::parse(saved_registers[3]), + RegisterNameX86_64::parse(saved_registers[4]), + RegisterNameX86_64::parse(saved_registers[5]), + ], + } + } + OPCODE_KIND_X86_DWARF => OpcodeX86_64::Dwarf { + eh_frame_fde: (opcode & 0xffffff), + }, + kind => OpcodeX86_64::UnrecognizedKind(kind), + } + } +} + +impl Display for OpcodeX86_64 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OpcodeX86_64::Null => { + write!(f, "(uncovered)")?; + } + OpcodeX86_64::FrameBased { + stack_offset_in_bytes, + saved_regs, + } => { + // rbp was set to rsp before the saved registers were pushed. + // The first pushed register is at rbp - 8 (== CFA - 24), the last at rbp - stack_offset_in_bytes. + write!(f, "CFA=reg6+16: reg6=[CFA-16], reg16=[CFA-8]")?; + let max_count = (*stack_offset_in_bytes / 8) as usize; + let mut offset = *stack_offset_in_bytes + 16; // + 2 for rbp, return address + for reg in saved_regs.iter().rev().take(max_count) { + if let Some(reg) = reg { + write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; + } + offset -= 8; + } + } + OpcodeX86_64::FramelessImmediate { + stack_size_in_bytes, + saved_regs, + } => { + if *stack_size_in_bytes == 0 { + write!(f, "CFA=reg7:",)?; + } else { + write!(f, "CFA=reg7+{}:", *stack_size_in_bytes)?; + } + write!(f, " reg16=[CFA-8]")?; + let mut offset = 2 * 8; + for reg in saved_regs.iter().rev().flatten() { + write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; + offset += 8; + } + } + OpcodeX86_64::FramelessIndirect { + immediate_offset_from_function_start, + stack_adjust_in_bytes, + saved_regs, + } => { + write!( + f, + "CFA=[function_start+{}]+{}", + immediate_offset_from_function_start, stack_adjust_in_bytes + )?; + write!(f, " reg16=[CFA-8]")?; + let mut offset = 2 * 8; + for reg in 
saved_regs.iter().rev().flatten() { + write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; + offset += 8; + } + } + OpcodeX86_64::Dwarf { eh_frame_fde } => { + write!(f, "(check eh_frame FDE 0x{:x})", eh_frame_fde)?; + } + OpcodeX86_64::InvalidFrameless => { + write!( + f, + "!! frameless immediate or indirect with invalid permutation encoding" + )?; + } + OpcodeX86_64::UnrecognizedKind(kind) => { + write!(f, "!! Unrecognized kind {}", kind)?; + } + } + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_frameless_indirect() { + use RegisterNameX86_64::*; + assert_eq!( + OpcodeX86_64::parse(0x30df800), + OpcodeX86_64::FramelessIndirect { + immediate_offset_from_function_start: 13, + stack_adjust_in_bytes: 56, + saved_regs: [ + Some(Rbx), + Some(R12), + Some(R13), + Some(R14), + Some(R15), + Some(Rbp) + ] + } + ) + } +} diff --git a/third_party/rust/macho-unwind-info/src/raw/compressed_function.rs b/third_party/rust/macho-unwind-info/src/raw/compressed_function.rs new file mode 100644 index 000000000000..8d939988fdb2 --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/raw/compressed_function.rs @@ -0,0 +1,44 @@ +use crate::num_display::HexNum; +use std::fmt::Debug; + +/// Allows accessing the two packed values from a "compressed" function entry. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct CompressedFunctionEntry(pub u32); + +/// Entries are a u32 that contains two packed values (from high to low): +/// * 8 bits: opcode index +/// * 24 bits: function address +impl CompressedFunctionEntry { + /// Wrap the u32. + pub fn new(value: u32) -> Self { + Self(value) + } + + /// The opcode index. + /// * 0..global_opcodes_len => index into global palette + /// * global_opcodes_len..255 => index into local palette + /// (subtract global_opcodes_len to get the real local index) + pub fn opcode_index(&self) -> u8 { + (self.0 >> 24) as u8 + } + + /// The function address, relative to the page's first_address. 
+ pub fn relative_address(&self) -> u32 { + self.0 & 0xffffff + } +} + +impl From for CompressedFunctionEntry { + fn from(entry: u32) -> CompressedFunctionEntry { + CompressedFunctionEntry::new(entry) + } +} + +impl Debug for CompressedFunctionEntry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompressedFunctionEntry") + .field("opcode_index", &HexNum(self.opcode_index())) + .field("relative_address", &HexNum(self.relative_address())) + .finish() + } +} diff --git a/third_party/rust/macho-unwind-info/src/raw/consts.rs b/third_party/rust/macho-unwind-info/src/raw/consts.rs new file mode 100644 index 000000000000..047763d97634 --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/raw/consts.rs @@ -0,0 +1,14 @@ +pub const PAGE_KIND_SENTINEL: u32 = 1; // used in the last page, whose first_address is the end address +pub const PAGE_KIND_REGULAR: u32 = 2; +pub const PAGE_KIND_COMPRESSED: u32 = 3; + +pub const OPCODE_KIND_NULL: u8 = 0; + +pub const OPCODE_KIND_X86_FRAMEBASED: u8 = 1; +pub const OPCODE_KIND_X86_FRAMELESS_IMMEDIATE: u8 = 2; +pub const OPCODE_KIND_X86_FRAMELESS_INDIRECT: u8 = 3; +pub const OPCODE_KIND_X86_DWARF: u8 = 4; + +pub const OPCODE_KIND_ARM64_FRAMELESS: u8 = 2; +pub const OPCODE_KIND_ARM64_DWARF: u8 = 3; +pub const OPCODE_KIND_ARM64_FRAMEBASED: u8 = 4; diff --git a/third_party/rust/macho-unwind-info/src/raw/format.rs b/third_party/rust/macho-unwind-info/src/raw/format.rs new file mode 100644 index 000000000000..9bdc3e1c89a6 --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/raw/format.rs @@ -0,0 +1,114 @@ +use std::fmt::Debug; +use zerocopy_derive::{FromBytes, FromZeroes, Unaligned}; + +use super::unaligned::{U16, U32}; + +// Written with help from https://gankra.github.io/blah/compact-unwinding/ + +/// The `__unwind_info` header. +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct CompactUnwindInfoHeader { + /// The version. 
Only version 1 is currently defined + pub version: U32, + + /// The array of U32 global opcodes (offset relative to start of root page). + /// + /// These may be indexed by "compressed" second-level pages. + pub global_opcodes_offset: U32, + pub global_opcodes_len: U32, + + /// The array of U32 global personality codes (offset relative to start of root page). + /// + /// Personalities define the style of unwinding that an unwinder should use, + /// and how to interpret the LSDA functions for a function (see below). + pub personalities_offset: U32, + pub personalities_len: U32, + + /// The array of [`PageEntry`]'s describing the second-level pages + /// (offset relative to start of root page). + pub pages_offset: U32, + pub pages_len: U32, + // After this point there are several dynamically-sized arrays whose precise + // order and positioning don't matter, because they are all accessed using + // offsets like the ones above. The arrays are: + + // global_opcodes: [u32; global_opcodes_len], + // personalities: [u32; personalities_len], + // pages: [PageEntry; pages_len], + // lsdas: [LsdaEntry; unknown_len], +} + +/// One element of the array of pages. +#[derive(Unaligned, FromZeroes, FromBytes, Clone, Copy)] +#[repr(C)] +pub struct PageEntry { + /// The first address mapped by this page. + /// + /// This is useful for binary-searching for the page that can map + /// a specific address in the binary (the primary kind of lookup + /// performed by an unwinder). + pub first_address: U32, + + /// Offset of the second-level page. + /// + /// This may point to either a [`RegularPage`] or a [`CompressedPage`]. + /// Which it is can be determined by the 32-bit "kind" value that is at + /// the start of both layouts. + pub page_offset: U32, + + /// Base offset into the lsdas array that functions in this page will be + /// relative to. + pub lsda_index_offset: U32, +} + +/// A non-compressed page. 
+#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct RegularPage { + /// Always 2 (use to distinguish from CompressedPage). + pub kind: U32, + + /// The Array of [`RegularFunctionEntry`]'s (offset relative to **start of this page**). + pub functions_offset: U16, + pub functions_len: U16, +} + +/// A "compressed" page. +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct CompressedPage { + /// Always 3 (use to distinguish from RegularPage). + pub kind: U32, + + /// The array of compressed u32 function entries (offset relative to **start of this page**). + /// + /// Entries are a u32 that contains two packed values (from highest to lowest bits): + /// * 8 bits: opcode index + /// * 0..global_opcodes_len => index into global palette + /// * global_opcodes_len..255 => index into local palette (subtract global_opcodes_len) + /// * 24 bits: instruction address + /// * address is relative to this page's first_address! + pub functions_offset: U16, + pub functions_len: U16, + + /// The array of u32 local opcodes for this page (offset relative to **start of this page**). + pub local_opcodes_offset: U16, + pub local_opcodes_len: U16, +} + +/// An opcode. +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct Opcode(pub U32); + +/// A function entry from a non-compressed page. +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct RegularFunctionEntry { + /// The address in the binary for this function entry (absolute). + pub address: U32, + + /// The opcode for this address. 
+ pub opcode: Opcode, +} diff --git a/third_party/rust/macho-unwind-info/src/raw/impls.rs b/third_party/rust/macho-unwind-info/src/raw/impls.rs new file mode 100644 index 000000000000..6d28b967bf1b --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/raw/impls.rs @@ -0,0 +1,167 @@ +use std::fmt::Debug; + +use super::format::{ + CompactUnwindInfoHeader, CompressedPage, Opcode, PageEntry, RegularFunctionEntry, RegularPage, +}; +use super::unaligned::U32; +use crate::error::ReadError; +use crate::num_display::HexNum; +use crate::reader::Reader; + +type Result = std::result::Result; + +impl CompactUnwindInfoHeader { + pub fn parse(data: &[u8]) -> Result<&Self> { + data.read_at::(0) + .ok_or(ReadError::Header) + } + + pub fn global_opcodes_offset(&self) -> u32 { + self.global_opcodes_offset.into() + } + + pub fn global_opcodes_len(&self) -> u32 { + self.global_opcodes_len.into() + } + + pub fn pages_offset(&self) -> u32 { + self.pages_offset.into() + } + + pub fn pages_len(&self) -> u32 { + self.pages_len.into() + } + + /// Return the list of global opcodes. + pub fn global_opcodes<'data>(&self, data: &'data [u8]) -> Result<&'data [Opcode]> { + data.read_slice_at::( + self.global_opcodes_offset().into(), + self.global_opcodes_len() as usize, + ) + .ok_or(ReadError::GlobalOpcodes) + } + + /// Return the list of pages. 
+ pub fn pages<'data>(&self, data: &'data [u8]) -> Result<&'data [PageEntry]> { + data.read_slice_at::(self.pages_offset().into(), self.pages_len() as usize) + .ok_or(ReadError::Pages) + } +} + +impl RegularPage { + pub fn parse(data: &[u8], page_offset: u64) -> Result<&Self> { + data.read_at::(page_offset) + .ok_or(ReadError::RegularPage) + } + + pub fn functions_offset(&self) -> u16 { + self.functions_offset.into() + } + + pub fn functions_len(&self) -> u16 { + self.functions_len.into() + } + + pub fn functions<'data>( + &self, + data: &'data [u8], + page_offset: u32, + ) -> Result<&'data [RegularFunctionEntry]> { + let relative_functions_offset = self.functions_offset(); + let functions_len: usize = self.functions_len().into(); + let functions_offset = page_offset as u64 + relative_functions_offset as u64; + data.read_slice_at::(functions_offset, functions_len) + .ok_or(ReadError::RegularPageFunctions) + } +} + +impl CompressedPage { + pub fn parse(data: &[u8], page_offset: u64) -> Result<&Self> { + data.read_at::(page_offset) + .ok_or(ReadError::CompressedPage) + } + + pub fn functions_offset(&self) -> u16 { + self.functions_offset.into() + } + + pub fn functions_len(&self) -> u16 { + self.functions_len.into() + } + + pub fn local_opcodes_offset(&self) -> u16 { + self.local_opcodes_offset.into() + } + + pub fn local_opcodes_len(&self) -> u16 { + self.local_opcodes_len.into() + } + + pub fn functions<'data>(&self, data: &'data [u8], page_offset: u32) -> Result<&'data [U32]> { + let relative_functions_offset = self.functions_offset(); + let functions_len: usize = self.functions_len().into(); + let functions_offset = page_offset as u64 + relative_functions_offset as u64; + data.read_slice_at::(functions_offset, functions_len) + .ok_or(ReadError::CompressedPageFunctions) + } + + /// Return the list of local opcodes. 
+ pub fn local_opcodes<'data>( + &self, + data: &'data [u8], + page_offset: u32, + ) -> Result<&'data [Opcode]> { + let relative_local_opcodes_offset = self.local_opcodes_offset(); + let local_opcodes_len: usize = self.local_opcodes_len().into(); + let local_opcodes_offset = page_offset as u64 + relative_local_opcodes_offset as u64; + data.read_slice_at::(local_opcodes_offset, local_opcodes_len) + .ok_or(ReadError::LocalOpcodes) + } +} + +impl Opcode { + pub fn opcode(&self) -> u32 { + self.0.into() + } +} + +impl RegularFunctionEntry { + pub fn address(&self) -> u32 { + self.address.into() + } + + pub fn opcode(&self) -> u32 { + self.opcode.opcode() + } +} + +impl PageEntry { + pub fn page_offset(&self) -> u32 { + self.page_offset.into() + } + + pub fn first_address(&self) -> u32 { + self.first_address.into() + } + + pub fn lsda_index_offset(&self) -> u32 { + self.lsda_index_offset.into() + } + + pub fn page_kind(&self, data: &[u8]) -> Result { + let kind = *data + .read_at::(self.page_offset().into()) + .ok_or(ReadError::PageKind)?; + Ok(kind.into()) + } +} + +impl Debug for PageEntry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PageEntry") + .field("first_address", &HexNum(self.first_address())) + .field("page_offset", &HexNum(self.page_offset())) + .field("lsda_index_offset", &HexNum(self.lsda_index_offset())) + .finish() + } +} diff --git a/third_party/rust/macho-unwind-info/src/raw/mod.rs b/third_party/rust/macho-unwind-info/src/raw/mod.rs new file mode 100644 index 000000000000..a8bf1cc0ae40 --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/raw/mod.rs @@ -0,0 +1,9 @@ +mod compressed_function; +pub mod consts; +mod format; +mod impls; +mod unaligned; + +pub use compressed_function::*; +pub use format::*; +pub use unaligned::*; diff --git a/third_party/rust/macho-unwind-info/src/raw/unaligned.rs b/third_party/rust/macho-unwind-info/src/raw/unaligned.rs new file mode 100644 index 000000000000..52aa2eb18dae 
--- /dev/null +++ b/third_party/rust/macho-unwind-info/src/raw/unaligned.rs @@ -0,0 +1,53 @@ +use std::fmt::Debug; + +use zerocopy_derive::{FromBytes, FromZeroes, Unaligned}; + +/// An unaligned little-endian `u32` value. +#[derive( + Unaligned, FromZeroes, FromBytes, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, +)] +#[repr(transparent)] +pub struct U32([u8; 4]); + +impl From for U32 { + fn from(n: u32) -> Self { + U32(n.to_le_bytes()) + } +} + +impl From for u32 { + fn from(n: U32) -> Self { + u32::from_le_bytes(n.0) + } +} + +impl Debug for U32 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + u32::fmt(&(*self).into(), f) + } +} + +/// An unaligned little-endian `u16` value. +#[derive( + Unaligned, FromZeroes, FromBytes, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, +)] +#[repr(transparent)] +pub struct U16([u8; 2]); + +impl From for U16 { + fn from(n: u16) -> Self { + U16(n.to_le_bytes()) + } +} + +impl From for u16 { + fn from(n: U16) -> Self { + u16::from_le_bytes(n.0) + } +} + +impl Debug for U16 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + u16::fmt(&(*self).into(), f) + } +} diff --git a/third_party/rust/macho-unwind-info/src/reader.rs b/third_party/rust/macho-unwind-info/src/reader.rs new file mode 100644 index 000000000000..bbffb499119c --- /dev/null +++ b/third_party/rust/macho-unwind-info/src/reader.rs @@ -0,0 +1,22 @@ +use zerocopy::{FromBytes, Ref, Unaligned}; + +pub trait Reader { + fn read_at(&self, offset: u64) -> Option<&T>; + fn read_slice_at(&self, offset: u64, len: usize) -> Option<&[T]>; +} + +impl Reader for [u8] { + fn read_at(&self, offset: u64) -> Option<&T> { + let offset: usize = offset.try_into().ok()?; + let end: usize = offset.checked_add(core::mem::size_of::())?; + let lv = Ref::<&[u8], T>::new_unaligned(self.get(offset..end)?)?; + Some(lv.into_ref()) + } + + fn read_slice_at(&self, offset: u64, len: usize) -> Option<&[T]> { + let offset: usize = 
offset.try_into().ok()?; + let end: usize = offset.checked_add(core::mem::size_of::().checked_mul(len)?)?; + let lv = Ref::<&[u8], [T]>::new_slice_unaligned(self.get(offset..end)?)?; + Some(lv.into_slice()) + } +} diff --git a/third_party/rust/minidump-common/.cargo-checksum.json b/third_party/rust/minidump-common/.cargo-checksum.json index 84e8ed903708..11dd649b85bb 100644 --- a/third_party/rust/minidump-common/.cargo-checksum.json +++ b/third_party/rust/minidump-common/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"32470c1f1471a46161eb9fda47e6e7d50e2615bfb0722bcd9355fbb90174319e","LICENSE":"06de63df29199a394442b57a28e886059ddc940973e10646877a0793fd53e2c9","README.md":"4c2a1448aab9177fd5f033faaf704af7bb222bf0804079fd3cff90fa1df4b812","src/errors/linux.rs":"df743ac9478e39f8a577f4f10f2d1317babad7b7c0d26cdbba2ea6b5426f4126","src/errors/macos.rs":"4516aaeb7abf6209f5cd94e86a1e55a9675ef77262f52e3b2d5596fd4b858458","src/errors/mod.rs":"f224af66124fd31a040c8da11bbab7b7795b48e4edea76e01c1f4dee537ea38a","src/errors/windows.rs":"0567af7bfac3ae2a8dff418e10873d8a5bf15a8b8ac6892c5ffdab08ec3ac901","src/format.rs":"4d9baaa3b3b52b4311efaadb12921088141becba8890ae977f0a2807eaa7f820","src/lib.rs":"0900c00594b3c386b86127055889006f0d7d0004b08455fadb0e60d55a469cab","src/traits.rs":"93127ad69a849325ed66a0626e0bdae05868488f81c539d35c71a7bfbb9e51ac","src/utils.rs":"17e8777b05998a8149fc5168af3bca1e0f9aeffe28cb3d6dbfb89c546f75e5ed"},"package":"1bb6eaf88cc770fa58e6ae721cf2e40c2ca6a4c942ae8c7aa324d680bd3c6717"} \ No newline at end of file 
+{"files":{"Cargo.toml":"fa97252c2595c4676f3e4b8027c0df5a808506e73cab45be1509c9d9bb7e1721","LICENSE":"06de63df29199a394442b57a28e886059ddc940973e10646877a0793fd53e2c9","README.md":"4c2a1448aab9177fd5f033faaf704af7bb222bf0804079fd3cff90fa1df4b812","src/errors/linux.rs":"df743ac9478e39f8a577f4f10f2d1317babad7b7c0d26cdbba2ea6b5426f4126","src/errors/macos.rs":"4516aaeb7abf6209f5cd94e86a1e55a9675ef77262f52e3b2d5596fd4b858458","src/errors/mod.rs":"f224af66124fd31a040c8da11bbab7b7795b48e4edea76e01c1f4dee537ea38a","src/errors/windows.rs":"0567af7bfac3ae2a8dff418e10873d8a5bf15a8b8ac6892c5ffdab08ec3ac901","src/format.rs":"4d9baaa3b3b52b4311efaadb12921088141becba8890ae977f0a2807eaa7f820","src/lib.rs":"0900c00594b3c386b86127055889006f0d7d0004b08455fadb0e60d55a469cab","src/traits.rs":"93127ad69a849325ed66a0626e0bdae05868488f81c539d35c71a7bfbb9e51ac","src/utils.rs":"17e8777b05998a8149fc5168af3bca1e0f9aeffe28cb3d6dbfb89c546f75e5ed"},"package":"95a2b640f80e5514f49509ff1f97fb24693f95ef5be5ed810d70df4283a68acc"} \ No newline at end of file diff --git a/third_party/rust/minidump-common/Cargo.toml b/third_party/rust/minidump-common/Cargo.toml index 416d3b703e77..de9d6445586a 100644 --- a/third_party/rust/minidump-common/Cargo.toml +++ b/third_party/rust/minidump-common/Cargo.toml @@ -12,7 +12,7 @@ [package] edition = "2018" name = "minidump-common" -version = "0.21.1" +version = "0.22.0" authors = ["Ted Mielczarek "] description = "Some common types for working with minidump files." 
homepage = "https://github.com/rust-minidump/rust-minidump" diff --git a/third_party/rust/minidump-unwind/.cargo-checksum.json b/third_party/rust/minidump-unwind/.cargo-checksum.json new file mode 100644 index 000000000000..1cb711e37e4b --- /dev/null +++ b/third_party/rust/minidump-unwind/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"f235b4052f488a701a6e96b416b7ef1b34bdcafed5bb561799625407cf377335","LICENSE":"06de63df29199a394442b57a28e886059ddc940973e10646877a0793fd53e2c9","README.md":"0202d4bced7cbc5fb801a916d11c752019111a0043fb6476ef945414d322588e","src/amd64.rs":"c05bfdb7479ba5e2d3ba942b75e66e38784b2fe7deef5045344db43d438bb9c8","src/amd64_unittest.rs":"9a0aea53f153201bd8ffcfec215966be671098de508ce1429d4536402893c0d5","src/arm.rs":"a61b28eabf8c72ea7d16911426c6bbab4e08766ca43e75099d0cd7817e02a977","src/arm64.rs":"c8233f255b64d116ea804a0fe15a4ee0211124f9434d0125a957d267ab6ea3c2","src/arm64_old.rs":"78843e9a46e3ce5f461a19d63445fd1b2aac7f4e6d91aac0f3b9e352b958606c","src/arm64_unittest.rs":"d48d7577422aa53bb041bc243e866d369bdc57fd1219ebe7f7032b9bfedfc1dc","src/arm_unittest.rs":"b2e64e57ed638c228d41ecf35ea57f0a6f9ca007a5288bc63a779fa759548c50","src/lib.rs":"77e4267e6c91195e848f0cddca1ad1b3e564c950d21781a835de3540a6bee7f5","src/mips.rs":"897aa10ade29adc4253bb2049b19a7e1690bf548ce015ba63eba1f8bcc04361c","src/symbols/debuginfo.rs":"e549b1e4d5ced1e0fe89c2c8bb63ba210d303e10c60b21de5045e3d5b98ef9f0","src/symbols/mod.rs":"a598e48bf2ef657e7c833f7f2d3950a10c68483e71b98f136c7975baba9ad238","src/system_info.rs":"228ac55b18a647e5302b5cb7c10e65c9d046decb5d9207e4ded098405bf1739c","src/x86.rs":"fab7ccec6285a9970da7f13709b8e7e53b1198ca275235e4bd415a620d87197e","src/x86_unittest.rs":"73212f5b1c2ce4605e540230c5574de817e42e0447ffe304b8822b69066aa7c8"},"package":"afb5af4cbb631c54fe8c0c058799e9ac95b31c6e282f1afaaaaad10c2c441fcb"} \ No newline at end of file diff --git a/third_party/rust/minidump-unwind/Cargo.toml b/third_party/rust/minidump-unwind/Cargo.toml new file mode 
100644 index 000000000000..3f45dfb17833 --- /dev/null +++ b/third_party/rust/minidump-unwind/Cargo.toml @@ -0,0 +1,98 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "minidump-unwind" +version = "0.22.0" +authors = ["Alex Franchuk "] +description = "A library for producing stack traces from minidump files." +homepage = "https://github.com/rust-minidump/rust-minidump" +readme = "README.md" +license = "MIT" +repository = "https://github.com/rust-minidump/rust-minidump" +resolver = "2" + +[package.metadata.docs.rs] +all-features = true + +[dependencies.async-trait] +version = "0.1.52" + +[dependencies.breakpad-symbols] +version = "0.22.0" + +[dependencies.cachemap2] +version = "0.3.0" +optional = true + +[dependencies.framehop] +version = "0.12" +optional = true + +[dependencies.futures-util] +version = "0.3.25" +optional = true + +[dependencies.memmap2] +version = "0.9" +optional = true + +[dependencies.minidump] +version = "0.22.0" + +[dependencies.minidump-common] +version = "0.22.0" + +[dependencies.object] +version = "0.36" +features = ["read"] +optional = true +default-features = false + +[dependencies.scroll] +version = "0.12.0" + +[dependencies.tracing] +version = "0.1.34" +features = ["log"] + +[dependencies.wholesym] +version = "0.7" +optional = true + +[dev-dependencies.doc-comment] +version = "0.3.3" + +[dev-dependencies.test-assembler] +version = "0.1.6" + +[dev-dependencies.tokio] +version = "1.12.0" +features = ["full"] + +[features] +debuginfo = ["debuginfo-symbols"] +debuginfo-symbols = [ + 
"debuginfo-unwind", + "futures-util", + "wholesym", +] +debuginfo-unwind = [ + "cachemap2", + "framehop", + "memmap2", + "object", +] +http = ["breakpad-symbols/http"] + +[badges.travis-ci] +repository = "rust-minidump/rust-minidump" diff --git a/third_party/rust/minidump-unwind/LICENSE b/third_party/rust/minidump-unwind/LICENSE new file mode 100644 index 000000000000..3af7a472f939 --- /dev/null +++ b/third_party/rust/minidump-unwind/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2015-2023 rust-minidump contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/third_party/rust/minidump-unwind/README.md b/third_party/rust/minidump-unwind/README.md new file mode 100644 index 000000000000..9d7ab2f395c3 --- /dev/null +++ b/third_party/rust/minidump-unwind/README.md @@ -0,0 +1,80 @@ +# minidump-unwind + +[![crates.io](https://img.shields.io/crates/v/minidump-unwind.svg)](https://crates.io/crates/minidump-unwind) [![](https://docs.rs/minidump-unwind/badge.svg)](https://docs.rs/minidump-unwind) + +A library for unwinding and producing stack traces from minidump files. This crate provides APIs for +producing symbolicated stack traces for the threads in a minidump. The primary entrypoint is the +`walk_stack` function. + +If you want lower-level access to the minidump's contents, use the [minidump](https://crates.io/crates/minidump) crate. + +If you want higher-level functionality build on top of this crate, see +[minidump-processor](https://crates.io/crates/minidump-processor). + +## Example Usage + +```rust +use minidump::{ + Minidump, MinidumpException, MinidumpMiscInfo, MinidumpModuleList, MinidumpSystemInfo, + UnifiedMemoryList +}; +use minidump_unwind::{CallStack, http_symbol_supplier, Symbolizer, SystemInfo, walk_stack}; + +#[tokio::main] +async fn main() { + // Read the minidump + let dump = Minidump::read_path("../testdata/test.dmp").unwrap(); + + // Configure the symbolizer and processor + let symbols_urls = vec![String::from("https://symbols.totallyrealwebsite.org")]; + let symbols_paths = vec![]; + let mut symbols_cache = std::env::temp_dir(); + symbols_cache.push("minidump-cache"); + let symbols_tmp = std::env::temp_dir(); + let timeout = std::time::Duration::from_secs(1000); + + // Specify a symbol supplier (here we're using the most powerful one, the http supplier) + let provider = Symbolizer::new(http_symbol_supplier( + symbols_paths, + symbols_urls, + symbols_cache, + symbols_tmp, + timeout, + )); + + let system_info: MinidumpSystemInfo = dump.get_stream().unwrap(); + let misc_info: Option = 
dump.get_stream().ok(); + let modules: MinidumpModuleList = dump.get_stream().unwrap(); + let exception: MinidumpException = dump.get_stream().unwrap(); + let exception_context = exception.context(&system_info, misc_info.as_ref()).unwrap(); + let memory_list = dump.get_stream().map(UnifiedMemoryList::Memory) + .or_else(|_| dump.get_stream().map(UnifiedMemoryList::Memory64)) + .unwrap(); + + let stack_memory = memory_list.memory_at_address(exception_context.get_stack_pointer()); + + let mut stack = CallStack::with_context(exception_context.into_owned()); + + walk_stack( + 0, + (), + &mut stack, + stack_memory, + &modules, + &SystemInfo { + os: system_info.os, + os_version: None, + os_build: None, + cpu: system_info.cpu, + cpu_info: system_info.cpu_info().map(|info| info.into_owned()), + cpu_microcode_version: None, + cpu_count: 1, + }, + &provider, + ).await; + + for frame in stack.frames { + println!("{:?}", frame); + } +} +``` diff --git a/third_party/rust/minidump-unwind/src/amd64.rs b/third_party/rust/minidump-unwind/src/amd64.rs new file mode 100644 index 000000000000..ec36cd678811 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/amd64.rs @@ -0,0 +1,440 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +// Note since x86 and Amd64 have basically the same ABI, this implementation +// is written to largely erase the details of the two wherever possible, +// so that it can be copied between the two with minimal changes. It's not +// worth the effort to *actually* unify the implementations. 
+ +use super::impl_prelude::*; +use minidump::format::CONTEXT_AMD64; +use minidump::system_info::Os; +use minidump::{ + MinidumpContext, MinidumpContextValidity, MinidumpModuleList, MinidumpRawContext, UnifiedMemory, +}; +use std::collections::HashSet; +use tracing::trace; + +type Pointer = u64; +const POINTER_WIDTH: Pointer = 8; +const INSTRUCTION_REGISTER: &str = "rip"; +const STACK_POINTER_REGISTER: &str = "rsp"; +const FRAME_POINTER_REGISTER: &str = "rbp"; +// FIXME: rdi and rsi are also preserved on windows (but not in sysv) -- we should handle that? +const CALLEE_SAVED_REGS: &[&str] = &["rbx", "rbp", "r12", "r13", "r14", "r15"]; + +async fn get_caller_by_cfi

( + ctx: &CONTEXT_AMD64, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying cfi"); + + if let MinidumpContextValidity::Some(ref which) = args.valid() { + if !which.contains(STACK_POINTER_REGISTER) { + return None; + } + } + + let mut stack_walker = CfiStackWalker::from_ctx_and_args(ctx, args, callee_forwarded_regs)?; + + args.symbol_provider + .walk_frame(stack_walker.module, &mut stack_walker) + .await?; + let caller_ip = stack_walker.caller_ctx.rip; + let caller_sp = stack_walker.caller_ctx.rsp; + + trace!( + "cfi evaluation was successful -- caller_ip: 0x{:016x}, caller_sp: 0x{:016x}", + caller_ip, + caller_sp, + ); + + // Do absolutely NO validation! Yep! As long as CFI evaluation succeeds + // (which does include ip and sp resolving), just blindly assume the + // values are correct. I Don't Like This, but it's what breakpad does and + // we should start with a baseline of parity. + + trace!("cfi result seems valid"); + + let context = MinidumpContext { + raw: MinidumpRawContext::Amd64(stack_walker.caller_ctx), + valid: MinidumpContextValidity::Some(stack_walker.caller_validity), + }; + Some(StackFrame::from_context(context, FrameTrust::CallFrameInfo)) +} + +fn callee_forwarded_regs(valid: &MinidumpContextValidity) -> HashSet<&'static str> { + match valid { + MinidumpContextValidity::All => CALLEE_SAVED_REGS.iter().copied().collect(), + MinidumpContextValidity::Some(ref which) => CALLEE_SAVED_REGS + .iter() + .filter(|®| which.contains(reg)) + .copied() + .collect(), + } +} + +fn get_caller_by_frame_pointer

( + ctx: &CONTEXT_AMD64, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + let stack_memory = args.stack_memory; + // On Windows x64, frame-pointer unwinding purely with the data on the stack + // is not possible, as proper unwinding requires access to `UNWIND_INFO`, + // because the frame pointer does not necessarily point to the end of the + // frame. + // In particular, the docs state that: + // > [The frame register] offset permits pointing the FP register into the + // > middle of the local stack allocation [...] + // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64 + if args.system_info.os == Os::Windows { + return None; + } + + trace!("trying frame pointer"); + if let MinidumpContextValidity::Some(ref which) = args.valid() { + if !which.contains(FRAME_POINTER_REGISTER) { + return None; + } + if !which.contains(STACK_POINTER_REGISTER) { + return None; + } + } + + let last_bp = ctx.rbp; + let last_sp = ctx.rsp; + // Assume that the standard %bp-using x64 calling convention is in + // use. + // + // The typical x64 calling convention, when frame pointers are present, + // is for the calling procedure to use CALL, which pushes the return + // address onto the stack and sets the instruction pointer (%ip) to + // the entry point of the called routine. The called routine then + // PUSHes the calling routine's frame pointer (%bp) onto the stack + // before copying the stack pointer (%sp) to the frame pointer (%bp). + // Therefore, the calling procedure's frame pointer is always available + // by dereferencing the called procedure's frame pointer, and the return + // address is always available at the memory location immediately above + // the address pointed to by the called procedure's frame pointer. 
The + // calling procedure's stack pointer (%sp) is 2 pointers higher than the + // value of the called procedure's frame pointer at the time the calling + // procedure made the CALL: 1 pointer for the return address pushed by the + // CALL itself, and 1 pointer for the callee's` PUSH of the caller's frame + // pointer. + // + // %ip_new = *(%bp_old + ptr) + // %bp_new = *(%bp_old) + // %sp_new = %bp_old + ptr*2 + + if last_bp >= u64::MAX - POINTER_WIDTH * 2 { + // Although this code generally works fine if the pointer math overflows, + // debug builds will still panic, and this guard protects against it without + // drowning the rest of the code in checked_add. + return None; + } + let caller_ip = stack_memory.get_memory_at_address(last_bp + POINTER_WIDTH)?; + let caller_bp = stack_memory.get_memory_at_address(last_bp)?; + let caller_sp = last_bp + POINTER_WIDTH * 2; + + // If the recovered ip is not a canonical address it can't be + // the return address, so bp must not have been a frame pointer. + + // Since we're assuming coherent frame pointers, check that the frame pointers + // and stack pointers are well-ordered. + if caller_sp <= last_bp || caller_bp < caller_sp { + trace!("rejecting frame pointer result for unreasonable frame pointer"); + return None; + } + // Since we're assuming coherent frame pointers, check that the resulting + // frame pointer is still inside stack memory. + let _unused: Pointer = stack_memory.get_memory_at_address(caller_bp)?; + // Don't accept obviously wrong instruction pointers. + if is_non_canonical(caller_ip) { + trace!("rejecting frame pointer result for unreasonable instruction pointer"); + return None; + } + // Don't accept obviously wrong stack pointers. 
+ if !stack_seems_valid(caller_sp, last_sp, stack_memory) { + trace!("rejecting frame pointer result for unreasonable stack pointer"); + return None; + } + + trace!( + "frame pointer seems valid -- caller_ip: 0x{:016x}, caller_sp: 0x{:016x}", + caller_ip, + caller_sp, + ); + + let caller_ctx = CONTEXT_AMD64 { + rip: caller_ip, + rsp: caller_sp, + rbp: caller_bp, + ..CONTEXT_AMD64::default() + }; + let mut valid = HashSet::new(); + valid.insert(INSTRUCTION_REGISTER); + valid.insert(STACK_POINTER_REGISTER); + valid.insert(FRAME_POINTER_REGISTER); + let context = MinidumpContext { + raw: MinidumpRawContext::Amd64(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + Some(StackFrame::from_context(context, FrameTrust::FramePointer)) +} + +async fn get_caller_by_scan

( + ctx: &CONTEXT_AMD64, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying scan"); + let stack_memory = args.stack_memory; + + // Stack scanning is just walking from the end of the frame until we encounter + // a value on the stack that looks like a pointer into some code (it's an address + // in a range covered by one of our modules). If we find such an instruction, + // we assume it's an ip value that was pushed by the CALL instruction that created + // the current frame. The next frame is then assumed to end just before that + // ip value. + let last_bp = match args.valid() { + MinidumpContextValidity::All => Some(ctx.rbp), + MinidumpContextValidity::Some(ref which) => { + if !which.contains(STACK_POINTER_REGISTER) { + trace!("cannot scan without stack pointer"); + return None; + } + if which.contains(FRAME_POINTER_REGISTER) { + Some(ctx.rbp) + } else { + None + } + } + }; + let last_sp = ctx.rsp; + + // Number of pointer-sized values to scan through in our search. + let default_scan_range = 40; + let extended_scan_range = default_scan_range * 4; + + // Breakpad devs found that the first frame of an unwind can be really messed up, + // and therefore benefits from a longer scan. Let's do it too. + let scan_range = if let FrameTrust::Context = args.callee_frame.trust { + extended_scan_range + } else { + default_scan_range + }; + + for i in 0..scan_range { + let address_of_ip = last_sp.checked_add(i * POINTER_WIDTH)?; + let caller_ip = stack_memory.get_memory_at_address(address_of_ip)?; + if instruction_seems_valid(caller_ip, args.modules, args.symbol_provider).await { + // ip is pushed by CALL, so sp is just address_of_ip + ptr + let caller_sp = address_of_ip.checked_add(POINTER_WIDTH)?; + + // Try to restore bp as well. This can be possible in two cases: + // + // 1. This function has the standard prologue that pushes bp and + // sets bp = sp. 
If this is the case, then the current bp should be + // immediately after (before in memory) address_of_ip. + // + // 2. This function does not use bp, and has just preserved it + // from the caller. If this is the case, bp should be before + // (after in memory) address_of_ip. + // + // We then try our best to eliminate bogus-looking bp's with some + // simple heuristics like "is a valid stack address". + let mut caller_bp = None; + + // This value was specifically computed for x86 frames (see the x86 + // impl for details), but 128 KB is still an extremely generous + // frame size on x64. + const MAX_REASONABLE_GAP_BETWEEN_FRAMES: Pointer = 128 * 1024; + + // NOTE: minor divergence from the x86 impl here: for whatever + // reason the x64 breakpad tests only work if we gate option (1) on + // having a valid `bp` that points next to address_of_ip already. + // It's unclear why, perhaps the test is buggy, but for now we + // preserve that behaviour. + if let Some(last_bp) = last_bp { + // If we're on the first iteration of the scan, there can't possibly be a + // frame pointer, because the entire stack frame is taken up by the return + // pointer. And if we're not on the first iteration, then the last iteration + // already loaded the location we expect the frame pointer to be in, so we can + // unconditionally load it here. + if i > 0 { + let address_of_bp = address_of_ip - POINTER_WIDTH; + // Can assume this resolves because we already walked over it when + // checking address_of_ip values. + let bp = stack_memory.get_memory_at_address(address_of_bp)?; + if last_bp == address_of_bp + && bp > address_of_ip + && bp - address_of_bp <= MAX_REASONABLE_GAP_BETWEEN_FRAMES + { + // Final sanity check that resulting bp is still inside stack memory. + if stack_memory.get_memory_at_address::(bp).is_some() { + caller_bp = Some(bp); + } + } else if last_bp >= caller_sp { + // Don't sanity check that the address is inside the stack? Hmm. 
+ caller_bp = Some(last_bp); + } + } + } + + trace!( + "scan seems valid -- caller_ip: 0x{:08x}, caller_sp: 0x{:08x}", + caller_ip, + caller_sp, + ); + + let caller_ctx = CONTEXT_AMD64 { + rip: caller_ip, + rsp: caller_sp, + rbp: caller_bp.unwrap_or(0), + ..CONTEXT_AMD64::default() + }; + let mut valid = HashSet::new(); + valid.insert(INSTRUCTION_REGISTER); + valid.insert(STACK_POINTER_REGISTER); + if caller_bp.is_some() { + valid.insert(FRAME_POINTER_REGISTER); + } + let context = MinidumpContext { + raw: MinidumpRawContext::Amd64(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + return Some(StackFrame::from_context(context, FrameTrust::Scan)); + } + } + + None +} + +/// The most strict validation we have for instruction pointers. +/// +/// This is only used for stack-scanning, because it's explicitly +/// trying to distinguish between total garbage and correct values. +/// cfi and frame_pointer approaches do not use this validation +/// because by default they're working with plausible/trustworthy +/// data. +/// +/// Specifically, not using this validation allows cfi/fp methods +/// to unwind through frames we don't have mapped modules for (such as +/// OS APIs). This may seem confusing since we obviously don't have cfi +/// for unmapped modules! +/// +/// The way this works is that we will use cfi to unwind some frame we +/// know about and *end up* in a function we know nothing about, but with +/// all the right register values. At this point, frame pointers will +/// often do the correct thing even though we don't know what code we're +/// in -- until we get back into code we do know about and cfi kicks back in. +/// At worst, this sets scanning up in a better position for success! +/// +/// If we applied this more rigorous validation to cfi/fp methods, we +/// would just discard the correct register values from the known frame +/// and immediately start doing unreliable scans. +async fn instruction_seems_valid

( + instruction: Pointer, + modules: &MinidumpModuleList, + symbol_provider: &P, +) -> bool +where + P: SymbolProvider + Sync, +{ + if is_non_canonical(instruction) || instruction == 0 { + return false; + } + + super::instruction_seems_valid_by_symbols(instruction, modules, symbol_provider).await +} + +fn stack_seems_valid( + caller_sp: Pointer, + callee_sp: Pointer, + stack_memory: UnifiedMemory<'_, '_>, +) -> bool { + // The stack shouldn't *grow* when we unwind + if caller_sp <= callee_sp { + return false; + } + + // The stack pointer should be in the stack + stack_memory + .get_memory_at_address::(caller_sp) + .is_some() +} + +fn is_non_canonical(ptr: Pointer) -> bool { + // x64 has the notion of a "canonical address", as a result of only 48 bits + // of a pointer actually being used, because this is all that a 4-level page + // table can support. A canonical address copies bit 47 to all the otherwise + // unused high bits. This creates two ranges where no valid pointers should + // ever exist. + // + // Note that as of this writing, 5-level page tables *do* exist, and when enabled + // 57 bits are used. However modern JS engines rely on only 48 bits being used + // to perform "NaN boxing" optimizations, so it's reasonable to assume + // by default that only 4-level page tables are used. (Even if enabled at + // the system level, Linux only exposes non-48-bit pointers to a process + // if that process explicitly opts in with a special operation.) + ptr > 0x7FFFFFFFFFFF && ptr < 0xFFFF800000000000 +} + +pub async fn get_caller_frame

( + ctx: &CONTEXT_AMD64, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + // .await doesn't like closures, so don't use Option chaining + let mut frame = None; + if frame.is_none() { + frame = get_caller_by_cfi(ctx, args).await; + } + if frame.is_none() { + frame = get_caller_by_frame_pointer(ctx, args); + } + if frame.is_none() { + frame = get_caller_by_scan(ctx, args).await; + } + let mut frame = frame?; + + // We now check the frame to see if it looks like unwinding is complete, + // based on the frame we computed having a nonsense value. Returning + // None signals to the unwinder to stop unwinding. + + // if the instruction is within the first ~page of memory, it's basically + // null, and we can assume unwinding is complete. + if frame.context.get_instruction_pointer() < 4096 { + trace!("instruction pointer was nullish, assuming unwind complete"); + return None; + } + // If the new stack pointer is at a lower address than the old, + // then that's clearly incorrect. Treat this as end-of-stack to + // enforce progress and avoid infinite loops. + if frame.context.get_stack_pointer() <= ctx.rsp { + trace!("stack pointer went backwards, assuming unwind complete"); + return None; + } + + // Ok, the frame now seems well and truly valid, do final cleanup. + + // A caller's ip is the return address, which is the instruction + // *after* the CALL that caused us to arrive at the callee. Set + // the value to one less than that, so it points within the + // CALL instruction. This is important because we use this value + // to lookup the CFI we need to unwind the next frame. 
+ let ip = frame.context.get_instruction_pointer(); + frame.instruction = ip - 1; + + Some(frame) +} diff --git a/third_party/rust/minidump-unwind/src/amd64_unittest.rs b/third_party/rust/minidump-unwind/src/amd64_unittest.rs new file mode 100644 index 000000000000..a26e02ea93f0 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/amd64_unittest.rs @@ -0,0 +1,810 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +use crate::*; +use minidump::format::CONTEXT_AMD64; +use minidump::system_info::{Cpu, Os}; +use std::collections::HashMap; +use test_assembler::*; + +struct TestFixture { + pub raw: CONTEXT_AMD64, + pub modules: MinidumpModuleList, + pub system_info: SystemInfo, + pub symbols: HashMap, +} + +impl TestFixture { + pub fn new() -> TestFixture { + TestFixture { + raw: CONTEXT_AMD64::default(), + // Give the two modules reasonable standard locations and names + // for tests to play with. + modules: MinidumpModuleList::from_modules(vec![ + MinidumpModule::new(0x00007400c0000000, 0x10000, "module1"), + MinidumpModule::new(0x00007500b0000000, 0x10000, "module2"), + ]), + system_info: SystemInfo { + os: Os::Linux, + os_version: None, + os_build: None, + cpu: Cpu::X86_64, + cpu_info: None, + cpu_microcode_version: None, + cpu_count: 1, + }, + symbols: HashMap::new(), + } + } + + pub async fn walk_stack(&self, stack: Section) -> CallStack { + let context = MinidumpContext { + raw: MinidumpRawContext::Amd64(self.raw.clone()), + valid: MinidumpContextValidity::All, + }; + let base = stack.start().value().unwrap(); + let size = stack.size(); + let stack = stack.get_contents().unwrap(); + let stack_memory = &MinidumpMemory { + desc: Default::default(), + base_address: base, + size, + bytes: &stack, + endian: scroll::LE, + }; + let symbolizer = Symbolizer::new(string_symbol_supplier(self.symbols.clone())); + let mut stack = CallStack::with_context(context); + + walk_stack( + 0, + (), + &mut stack, + 
Some(UnifiedMemory::Memory(stack_memory)), + &self.modules, + &self.system_info, + &symbolizer, + ) + .await; + + stack + } + + pub fn add_symbols(&mut self, name: String, symbols: String) { + self.symbols.insert(name, symbols); + } +} + +#[tokio::test] +async fn test_simple() { + let mut f = TestFixture::new(); + let stack = Section::new(); + stack.start().set_const(0x80000000); + // There should be no references to the stack in this walk: we don't + // provide any call frame information, so trying to reconstruct the + // context frame's caller should fail. So there's no need for us to + // provide stack contents. + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = 0x8000000080000000; + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + let f = &s.frames[0]; + let m = f.module.as_ref().unwrap(); + assert_eq!(m.code_file(), "module1"); +} + +#[tokio::test] +async fn test_caller_pushed_rbp() { + // Functions typically push their %rbp upon entry and set %rbp pointing + // there. If stackwalking finds a plausible address for the next frame's + // %rbp directly below the return address, assume that it is indeed the + // next frame's %rbp. 
+ let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start = 0x8000000080000000; + let return_address = 0x00007500b0000110; + stack.start().set_const(stack_start); + + let frame0_rbp = Label::new(); + let frame1_sp = Label::new(); + let frame1_rbp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D64(0x00007400b0000000) // junk that's not + .D64(0x00007500b0000000) // a return address + .D64(0x00007400c0001000) // a couple of plausible addresses + .D64(0x00007500b000aaaa) // that are not within functions + .mark(&frame0_rbp) + .D64(&frame1_rbp) // caller-pushed %rbp + .D64(return_address) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 32) // body of frame1 + .mark(&frame1_rbp) // end of stack + .D64(0); + + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = frame0_rbp.value().unwrap(); + f.raw.rsp = stack.start().value().unwrap(); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + if let MinidumpRawContext::Amd64(ctx) = &f0.context.raw { + assert_eq!(ctx.rbp, frame0_rbp.value().unwrap()); + } else { + unreachable!(); + } + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("rip")); + assert!(which.contains("rsp")); + assert!(which.contains("rbp")); + } else { + unreachable!(); + } + if let MinidumpRawContext::Amd64(ctx) = &f1.context.raw { + assert_eq!(ctx.rip, return_address); + assert_eq!(ctx.rsp, frame1_sp.value().unwrap()); + assert_eq!(ctx.rbp, frame1_rbp.value().unwrap()); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_windows_rbp_scan() { + let mut f = TestFixture::new(); + 
f.system_info.os = Os::Windows; + + let mut stack = Section::new(); + let stack_start = 0x8000000080000000; + let return_address = 0x00007500b0000110; + stack.start().set_const(stack_start); + + let frame0_rbp = Label::new(); + let frame1_sp = Label::new(); + let frame1_rbp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D64(0x00000000b0000000) // junk that's not + .D64(0x00000000b0000000) // a return address + .mark(&frame0_rbp) // the FP can point to the middle of the stack on Windows + .D64(0x00000000c0001000) + .D64(0x00000000b000aaaa) + .D64(&frame1_rbp) // caller-pushed %rbp + .D64(return_address) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 32) // body of frame1 + .mark(&frame1_rbp) // end of stack + .D64(0); + + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = frame0_rbp.value().unwrap(); + f.raw.rsp = stack.start().value().unwrap(); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + if let MinidumpRawContext::Amd64(ctx) = &f0.context.raw { + assert_eq!(ctx.rbp, frame0_rbp.value().unwrap()); + } else { + unreachable!(); + } + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("rip")); + assert!(which.contains("rsp")); + } else { + unreachable!(); + } + if let MinidumpRawContext::Amd64(ctx) = &f1.context.raw { + assert_eq!(ctx.rip, return_address); + assert_eq!(ctx.rsp, frame1_sp.value().unwrap()); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_scan_without_symbols() { + // When the stack walker resorts to scanning the stack, + // only addresses located within loaded modules are + // considered valid return 
addresses. + // Force scanning through three frames to ensure that the + // stack pointer is set properly in scan-recovered frames. + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start = 0x8000000080000000; + stack.start().set_const(stack_start); + + let return_address1 = 0x00007500b0000100; + let return_address2 = 0x00007500b0000900; + + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame1_rbp = Label::new(); + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D64(0x00007400b0000000) // junk that's not + .D64(0x00007500d0000000) // a return address + .D64(return_address1) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 16) // space + .D64(0x00007400b0000000) // more junk + .D64(0x00007500d0000000) + .mark(&frame1_rbp) + .D64(stack_start) // This is in the right place to be + // a saved rbp, but it's bogus, so + // we shouldn't report it. + .D64(return_address2) // actual return address + // frame 2 + .mark(&frame2_sp) + .append_repeated(0, 32); // end of stack + + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = frame1_rbp.value().unwrap(); + f.raw.rsp = stack.start().value().unwrap(); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("rip")); + assert!(which.contains("rsp")); + assert!(which.contains("rbp")); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Amd64(ctx) = &f1.context.raw { + assert_eq!(ctx.rip, return_address1); + assert_eq!(ctx.rsp, frame1_sp.value().unwrap()); + assert_eq!(ctx.rbp, frame1_rbp.value().unwrap()); + } else { + 
unreachable!(); + } + } + + { + // To avoid reusing locals by mistake + let f2 = &s.frames[2]; + assert_eq!(f2.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = f2.context.valid { + assert!(which.contains("rip")); + assert!(which.contains("rsp")); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Amd64(ctx) = &f2.context.raw { + assert_eq!(ctx.rip, return_address2); + assert_eq!(ctx.rsp, frame2_sp.value().unwrap()); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_scan_with_symbols() { + // Test that we can refine our scanning using symbols. Specifically we + // should be able to reject pointers that are in modules but don't map to + // any FUNC/PUBLIC record. + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start = 0x8000000080000000u64; + stack.start().set_const(stack_start); + + let return_address = 0x00007500b0000110u64; + + let frame1_rsp = Label::new(); + let frame1_rbp = Label::new(); + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D64(0x00007400b0000000u64) // junk that's not + .D64(0x00007500b0000000u64) // a return address + .D64(0x00007400c0001000u64) // a couple of plausible addresses + .D64(0x00007500b000aaaau64) // that are not within functions + .D64(return_address) // actual return address + // frame 1 + .mark(&frame1_rsp) + .append_repeated(0, 32) + .mark(&frame1_rbp); // end of stack + + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = frame1_rbp.value().unwrap(); + f.raw.rsp = stack.start().value().unwrap(); + + f.add_symbols( + String::from("module1"), + // The youngest frame's function. + String::from("FUNC 100 400 10 monotreme\n"), + ); + f.add_symbols( + String::from("module2"), + // The calling frame's function. 
+ String::from("FUNC 100 400 10 marsupial\n"), + ); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Amd64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("rip", valid).unwrap(), return_address); + assert_eq!( + ctx.get_register("rsp", valid).unwrap(), + frame1_rsp.value().unwrap() + ); + assert_eq!( + ctx.get_register("rbp", valid).unwrap(), + frame1_rbp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +const CALLEE_SAVE_REGS: &[&str] = &["rip", "rbx", "rbp", "rsp", "r12", "r13", "r14", "r15"]; + +fn init_cfi_state() -> (TestFixture, Section, CONTEXT_AMD64, MinidumpContextValidity) { + let mut f = TestFixture::new(); + let symbols = [ + // The youngest frame's function. + "FUNC 4000 1000 10 enchiridion\n", + // Initially, just a return address. + "STACK CFI INIT 4000 100 .cfa: $rsp 8 + .ra: .cfa 8 - ^\n", + // Push %rbx. + "STACK CFI 4001 .cfa: $rsp 16 + $rbx: .cfa 16 - ^\n", + // Save %r12 in %rbx. Weird, but permitted. + "STACK CFI 4002 $r12: $rbx\n", + // Allocate frame space, and save %r13. + "STACK CFI 4003 .cfa: $rsp 40 + $r13: .cfa 32 - ^\n", + // Put the return address in %r13. + "STACK CFI 4005 .ra: $r13\n", + // Save %rbp, and use it as a frame pointer. + "STACK CFI 4006 .cfa: $rbp 16 + $rbp: .cfa 24 - ^\n", + // The calling function. + "FUNC 5000 1000 10 epictetus\n", + // Mark it as end of stack. 
+ "STACK CFI INIT 5000 1000 .cfa: $rsp .ra 0\n", + ]; + f.add_symbols(String::from("module1"), symbols.concat()); + + f.raw.set_register("rsp", 0x8000000080000000); + f.raw.set_register("rip", 0x00007400c0005510); + f.raw.set_register("rbp", 0x68995b1de4700266); + f.raw.set_register("rbx", 0x5a5beeb38de23be8); + f.raw.set_register("r12", 0xed1b02e8cc0fc79c); + f.raw.set_register("r13", 0x1d20ad8acacbe930); + f.raw.set_register("r14", 0xe94cffc2f7adaa28); + f.raw.set_register("r15", 0xb638d17d8da413b5); + + let raw_valid = MinidumpContextValidity::All; + + let expected = f.raw.clone(); + let expected_regs = CALLEE_SAVE_REGS; + let expected_valid = MinidumpContextValidity::Some(expected_regs.iter().copied().collect()); + + let stack = Section::new(); + stack + .start() + .set_const(f.raw.get_register("rsp", &raw_valid).unwrap()); + + (f, stack, expected, expected_valid) +} + +async fn check_cfi( + f: TestFixture, + stack: Section, + expected: CONTEXT_AMD64, + expected_valid: MinidumpContextValidity, +) { + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + if let MinidumpContextValidity::Some(ref expected_regs) = expected_valid { + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::CallFrameInfo); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), expected_regs.len()); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Amd64(ctx) = &frame.context.raw { + for reg in expected_regs { + assert_eq!( + ctx.get_register(reg, valid), + expected.get_register(reg, &expected_valid), + "{reg} registers didn't match!" 
+ ); + } + return; + } + } + } + unreachable!(); +} + +#[tokio::test] +async fn test_cfi_at_4000() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D64(0x00007400c0005510) + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("rsp", frame1_rsp.value().unwrap()); + f.raw.set_register("rip", 0x00007400c0004000); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4001() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D64(0x5a5beeb38de23be8) // saved %rbx + .D64(0x00007400c0005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("rsp", frame1_rsp.value().unwrap()); + f.raw.set_register("rip", 0x00007400c0004001); + f.raw.set_register("rbx", 0xbe0487d2f9eafe29); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4002() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D64(0x5a5beeb38de23be8) // saved %rbx + .D64(0x00007400c0005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("rsp", frame1_rsp.value().unwrap()); + f.raw.set_register("rip", 0x00007400c0004002); + f.raw.set_register("rbx", 0xed1b02e8cc0fc79c); // saved %r12 + f.raw.set_register("r12", 0xb0118de918a4bcea); // callee's (distinct) %r12 value + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4003() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D64(0x0e023828dffd4d81) // garbage + .D64(0x1d20ad8acacbe930) // saved %r13 + .D64(0x319e68b49e3ace0f) // garbage + .D64(0x5a5beeb38de23be8) // saved %rbx + .D64(0x00007400c0005510) // 
return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("rsp", frame1_rsp.value().unwrap()); + f.raw.set_register("rip", 0x00007400c0004003); + f.raw.set_register("rbx", 0xed1b02e8cc0fc79c); // saved %r12 + f.raw.set_register("r12", 0x89d04fa804c87a43); // callee's (distinct) %r12 + f.raw.set_register("r13", 0x5118e02cbdb24b03); // callee's (distinct) %r13 + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4004() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D64(0x0e023828dffd4d81) // garbage + .D64(0x1d20ad8acacbe930) // saved %r13 + .D64(0x319e68b49e3ace0f) // garbage + .D64(0x5a5beeb38de23be8) // saved %rbx + .D64(0x00007400c0005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("rsp", frame1_rsp.value().unwrap()); + f.raw.set_register("rip", 0x00007400c0004004); + f.raw.set_register("rbx", 0xed1b02e8cc0fc79c); // saved %r12 + f.raw.set_register("r12", 0x46b1b8868891b34a); // callee's (distinct) %r12 + f.raw.set_register("r13", 0x5118e02cbdb24b03); // callee's (distinct) %r13 + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4005() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D64(0x4b516dd035745953) // garbage + .D64(0x1d20ad8acacbe930) // saved %r13 + .D64(0xa6d445e16ae3d872) // garbage + .D64(0x5a5beeb38de23be8) // saved %rbx + .D64(0xaa95fa054aedfbae) // garbage + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("rsp", frame1_rsp.value().unwrap()); + f.raw.set_register("rip", 0x00007400c0004005); + f.raw.set_register("rbx", 0xed1b02e8cc0fc79c); // saved %r12 + f.raw.set_register("r12", 0x46b1b8868891b34a); // callee's %r12 + f.raw.set_register("r13", 0x00007400c0005510); // return address 
+ + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4006() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame0_rbp = Label::new(); + let frame1_rsp = Label::new(); + stack = stack + .D64(0x043c6dfceb91aa34) // garbage + .D64(0x1d20ad8acacbe930) // saved %r13 + .D64(0x68995b1de4700266) // saved %rbp + .mark(&frame0_rbp) // frame pointer points here + .D64(0x5a5beeb38de23be8) // saved %rbx + .D64(0xf015ee516ad89eab) // garbage + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("rsp", frame1_rsp.value().unwrap()); + f.raw.set_register("rip", 0x00007400c0004006); + f.raw.set_register("rbp", frame0_rbp.value().unwrap()); + f.raw.set_register("rbx", 0xed1b02e8cc0fc79c); // saved %r12 + f.raw.set_register("r12", 0x26e007b341acfebd); // callee's %r12 + f.raw.set_register("r13", 0x00007400c0005510); // return address + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_frame_pointer_overflow() { + // Make sure we don't explode when trying frame pointer analysis on a value + // that will overflow. + + type Pointer = u64; + let stack_max: Pointer = Pointer::MAX; + let stack_size: Pointer = 1000; + let bad_frame_ptr: Pointer = stack_max; + + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start: Pointer = stack_max - stack_size; + stack.start().set_const(stack_start); + + stack = stack + // frame 0 + .append_repeated(0, stack_size as usize); // junk, not important to the test + + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = bad_frame_ptr; + f.raw.rsp = stack.start().value().unwrap() as Pointer; + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + + // As long as we don't panic, we're good! 
+} + +#[tokio::test] +async fn test_frame_pointer_barely_no_overflow() { + // This is test_caller_pushed_rbp but with the all the values pushed + // as close to the upper memory boundary as possible, to confirm that + // our code doesn't randomly overflow *AND* isn't overzealous in + // its overflow guards. + + let mut f = TestFixture::new(); + let mut stack = Section::new(); + + type Pointer = u64; + let stack_max: Pointer = Pointer::MAX; + let pointer_size: Pointer = std::mem::size_of::() as Pointer; + let stack_size: Pointer = pointer_size * 3; + + let stack_start: Pointer = stack_max - stack_size; + let return_address: Pointer = 0x00007500b0000110; + stack.start().set_const(stack_start); + + let frame0_fp = Label::new(); + let frame1_sp = Label::new(); + let frame1_fp = Label::new(); + + stack = stack + // frame 0 + .mark(&frame0_fp) + .D64(&frame1_fp) // caller-pushed %rbp + .D64(return_address) // actual return address + // frame 1 + .mark(&frame1_sp) + .mark(&frame1_fp) // end of stack + .D64(0); + + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = frame0_fp.value().unwrap() as Pointer; + f.raw.rsp = stack.start().value().unwrap(); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + if let MinidumpRawContext::Amd64(ctx) = &f0.context.raw { + assert_eq!(ctx.rbp, frame0_fp.value().unwrap() as Pointer); + } else { + unreachable!(); + } + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("rip")); + assert!(which.contains("rsp")); + assert!(which.contains("rbp")); + } else { + unreachable!(); + } + if let MinidumpRawContext::Amd64(ctx) = &f1.context.raw { + assert_eq!(ctx.rip, return_address); + 
assert_eq!(ctx.rsp, frame1_sp.value().unwrap() as Pointer); + assert_eq!(ctx.rbp, frame1_fp.value().unwrap() as Pointer); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_scan_walk_overflow() { + // There's a possible overflow when address_of_ip starts out at 0. + // + // To avoid this, we only try to recover rbp when we're scanning at least + // 1 pointer width away from the start of the stack. + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start = 0; + stack.start().set_const(stack_start); + + let return_address1 = 0x00007500b0000100_u64; + + let frame1_sp = Label::new(); + let frame1_rbp = Label::new(); + + stack = stack + // frame 0 + .D64(return_address1) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 16) // space + .D64(0x00007400b0000000) // more junk + .D64(0x00007500d0000000) + .mark(&frame1_rbp); + + f.raw.rip = 0x00007400c0000200; + f.raw.rbp = frame1_rbp.value().unwrap(); + f.raw.rsp = stack.start().value().unwrap(); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("rip")); + assert!(which.contains("rsp")); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Amd64(ctx) = &f1.context.raw { + assert_eq!(ctx.rip, return_address1); + assert_eq!(ctx.rsp, frame1_sp.value().unwrap()); + // We were unable to recover rbp, so it defaulted to 0. 
+ assert_eq!(ctx.rbp, 0); + } else { + unreachable!(); + } + } +} diff --git a/third_party/rust/minidump-unwind/src/arm.rs b/third_party/rust/minidump-unwind/src/arm.rs new file mode 100644 index 000000000000..bd669883b852 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/arm.rs @@ -0,0 +1,334 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +// NOTE: arm64_old.rs and arm64.rs should be identical except for the names of +// their context types. + +use super::impl_prelude::*; +use minidump::system_info::Os; +use minidump::{ + CpuContext, MinidumpContext, MinidumpContextValidity, MinidumpModuleList, MinidumpRawContext, +}; +use std::collections::HashSet; +use tracing::trace; + +type ArmContext = minidump::format::CONTEXT_ARM; +type Pointer = ::Register; +type Registers = minidump::format::ArmRegisterNumbers; + +const POINTER_WIDTH: Pointer = std::mem::size_of::() as Pointer; +const FRAME_POINTER: &str = Registers::FramePointer.name(); +const STACK_POINTER: &str = Registers::StackPointer.name(); +const PROGRAM_COUNTER: &str = Registers::ProgramCounter.name(); +const _LINK_REGISTER: &str = Registers::LinkRegister.name(); +const CALLEE_SAVED_REGS: &[&str] = &["r4", "r5", "r6", "r7", "r8", "r9", "r10", "fp"]; + +async fn get_caller_by_cfi

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying cfi"); + + let _last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + let mut stack_walker = CfiStackWalker::from_ctx_and_args(ctx, args, callee_forwarded_regs)?; + + args.symbol_provider + .walk_frame(stack_walker.module, &mut stack_walker) + .await?; + let caller_pc = stack_walker.caller_ctx.get_register_always(PROGRAM_COUNTER); + let caller_sp = stack_walker.caller_ctx.get_register_always(STACK_POINTER); + + trace!( + "cfi evaluation was successful -- caller_pc: 0x{:016x}, caller_sp: 0x{:016x}", + caller_pc, + caller_sp, + ); + + // Do absolutely NO validation! Yep! As long as CFI evaluation succeeds + // (which does include pc and sp resolving), just blindly assume the + // values are correct. I Don't Like This, but it's what breakpad does and + // we should start with a baseline of parity. + + let context = MinidumpContext { + raw: MinidumpRawContext::Arm(stack_walker.caller_ctx), + valid: MinidumpContextValidity::Some(stack_walker.caller_validity), + }; + Some(StackFrame::from_context(context, FrameTrust::CallFrameInfo)) +} + +fn callee_forwarded_regs(valid: &MinidumpContextValidity) -> HashSet<&'static str> { + match valid { + MinidumpContextValidity::All => CALLEE_SAVED_REGS.iter().copied().collect(), + MinidumpContextValidity::Some(ref which) => CALLEE_SAVED_REGS + .iter() + .filter(|®| which.contains(reg)) + .copied() + .collect(), + } +} + +fn get_caller_by_frame_pointer

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + // The ARM manual states that: + // > LR can be used for other purposes when it is not required to support + // > a return from a subroutine. + // In other words, we need to be conservative and treat it as a general + // purpose register. Except on iOS, which has stricter conventions around + // register use, and does guarantee that LR contains a valid return addr. + if args.system_info.os != Os::Ios { + return None; + } + + trace!("trying frame pointer"); + // Assume that the standard %fp-using ARM calling convention is in use. + // The main quirk of this ABI is that the return address doesn't need to + // be restored from the stack -- it's already in the link register (lr). + // But that means we need to save/restore lr itself so that the *caller's* + // return address can be recovered. + // + // In the standard calling convention, the following happens: + // + // lr := return_address (done implicitly by a call) + // PUSH fp, lr (save fp and lr to the stack -- ARM pushes in pairs) + // fp := sp (update the frame pointer to the current stack pointer) + // + // So to restore the caller's registers, we have: + // + // sp := fp + ptr*2 + // pc := *(fp + ptr) + // fp := *fp + let last_fp = ctx.get_register(FRAME_POINTER, args.valid())?; + let last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + if last_fp >= u32::MAX - POINTER_WIDTH * 2 { + // Although this code generally works fine if the pointer math overflows, + // debug builds will still panic, and this guard protects against it without + // drowning the rest of the code in checked_add. + return None; + } + let (caller_fp, caller_pc, caller_sp) = if last_fp == 0 { + // In this case we want unwinding to stop. One of the termination conditions in get_caller_frame + // is that caller_sp <= last_sp. Therefore we can force termination by setting caller_sp = last_sp. 
+ (0, 0, last_sp) + } else { + ( + args.stack_memory.get_memory_at_address(last_fp as u64)?, + args.stack_memory + .get_memory_at_address(last_fp as u64 + POINTER_WIDTH as u64)?, + last_fp + POINTER_WIDTH * 2, + ) + }; + + // Don't do any more validation, just assume it worked. + + trace!( + "frame pointer seems valid -- caller_pc: 0x{:016x}, caller_sp: 0x{:016x}", + caller_pc, + caller_sp, + ); + + let mut caller_ctx = ArmContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc); + caller_ctx.set_register(FRAME_POINTER, caller_fp); + caller_ctx.set_register(STACK_POINTER, caller_sp); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(FRAME_POINTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::Arm(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + Some(StackFrame::from_context(context, FrameTrust::FramePointer)) +} + +async fn get_caller_by_scan

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying scan"); + // Stack scanning is just walking from the end of the frame until we encounter + // a value on the stack that looks like a pointer into some code (it's an address + // in a range covered by one of our modules). If we find such an instruction, + // we assume it's an pc value that was pushed by the CALL instruction that created + // the current frame. The next frame is then assumed to end just before that + // pc value. + let last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + // Number of pointer-sized values to scan through in our search. + let default_scan_range = 40; + let extended_scan_range = default_scan_range * 4; + + // Breakpad devs found that the first frame of an unwind can be really messed up, + // and therefore benefits from a longer scan. Let's do it too. + let scan_range = if let FrameTrust::Context = args.callee_frame.trust { + extended_scan_range + } else { + default_scan_range + }; + + for i in 0..scan_range { + let address_of_pc = last_sp.checked_add(i * POINTER_WIDTH)?; + let caller_pc = args + .stack_memory + .get_memory_at_address(address_of_pc as u64)?; + if instruction_seems_valid(caller_pc, args.modules, args.symbol_provider).await { + // pc is pushed by CALL, so sp is just address_of_pc + ptr + let caller_sp = address_of_pc.checked_add(POINTER_WIDTH)?; + + // Don't do any more validation, and don't try to restore fp + // (that's what breakpad does!) 
+ + trace!( + "scan seems valid -- caller_pc: 0x{:08x}, caller_sp: 0x{:08x}", + caller_pc, + caller_sp, + ); + + let mut caller_ctx = ArmContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc); + caller_ctx.set_register(STACK_POINTER, caller_sp); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::Arm(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + return Some(StackFrame::from_context(context, FrameTrust::Scan)); + } + } + + None +} + +/// The most strict validation we have for instruction pointers. +/// +/// This is only used for stack-scanning, because it's explicitly +/// trying to distinguish between total garbage and correct values. +/// cfi and frame_pointer approaches do not use this validation +/// because by default they're working with plausible/trustworthy +/// data. +/// +/// Specifically, not using this validation allows cfi/fp methods +/// to unwind through frames we don't have mapped modules for (such as +/// OS APIs). This may seem confusing since we obviously don't have cfi +/// for unmapped modules! +/// +/// The way this works is that we will use cfi to unwind some frame we +/// know about and *end up* in a function we know nothing about, but with +/// all the right register values. At this point, frame pointers will +/// often do the correct thing even though we don't know what code we're +/// in -- until we get back into code we do know about and cfi kicks back in. +/// At worst, this sets scanning up in a better position for success! +/// +/// If we applied this more rigorous validation to cfi/fp methods, we +/// would just discard the correct register values from the known frame +/// and immediately start doing unreliable scans. +async fn instruction_seems_valid

( + instruction: Pointer, + modules: &MinidumpModuleList, + symbol_provider: &P, +) -> bool +where + P: SymbolProvider + Sync, +{ + super::instruction_seems_valid_by_symbols(instruction as u64, modules, symbol_provider).await +} + +/* +// ARM is currently hyper-permissive, so we don't use this, +// but here it is in case we change our minds! +fn stack_seems_valid( + caller_sp: Pointer, + callee_sp: Pointer, + stack_memory: UnifiedMemory<'_, '_>, +) -> bool { + // The stack shouldn't *grow* when we unwind + if caller_sp < callee_sp { + return false; + } + + // The stack pointer should be in the stack + stack_memory + .get_memory_at_address::(caller_sp as u64) + .is_some() +} +*/ + +pub async fn get_caller_frame

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + // .await doesn't like closures, so don't use Option chaining + let mut frame = None; + if frame.is_none() { + frame = get_caller_by_cfi(ctx, args).await; + } + if frame.is_none() { + frame = get_caller_by_frame_pointer(ctx, args); + } + if frame.is_none() { + frame = get_caller_by_scan(ctx, args).await; + } + let mut frame = frame?; + + // We now check the frame to see if it looks like unwinding is complete, + // based on the frame we computed having a nonsense value. Returning + // None signals to the unwinder to stop unwinding. + + // if the instruction is within the first ~page of memory, it's basically + // null, and we can assume unwinding is complete. + if frame.context.get_instruction_pointer() < 4096 { + trace!("instruction pointer was nullish, assuming unwind complete"); + return None; + } + // If the new stack pointer is at a lower address than the old, + // then that's clearly incorrect. Treat this as end-of-stack to + // enforce progress and avoid infinite loops. + let sp = frame.context.get_stack_pointer(); + let last_sp = ctx.get_register_always("sp") as u64; + if sp <= last_sp { + // Arm leaf functions may not actually touch the stack (thanks + // to the link register allowing you to "push" the return address + // to a register), so we need to permit the stack pointer to not + // change for the first frame of the unwind. After that we need + // more strict validation to avoid infinite loops. + let is_leaf = args.callee_frame.trust == FrameTrust::Context && sp == last_sp; + if !is_leaf { + trace!("stack pointer went backwards, assuming unwind complete"); + return None; + } + } + + // Ok, the frame now seems well and truly valid, do final cleanup. + + // A caller's ip is the return address, which is the instruction + // *after* the CALL that caused us to arrive at the callee. 
Set + // the value to 2 less than that, so it points to the CALL instruction + // (arm instructions are all 2 bytes wide). This is important because + // we use this value to lookup the CFI we need to unwind the next frame. + let ip = frame.context.get_instruction_pointer(); + frame.instruction = ip - 2; + + Some(frame) +} diff --git a/third_party/rust/minidump-unwind/src/arm64.rs b/third_party/rust/minidump-unwind/src/arm64.rs new file mode 100644 index 000000000000..4aae7fc565f6 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/arm64.rs @@ -0,0 +1,475 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +// NOTE: arm64_old.rs and arm64.rs should be identical except for the names of +// their context types. + +use super::impl_prelude::*; +use minidump::{ + CpuContext, MinidumpContext, MinidumpContextValidity, MinidumpModuleList, MinidumpRawContext, + Module, +}; +use std::collections::HashSet; +use tracing::trace; + +type ArmContext = minidump::format::CONTEXT_ARM64; +type Pointer = ::Register; +type Registers = minidump::format::Arm64RegisterNumbers; + +const POINTER_WIDTH: Pointer = std::mem::size_of::() as Pointer; +const FRAME_POINTER: &str = Registers::FramePointer.name(); +const LINK_REGISTER: &str = Registers::LinkRegister.name(); +const STACK_POINTER: &str = "sp"; +const PROGRAM_COUNTER: &str = "pc"; +const CALLEE_SAVED_REGS: &[&str] = &[ + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "fp", +]; + +async fn get_caller_by_cfi

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying cfi"); + + let _last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + let mut stack_walker = CfiStackWalker::from_ctx_and_args(ctx, args, callee_forwarded_regs)?; + + args.symbol_provider + .walk_frame(stack_walker.module, &mut stack_walker) + .await?; + + let caller_pc = stack_walker.caller_ctx.get_register_always(PROGRAM_COUNTER); + let caller_sp = stack_walker.caller_ctx.get_register_always(STACK_POINTER); + let new_valid = MinidumpContextValidity::Some(stack_walker.caller_validity); + + // Apply ptr auth stripping + let caller_pc = ptr_auth_strip(args.modules, caller_pc); + stack_walker + .caller_ctx + .set_register(PROGRAM_COUNTER, caller_pc); + // Nothing should really ever restore lr, but CFI is more magic so whatever sure + if let Some(lr) = stack_walker + .caller_ctx + .get_register(LINK_REGISTER, &new_valid) + { + stack_walker + .caller_ctx + .set_register(LINK_REGISTER, ptr_auth_strip(args.modules, lr)); + } + if let Some(fp) = stack_walker + .caller_ctx + .get_register(FRAME_POINTER, &new_valid) + { + stack_walker + .caller_ctx + .set_register(FRAME_POINTER, ptr_auth_strip(args.modules, fp)); + } + + trace!( + "cfi evaluation was successful -- caller_pc: 0x{:016x}, caller_sp: 0x{:016x}", + caller_pc, + caller_sp, + ); + + // Do absolutely NO validation! Yep! As long as CFI evaluation succeeds + // (which does include pc and sp resolving), just blindly assume the + // values are correct. I Don't Like This, but it's what breakpad does and + // we should start with a baseline of parity. + + // FIXME?: for whatever reason breakpad actually does block on the address + // being canonical *ONLY* for arm64, which actually rejects null pc early! + // Let's not do that to keep our code more uniform. 
+ + let context = MinidumpContext { + raw: MinidumpRawContext::Arm64(stack_walker.caller_ctx), + valid: new_valid, + }; + Some(StackFrame::from_context(context, FrameTrust::CallFrameInfo)) +} + +fn callee_forwarded_regs(valid: &MinidumpContextValidity) -> HashSet<&'static str> { + match valid { + MinidumpContextValidity::All => CALLEE_SAVED_REGS.iter().copied().collect(), + MinidumpContextValidity::Some(ref which) => CALLEE_SAVED_REGS + .iter() + .filter(|®| which.contains(reg)) + .copied() + .collect(), + } +} + +fn get_caller_by_frame_pointer

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying frame pointer"); + // Ok so there exists 3 kinds of stackframes in ARM64: + // + // * stackless leaves + // * stackful leaves + // * normal frames + // + // + // # Normal Frames + // + // Let's start with normal frames. In the standard calling convention, the following happens: + // + // lr := return_address (performed implicitly by ARM's function call instruction) + // PUSH fp, lr (save fp and lr to the stack -- ARM64 pushes in pairs) + // fp := sp (update the frame pointer to the current stack pointer) + // + // So to restore the caller's registers, we have: + // + // pc := *(fp + ptr) (this will get the return address, usual offset caveats apply) + // sp := fp + ptr*2 + // fp := *fp + // + // Note that although we push lr, we don't restore lr. That's because lr is just our + // return address, and is therefore essentially a "saved" pc. lr is caller-saved *and* + // automatically overwritten by every CALL, so the callee (the frame we're unwinding right now) + // has no business ever knowing it, let alone restoring it. lr is generally just saved + // immediately and then used as a free general purpose register, and therefore will generally + // contain random garbage unrelated to unwinding. + // + // + // # Leaf Functions + // + // Now leaf functions are a bit messier. These are functions which don't call other functions + // and therefore don't actually ever need to save lr or fp. As such, they can be entirely + // stackless, although they don't have to be. So calling a leaf function is just: + // + // lr := return_address + // + // + // And to restore the caller's registers, we have: + // + // pc := lr + // sp := sp - + // fp := fp + // + // Unfortunately, we're unaware of any way to "detect" that a function is a leaf or not + // without symbols/cfi just telling you that. 
Since we're in frame pointer unwinding, + // we probably don't have those available! And even if we did, we still wouldn't know if + // the frame was stackless or not, so we wouldn't know how to restore sp reliably and might + // get the stack in a weird state for subsequent (possibly CFI-based) frames. + // Also, if we incorrectly guess a frame is a leaf, we'll also use a probably-random-garbage + // lr as a pc and potentially halluncinate a bunch. + // + // + // # Conclusion + // + // At the moment we think it's safest/best to just always assume we're unwinding a normal + // frame. Statistically this is true (most frames are, even if they happen to be at the + // top of the stack when we crash), and if the frame *is* a leaf then our `fp` is likely + // to be the correct fp of the next frame. This will effectively result in us unwinding + // our caller instead of ourselves, causing the caller to be omitted from the backtrace + // but otherwise perfectly syncing up for the rest of the frames. + let last_fp = ctx.get_register(FRAME_POINTER, args.valid())?; + let last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + if last_fp >= u64::MAX - POINTER_WIDTH * 2 { + // Although this code generally works fine if the pointer math overflows, + // debug builds will still panic, and this guard protects against it without + // drowning the rest of the code in checked_add. + return None; + } + + let (caller_fp, caller_pc, caller_sp) = if last_fp == 0 { + // In this case we want unwinding to stop. One of the termination conditions in get_caller_frame + // is that caller_sp <= last_sp. Therefore we can force termination by setting caller_sp = last_sp. 
+ (0, 0, last_sp) + } else { + ( + args.stack_memory.get_memory_at_address(last_fp)?, + args.stack_memory + .get_memory_at_address(last_fp + POINTER_WIDTH)?, + last_fp + POINTER_WIDTH * 2, + ) + }; + let caller_fp = ptr_auth_strip(args.modules, caller_fp); + let caller_pc = ptr_auth_strip(args.modules, caller_pc); + + // Don't accept obviously wrong instruction pointers. + if is_non_canonical(caller_pc) { + trace!("rejecting frame pointer result for unreasonable instruction pointer"); + return None; + } + + // Don't actually validate that the stack makes sense (duplicating breakpad behaviour). + + trace!( + "frame pointer seems valid -- caller_pc: 0x{:016x}, caller_sp: 0x{:016x}", + caller_pc, + caller_sp, + ); + + let mut caller_ctx = ArmContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc); + caller_ctx.set_register(FRAME_POINTER, caller_fp); + caller_ctx.set_register(STACK_POINTER, caller_sp); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(FRAME_POINTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::Arm64(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + Some(StackFrame::from_context(context, FrameTrust::FramePointer)) +} + +fn ptr_auth_strip(modules: &MinidumpModuleList, ptr: Pointer) -> Pointer { + // ARMv8.3 introduced a code hardening system called "Pointer Authentication" + // which is used on Apple platforms. It adds some extra high bits to the + // several pointers when they get pushed to memory, including the return + // address (lr) and frame pointer (fp), which both get pushed at the start + // of most non-leaf functions. + // + // We lack some of the proper context to implement the "strip" primitive, because + // the amount of bits that are "real" pointer depends on various extensions like + // pointer tagging and how big page tables are. 
If we allocate too many bits to + // "real" then we can get ptr_auth bits in our pointers, and if we allocate too + // few we can end up truncating our pointers. Thankfully we'll usually have a bit + // of margin from pointers not having the highest real bits set. + // + // To help us guess, we have a few pieces of information: + // + // * Apple seems to default to a 17/47 split, so 47 bits for "real" is a good baseline + // * We know the address ranges of various loaded (and unloaded modules) + // * We know the address range of the stacks + // * We *can* know the address range of some sections of the heap (MemoryList) + // * We *can* know the page mappings (MemoryInfo) + // + // Right now we only incorporate the first two. Ideally we would process all those sources + // once at the start of stack walking and pass it down to the ARM stackwalker but that's + // a lot of annoying rewiring that won't necessarily improve results. + let apple_default_max_addr = (1 << 47) - 1; + let max_module_addr = modules + .by_addr() + .next_back() + .map(|last_module| { + last_module + .base_address() + .saturating_add(last_module.size()) + }) + .unwrap_or(0); + let max_addr = u64::max(apple_default_max_addr, max_module_addr); + + // We can convert a "highest" address into a suitable mask by getting the next_power_of_two + // (a single bit >= the max) and subtracting one from it (producing all 1's <= that bit). + // There are two corner cases to this: + // + // * the next_power_of_two being 2^65, in which case our mask should be !0 (all ones) + // * the max addr being a power of two already means we will actually lose that one value + // + // The first case is handled by using checked_next_power_of_two. The second case isn't really + // handled by it very improbable. We do however make sure the apple max isn't a power of two. 
+ let mask = max_addr + .checked_next_power_of_two() + .map(|high_bit| high_bit - 1) + .unwrap_or(!0); + + // In principle, if we've done a good job of computing the mask, we can apply it regardless + // of if there's any ptr auth bits. Either it will clear the auth or be a noop. We don't + // check if this messes up, because there's too many subtleties like JITed code to reliably + // detect this going awry. + ptr & mask +} + +async fn get_caller_by_scan

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying scan"); + // Stack scanning is just walking from the end of the frame until we encounter + // a value on the stack that looks like a pointer into some code (it's an address + // in a range covered by one of our modules). If we find such an instruction, + // we assume it's an pc value that was pushed by the CALL instruction that created + // the current frame. The next frame is then assumed to end just before that + // pc value. + let last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + // Number of pointer-sized values to scan through in our search. + let default_scan_range = 40; + let extended_scan_range = default_scan_range * 4; + + // Breakpad devs found that the first frame of an unwind can be really messed up, + // and therefore benefits from a longer scan. Let's do it too. + let scan_range = if let FrameTrust::Context = args.callee_frame.trust { + extended_scan_range + } else { + default_scan_range + }; + + for i in 0..scan_range { + let address_of_pc = last_sp.checked_add(i * POINTER_WIDTH)?; + let caller_pc = args.stack_memory.get_memory_at_address(address_of_pc)?; + if instruction_seems_valid(caller_pc, args.modules, args.symbol_provider).await { + // pc is pushed by CALL, so sp is just address_of_pc + ptr + let caller_sp = address_of_pc.checked_add(POINTER_WIDTH)?; + + // Don't do any more validation, and don't try to restore fp + // (that's what breakpad does!) 
+ + trace!( + "scan seems valid -- caller_pc: 0x{:08x}, caller_sp: 0x{:08x}", + caller_pc, + caller_sp, + ); + + let mut caller_ctx = ArmContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc); + caller_ctx.set_register(STACK_POINTER, caller_sp); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::Arm64(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + return Some(StackFrame::from_context(context, FrameTrust::Scan)); + } + } + + None +} + +/// The most strict validation we have for instruction pointers. +/// +/// This is only used for stack-scanning, because it's explicitly +/// trying to distinguish between total garbage and correct values. +/// cfi and frame_pointer approaches do not use this validation +/// because by default they're working with plausible/trustworthy +/// data. +/// +/// Specifically, not using this validation allows cfi/fp methods +/// to unwind through frames we don't have mapped modules for (such as +/// OS APIs). This may seem confusing since we obviously don't have cfi +/// for unmapped modules! +/// +/// The way this works is that we will use cfi to unwind some frame we +/// know about and *end up* in a function we know nothing about, but with +/// all the right register values. At this point, frame pointers will +/// often do the correct thing even though we don't know what code we're +/// in -- until we get back into code we do know about and cfi kicks back in. +/// At worst, this sets scanning up in a better position for success! +/// +/// If we applied this more rigorous validation to cfi/fp methods, we +/// would just discard the correct register values from the known frame +/// and immediately start doing unreliable scans. +async fn instruction_seems_valid

( + instruction: Pointer, + modules: &MinidumpModuleList, + symbol_provider: &P, +) -> bool +where + P: SymbolProvider + Sync, +{ + if is_non_canonical(instruction) || instruction == 0 { + return false; + } + + super::instruction_seems_valid_by_symbols(instruction, modules, symbol_provider).await +} + +fn is_non_canonical(instruction: Pointer) -> bool { + // Reject instructions in the first page or above the user-space threshold. + !(0x1000..=0x000fffffffffffff).contains(&instruction) +} + +/* +// ARM64 is currently hyper-permissive, so we don't use this, +// but here it is in case we change our minds! +fn stack_seems_valid( + caller_sp: Pointer, + callee_sp: Pointer, + stack_memory: UnifiedMemory<'_, '_>, +) -> bool { + // The stack shouldn't *grow* when we unwind + if caller_sp < callee_sp { + return false; + } + + // The stack pointer should be in the stack + stack_memory + .get_memory_at_address::(caller_sp as u64) + .is_some() +} +*/ + +pub async fn get_caller_frame

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + // .await doesn't like closures, so don't use Option chaining + let mut frame = None; + if frame.is_none() { + frame = get_caller_by_cfi(ctx, args).await; + } + if frame.is_none() { + frame = get_caller_by_frame_pointer(ctx, args); + } + if frame.is_none() { + frame = get_caller_by_scan(ctx, args).await; + } + let mut frame = frame?; + + // We now check the frame to see if it looks like unwinding is complete, + // based on the frame we computed having a nonsense value. Returning + // None signals to the unwinder to stop unwinding. + + // if the instruction is within the first ~page of memory, it's basically + // null, and we can assume unwinding is complete. + if frame.context.get_instruction_pointer() < 4096 { + trace!("instruction pointer was nullish, assuming unwind complete"); + return None; + } + + // If the new stack pointer is at a lower address than the old, + // then that's clearly incorrect. Treat this as end-of-stack to + // enforce progress and avoid infinite loops. + + let sp = frame.context.get_stack_pointer(); + let last_sp = ctx.get_register_always("sp"); + if sp <= last_sp { + // Arm leaf functions may not actually touch the stack (thanks + // to the link register allowing you to "push" the return address + // to a register), so we need to permit the stack pointer to not + // change for the first frame of the unwind. After that we need + // more strict validation to avoid infinite loops. + let is_leaf = args.callee_frame.trust == FrameTrust::Context && sp == last_sp; + if !is_leaf { + trace!("stack pointer went backwards, assuming unwind complete"); + return None; + } + } + + // Ok, the frame now seems well and truly valid, do final cleanup. + + // A caller's ip is the return address, which is the instruction + // *after* the CALL that caused us to arrive at the callee. 
Set + // the value to 4 less than that, so it points to the CALL instruction + // (arm64 instructions are all 4 bytes wide). This is important because + // we use this value to lookup the CFI we need to unwind the next frame. + let ip = frame.context.get_instruction_pointer(); + frame.instruction = ip - 4; + + Some(frame) +} diff --git a/third_party/rust/minidump-unwind/src/arm64_old.rs b/third_party/rust/minidump-unwind/src/arm64_old.rs new file mode 100644 index 000000000000..8c4d9204b6e8 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/arm64_old.rs @@ -0,0 +1,475 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +// NOTE: arm64_old.rs and arm64.rs should be identical except for the names of +// their context types. + +use super::impl_prelude::*; +use minidump::{ + CpuContext, MinidumpContext, MinidumpContextValidity, MinidumpModuleList, MinidumpRawContext, + Module, +}; +use std::collections::HashSet; +use tracing::trace; + +type ArmContext = minidump::format::CONTEXT_ARM64_OLD; +type Pointer = ::Register; +type Registers = minidump::format::Arm64RegisterNumbers; + +const POINTER_WIDTH: Pointer = std::mem::size_of::() as Pointer; +const FRAME_POINTER: &str = Registers::FramePointer.name(); +const LINK_REGISTER: &str = Registers::LinkRegister.name(); +const STACK_POINTER: &str = "sp"; +const PROGRAM_COUNTER: &str = "pc"; +const CALLEE_SAVED_REGS: &[&str] = &[ + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "fp", +]; + +async fn get_caller_by_cfi

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying cfi"); + + let _last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + let mut stack_walker = CfiStackWalker::from_ctx_and_args(ctx, args, callee_forwarded_regs)?; + + args.symbol_provider + .walk_frame(stack_walker.module, &mut stack_walker) + .await?; + + let caller_pc = stack_walker.caller_ctx.get_register_always(PROGRAM_COUNTER); + let caller_sp = stack_walker.caller_ctx.get_register_always(STACK_POINTER); + let new_valid = MinidumpContextValidity::Some(stack_walker.caller_validity); + + // Apply ptr auth stripping + let caller_pc = ptr_auth_strip(args.modules, caller_pc); + stack_walker + .caller_ctx + .set_register(PROGRAM_COUNTER, caller_pc); + // Nothing should really ever restore lr, but CFI is more magic so whatever sure + if let Some(lr) = stack_walker + .caller_ctx + .get_register(LINK_REGISTER, &new_valid) + { + stack_walker + .caller_ctx + .set_register(LINK_REGISTER, ptr_auth_strip(args.modules, lr)); + } + if let Some(fp) = stack_walker + .caller_ctx + .get_register(FRAME_POINTER, &new_valid) + { + stack_walker + .caller_ctx + .set_register(FRAME_POINTER, ptr_auth_strip(args.modules, fp)); + } + + trace!( + "cfi evaluation was successful -- caller_pc: 0x{:016x}, caller_sp: 0x{:016x}", + caller_pc, + caller_sp, + ); + + // Do absolutely NO validation! Yep! As long as CFI evaluation succeeds + // (which does include pc and sp resolving), just blindly assume the + // values are correct. I Don't Like This, but it's what breakpad does and + // we should start with a baseline of parity. + + // FIXME?: for whatever reason breakpad actually does block on the address + // being canonical *ONLY* for arm64, which actually rejects null pc early! + // Let's not do that to keep our code more uniform. 
+ + let context = MinidumpContext { + raw: MinidumpRawContext::OldArm64(stack_walker.caller_ctx), + valid: new_valid, + }; + Some(StackFrame::from_context(context, FrameTrust::CallFrameInfo)) +} + +fn callee_forwarded_regs(valid: &MinidumpContextValidity) -> HashSet<&'static str> { + match valid { + MinidumpContextValidity::All => CALLEE_SAVED_REGS.iter().copied().collect(), + MinidumpContextValidity::Some(ref which) => CALLEE_SAVED_REGS + .iter() + .filter(|&reg| which.contains(reg)) + .copied() + .collect(), + } +} + +fn get_caller_by_frame_pointer

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying frame pointer"); + // Ok so there exist 3 kinds of stackframes in ARM64: + // + // * stackless leaves + // * stackful leaves + // * normal frames + // + // + // # Normal Frames + // + // Let's start with normal frames. In the standard calling convention, the following happens: + // + // lr := return_address (performed implicitly by ARM's function call instruction) + // PUSH fp, lr (save fp and lr to the stack -- ARM64 pushes in pairs) + // fp := sp (update the frame pointer to the current stack pointer) + // + // So to restore the caller's registers, we have: + // + // pc := *(fp + ptr) (this will get the return address, usual offset caveats apply) + // sp := fp + ptr*2 + // fp := *fp + // + // Note that although we push lr, we don't restore lr. That's because lr is just our + // return address, and is therefore essentially a "saved" pc. lr is caller-saved *and* + // automatically overwritten by every CALL, so the callee (the frame we're unwinding right now) + // has no business ever knowing it, let alone restoring it. lr is generally just saved + // immediately and then used as a free general purpose register, and therefore will generally + // contain random garbage unrelated to unwinding. + // + // + // # Leaf Functions + // + // Now leaf functions are a bit messier. These are functions which don't call other functions + // and therefore don't actually ever need to save lr or fp. As such, they can be entirely + // stackless, although they don't have to be. So calling a leaf function is just: + // + // lr := return_address + // + // + // And to restore the caller's registers, we have: + // + // pc := lr + // sp := sp - <some arbitrary value> + // fp := fp + // + // Unfortunately, we're unaware of any way to "detect" that a function is a leaf or not + // without symbols/cfi just telling you that.
Since we're in frame pointer unwinding, + // we probably don't have those available! And even if we did, we still wouldn't know if + // the frame was stackless or not, so we wouldn't know how to restore sp reliably and might + // get the stack in a weird state for subsequent (possibly CFI-based) frames. + // Also, if we incorrectly guess a frame is a leaf, we'll also use a probably-random-garbage + // lr as a pc and potentially hallucinate a bunch. + // + // + // # Conclusion + // + // At the moment we think it's safest/best to just always assume we're unwinding a normal + // frame. Statistically this is true (most frames are, even if they happen to be at the + // top of the stack when we crash), and if the frame *is* a leaf then our `fp` is likely + // to be the correct fp of the next frame. This will effectively result in us unwinding + // our caller instead of ourselves, causing the caller to be omitted from the backtrace + // but otherwise perfectly syncing up for the rest of the frames. + let last_fp = ctx.get_register(FRAME_POINTER, args.valid())?; + let last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + if last_fp >= u64::MAX - POINTER_WIDTH * 2 { + // Although this code generally works fine if the pointer math overflows, + // debug builds will still panic, and this guard protects against it without + // drowning the rest of the code in checked_add. + return None; + } + + let (caller_fp, caller_pc, caller_sp) = if last_fp == 0 { + // In this case we want unwinding to stop. One of the termination conditions in get_caller_frame + // is that caller_sp <= last_sp. Therefore we can force termination by setting caller_sp = last_sp.
+ (0, 0, last_sp) + } else { + ( + args.stack_memory.get_memory_at_address(last_fp)?, + args.stack_memory + .get_memory_at_address(last_fp + POINTER_WIDTH)?, + last_fp + POINTER_WIDTH * 2, + ) + }; + let caller_fp = ptr_auth_strip(args.modules, caller_fp); + let caller_pc = ptr_auth_strip(args.modules, caller_pc); + + // Don't accept obviously wrong instruction pointers. + if is_non_canonical(caller_pc) { + trace!("rejecting frame pointer result for unreasonable instruction pointer"); + return None; + } + + // Don't actually validate that the stack makes sense (duplicating breakpad behaviour). + + trace!( + "frame pointer seems valid -- caller_pc: 0x{:016x}, caller_sp: 0x{:016x}", + caller_pc, + caller_sp, + ); + + let mut caller_ctx = ArmContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc); + caller_ctx.set_register(FRAME_POINTER, caller_fp); + caller_ctx.set_register(STACK_POINTER, caller_sp); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(FRAME_POINTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::OldArm64(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + Some(StackFrame::from_context(context, FrameTrust::FramePointer)) +} + +fn ptr_auth_strip(modules: &MinidumpModuleList, ptr: Pointer) -> Pointer { + // ARMv8.3 introduced a code hardening system called "Pointer Authentication" + // which is used on Apple platforms. It adds some extra high bits to the + // several pointers when they get pushed to memory, including the return + // address (lr) and frame pointer (fp), which both get pushed at the start + // of most non-leaf functions. + // + // We lack some of the proper context to implement the "strip" primitive, because + // the amount of bits that are "real" pointer depends on various extensions like + // pointer tagging and how big page tables are. 
If we allocate too many bits to + // "real" then we can get ptr_auth bits in our pointers, and if we allocate too + // few we can end up truncating our pointers. Thankfully we'll usually have a bit + // of margin from pointers not having the highest real bits set. + // + // To help us guess, we have a few pieces of information: + // + // * Apple seems to default to a 17/47 split, so 47 bits for "real" is a good baseline + // * We know the address ranges of various loaded (and unloaded) modules + // * We know the address range of the stacks + // * We *can* know the address range of some sections of the heap (MemoryList) + // * We *can* know the page mappings (MemoryInfo) + // + // Right now we only incorporate the first two. Ideally we would process all those sources + // once at the start of stack walking and pass it down to the ARM stackwalker but that's + // a lot of annoying rewiring that won't necessarily improve results. + let apple_default_max_addr = (1 << 47) - 1; + let max_module_addr = modules + .by_addr() + .next_back() + .map(|last_module| { + last_module + .base_address() + .saturating_add(last_module.size()) + }) + .unwrap_or(0); + let max_addr = u64::max(apple_default_max_addr, max_module_addr); + + // We can convert a "highest" address into a suitable mask by getting the next_power_of_two + // (a single bit >= the max) and subtracting one from it (producing all 1's <= that bit). + // There are two corner cases to this: + // + // * the next_power_of_two being 2^64, in which case our mask should be !0 (all ones) + // * the max addr being a power of two already means we will actually lose that one value + // + // The first case is handled by using checked_next_power_of_two. The second case isn't really + // handled because it is very improbable. We do however make sure the apple max isn't a power of two.
+ let mask = max_addr + .checked_next_power_of_two() + .map(|high_bit| high_bit - 1) + .unwrap_or(!0); + + // In principle, if we've done a good job of computing the mask, we can apply it regardless + // of if there's any ptr auth bits. Either it will clear the auth or be a noop. We don't + // check if this messes up, because there's too many subtleties like JITed code to reliably + // detect this going awry. + ptr & mask +} + +async fn get_caller_by_scan

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying scan"); + // Stack scanning is just walking from the end of the frame until we encounter + // a value on the stack that looks like a pointer into some code (it's an address + // in a range covered by one of our modules). If we find such an instruction, + // we assume it's a pc value that was pushed by the CALL instruction that created + // the current frame. The next frame is then assumed to end just before that + // pc value. + let last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + // Number of pointer-sized values to scan through in our search. + let default_scan_range = 40; + let extended_scan_range = default_scan_range * 4; + + // Breakpad devs found that the first frame of an unwind can be really messed up, + // and therefore benefits from a longer scan. Let's do it too. + let scan_range = if let FrameTrust::Context = args.callee_frame.trust { + extended_scan_range + } else { + default_scan_range + }; + + for i in 0..scan_range { + let address_of_pc = last_sp.checked_add(i * POINTER_WIDTH)?; + let caller_pc = args.stack_memory.get_memory_at_address(address_of_pc)?; + if instruction_seems_valid(caller_pc, args.modules, args.symbol_provider).await { + // pc is pushed by CALL, so sp is just address_of_pc + ptr + let caller_sp = address_of_pc.checked_add(POINTER_WIDTH)?; + + // Don't do any more validation, and don't try to restore fp + // (that's what breakpad does!)
+ + trace!( + "scan seems valid -- caller_pc: 0x{:08x}, caller_sp: 0x{:08x}", + caller_pc, + caller_sp, + ); + + let mut caller_ctx = ArmContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc); + caller_ctx.set_register(STACK_POINTER, caller_sp); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::OldArm64(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + return Some(StackFrame::from_context(context, FrameTrust::Scan)); + } + } + + None +} + +/// The most strict validation we have for instruction pointers. +/// +/// This is only used for stack-scanning, because it's explicitly +/// trying to distinguish between total garbage and correct values. +/// cfi and frame_pointer approaches do not use this validation +/// because by default they're working with plausible/trustworthy +/// data. +/// +/// Specifically, not using this validation allows cfi/fp methods +/// to unwind through frames we don't have mapped modules for (such as +/// OS APIs). This may seem confusing since we obviously don't have cfi +/// for unmapped modules! +/// +/// The way this works is that we will use cfi to unwind some frame we +/// know about and *end up* in a function we know nothing about, but with +/// all the right register values. At this point, frame pointers will +/// often do the correct thing even though we don't know what code we're +/// in -- until we get back into code we do know about and cfi kicks back in. +/// At worst, this sets scanning up in a better position for success! +/// +/// If we applied this more rigorous validation to cfi/fp methods, we +/// would just discard the correct register values from the known frame +/// and immediately start doing unreliable scans. +async fn instruction_seems_valid

( + instruction: Pointer, + modules: &MinidumpModuleList, + symbol_provider: &P, +) -> bool +where + P: SymbolProvider + Sync, +{ + if is_non_canonical(instruction) || instruction == 0 { + return false; + } + + super::instruction_seems_valid_by_symbols(instruction, modules, symbol_provider).await +} + +fn is_non_canonical(instruction: Pointer) -> bool { + // Reject instructions in the first page or above the user-space threshold. + !(0x1000..=0x000fffffffffffff).contains(&instruction) +} + +/* +// ARM64 is currently hyper-permissive, so we don't use this, +// but here it is in case we change our minds! +fn stack_seems_valid( + caller_sp: Pointer, + callee_sp: Pointer, + stack_memory: UnifiedMemory<'_, '_>, +) -> bool { + // The stack shouldn't *grow* when we unwind + if caller_sp < callee_sp { + return false; + } + + // The stack pointer should be in the stack + stack_memory + .get_memory_at_address::(caller_sp as u64) + .is_some() +} +*/ + +pub async fn get_caller_frame

( + ctx: &ArmContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + // .await doesn't like closures, so don't use Option chaining + let mut frame = None; + if frame.is_none() { + frame = get_caller_by_cfi(ctx, args).await; + } + if frame.is_none() { + frame = get_caller_by_frame_pointer(ctx, args); + } + if frame.is_none() { + frame = get_caller_by_scan(ctx, args).await; + } + let mut frame = frame?; + + // We now check the frame to see if it looks like unwinding is complete, + // based on the frame we computed having a nonsense value. Returning + // None signals to the unwinder to stop unwinding. + + // if the instruction is within the first ~page of memory, it's basically + // null, and we can assume unwinding is complete. + if frame.context.get_instruction_pointer() < 4096 { + trace!("instruction pointer was nullish, assuming unwind complete"); + return None; + } + + // If the new stack pointer is at a lower address than the old, + // then that's clearly incorrect. Treat this as end-of-stack to + // enforce progress and avoid infinite loops. + + let sp = frame.context.get_stack_pointer(); + let last_sp = ctx.get_register_always("sp"); + if sp <= last_sp { + // Arm leaf functions may not actually touch the stack (thanks + // to the link register allowing you to "push" the return address + // to a register), so we need to permit the stack pointer to not + // change for the first frame of the unwind. After that we need + // more strict validation to avoid infinite loops. + let is_leaf = args.callee_frame.trust == FrameTrust::Context && sp == last_sp; + if !is_leaf { + trace!("stack pointer went backwards, assuming unwind complete"); + return None; + } + } + + // Ok, the frame now seems well and truly valid, do final cleanup. + + // A caller's ip is the return address, which is the instruction + // *after* the CALL that caused us to arrive at the callee. 
Set + // the value to 4 less than that, so it points to the CALL instruction + // (arm64 instructions are all 4 bytes wide). This is important because + // we use this value to lookup the CFI we need to unwind the next frame. + let ip = frame.context.get_instruction_pointer(); + frame.instruction = ip - 4; + + Some(frame) +} diff --git a/third_party/rust/minidump-unwind/src/arm64_unittest.rs b/third_party/rust/minidump-unwind/src/arm64_unittest.rs new file mode 100644 index 000000000000..2bb7518333f3 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/arm64_unittest.rs @@ -0,0 +1,1359 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +// NOTE: we don't bother testing arm64_old, it should have identical code at +// all times! + +use crate::*; +use minidump::system_info::{Cpu, Os}; +use std::collections::HashMap; +use test_assembler::*; + +type Context = minidump::format::CONTEXT_ARM64; + +struct TestFixture { + pub raw: Context, + pub modules: MinidumpModuleList, + pub symbols: HashMap, +} + +impl TestFixture { + pub fn new() -> TestFixture { + TestFixture { + raw: Context::default(), + // Give the two modules reasonable standard locations and names + // for tests to play with. 
+ modules: MinidumpModuleList::from_modules(vec![ + MinidumpModule::new(0x40000000, 0x10000, "module1"), + MinidumpModule::new(0x50000000, 0x10000, "module2"), + ]), + symbols: HashMap::new(), + } + } + + pub fn high_module() -> TestFixture { + TestFixture { + raw: Context::default(), + // Same as new but with a really high module to stretch ptr auth stripping + modules: MinidumpModuleList::from_modules(vec![ + MinidumpModule::new(0x40000000, 0x10000, "module1"), + MinidumpModule::new(0x50000000, 0x10000, "module2"), + MinidumpModule::new(0x10000000000000, 0x10000, "high-module"), + ]), + symbols: HashMap::new(), + } + } + + pub fn highest_module() -> TestFixture { + TestFixture { + raw: Context::default(), + // Same as new but with a module so high it sets the maximum address bit + // effectively disabling stripping + modules: MinidumpModuleList::from_modules(vec![ + MinidumpModule::new(0x40000000, 0x10000, "module1"), + MinidumpModule::new(0x50000000, 0x10000, "module2"), + MinidumpModule::new(0xa000_0000_0000_0000, 0x10000, "highest-module"), + ]), + symbols: HashMap::new(), + } + } + + pub async fn walk_stack(&self, stack: Section) -> CallStack { + let context = MinidumpContext { + raw: MinidumpRawContext::Arm64(self.raw.clone()), + valid: MinidumpContextValidity::All, + }; + let base = stack.start().value().unwrap(); + let size = stack.size(); + let stack = stack.get_contents().unwrap(); + let stack_memory = MinidumpMemory { + desc: Default::default(), + base_address: base, + size, + bytes: &stack, + endian: scroll::LE, + }; + let system_info = SystemInfo { + os: Os::Windows, + os_version: None, + os_build: None, + cpu: Cpu::Arm64, + cpu_info: None, + cpu_microcode_version: None, + cpu_count: 1, + }; + let symbolizer = Symbolizer::new(string_symbol_supplier(self.symbols.clone())); + let mut stack = CallStack::with_context(context); + + walk_stack( + 0, + (), + &mut stack, + Some(UnifiedMemory::Memory(&stack_memory)), + &self.modules, + &system_info, + 
&symbolizer, + ) + .await; + + stack + } + + pub fn add_symbols(&mut self, name: String, symbols: String) { + self.symbols.insert(name, symbols); + } +} + +#[tokio::test] +async fn test_simple() { + let mut f = TestFixture::new(); + let stack = Section::new(); + stack.start().set_const(0x80000000); + // There should be no references to the stack in this walk: we don't + // provide any call frame information, so trying to reconstruct the + // context frame's caller should fail. So there's no need for us to + // provide stack contents. + f.raw.set_register("pc", 0x4000c020); + f.raw.set_register("fp", 0x80000000); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + let f = &s.frames[0]; + let m = f.module.as_ref().unwrap(); + assert_eq!(m.code_file(), "module1"); +} + +#[tokio::test] +async fn test_scan_without_symbols() { + // Scanning should work without any symbols + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u64; + let return_address2 = 0x50000900u64; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D64(0x40090000) // junk that's not + .D64(0x60000000) // a return address + .D64(return_address1) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 16) // space + .D64(0xF0000000) // more junk + .D64(0x0000000D) + .D64(return_address2) // actual return address + // frame 2 + .mark(&frame2_sp) + .append_repeated(0, 64); // end of stack + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("sp", stack.start().value().unwrap()); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = 
&frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() + ); + } else { + unreachable!(); + } + } + + { + // Frame 2 + let frame = &s.frames[2]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_scan_with_symbols() { + // Test that we can refine our scanning using symbols. Specifically we + // should be able to reject pointers that are in modules but don't map to + // any FUNC/PUBLIC record. + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start = 0x80000000; + stack.start().set_const(stack_start); + + let return_address = 0x50000200; + + let frame1_sp = Label::new(); + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D64(0x40090000) // junk that's not + .D64(0x60000000) // a return address + .D64(0x40001000) // a couple of plausible addresses + .D64(0x5000F000) // that are not within functions + .D64(return_address) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 64); // end of stack + + f.raw.set_register("pc", 0x40000200); + f.raw.set_register("sp", stack.start().value().unwrap()); + + f.add_symbols( + String::from("module1"), + // The youngest frame's function. 
+ String::from("FUNC 100 400 10 monotreme\n"), + ); + f.add_symbols( + String::from("module2"), + // The calling frame's function. + String::from("FUNC 100 400 10 marsupial\n"), + ); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_scan_first_frame() { + // The first (context) frame gets extra long scans, this test checks that. 
+ let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u64; + let return_address2 = 0x50000900u64; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D64(0x40090000) // junk that's not + .D64(0x60000000) // a return address + .append_repeated(0, 96) // more space + .D64(return_address1) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 32) // space + .D64(0xF0000000) // more junk + .D64(0x0000000D) + .append_repeated(0, 336) // more space + .D64(return_address2) // actual return address (won't be found) + // frame 2 + .mark(&frame2_sp) + .append_repeated(0, 64); // end of stack + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("sp", stack.start().value().unwrap()); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer() { + // Frame-pointer-based unwinding + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u64; + let return_address2 = 0x50000900u64; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame0_fp = 
Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame0_fp) // next fp will point to the next value + .D64(&frame1_fp) // save current frame pointer + .D64(return_address1) // save current link register + .mark(&frame1_sp) + // frame 1 + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame1_fp) + .D64(&frame2_fp) + .D64(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 64) // Whatever values on the stack. + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address. + .mark(&frame2_fp) // next fp will point to the next value + .D64(0) + .D64(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", 0x1fe0fe10); + f.raw.set_register("fp", frame0_fp.value().unwrap()); + f.raw.set_register("sp", stack.start().value().unwrap()); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame1_fp.value().unwrap() + ); + } else { + unreachable!(); + } + } + + { + // Frame 2 + let frame = &s.frames[2]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, 
FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame2_fp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer_stackless_leaf() { + // Same as test_frame_pointer but frame0 is a stackless leaf. + // + // In the current implementation we will misunderstand this slightly + // and basically "lose" frame 1, but still properly recover frame 2. + // THIS TEST BREAKING MIGHT MEAN YOU'VE MADE THINGS WORK BETTER! + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u64; + let return_address2 = 0x50000900u64; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 (all junk!) + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame1_sp) + // frame 1 (this is sadly dropped) + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame1_fp) + .D64(&frame2_fp) + .D64(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 64) // Whatever values on the stack. + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address. 
+ .mark(&frame2_fp) // next fp will point to the next value + .D64(0) + .D64(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", return_address1); // we will sadly ignore this + f.raw.set_register("fp", frame1_fp.value().unwrap()); + f.raw.set_register("sp", stack.start().value().unwrap()); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 2 (found as Frame 1) + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame2_fp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer_stackful_leaf() { + // Same as test_frame_pointer but frame0 is a stackful leaf. + // + // In the current implementation we will misunderstand this slightly + // and basically "lose" frame 1, but still properly recover frame 2. + // THIS TEST BREAKING MIGHT MEAN YOU'VE MADE THINGS WORK BETTER! + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u64; + let return_address2 = 0x50000900u64; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 (literally nothing!) 
+ .mark(&frame1_sp) + // frame 1 (this is sadly dropped) + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame1_fp) + .D64(&frame2_fp) + .D64(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 64) // Whatever values on the stack. + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address. + .mark(&frame2_fp) // next fp will point to the next value + .D64(0) + .D64(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", return_address1); // we will sadly ignore this + f.raw.set_register("fp", frame1_fp.value().unwrap()); + f.raw.set_register("sp", stack.start().value().unwrap()); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 2 (found as Frame 1) + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame2_fp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer_ptr_auth_strip() { + // Same as the basic frame pointer test but extra high bits have been set which + // must be masked out. This is vaguely emulating Arm Pointer Authentication, + // although very synthetically. This might break if we implement more accurate + // stripping. 
But at that point we should have a better understanding of how + // to make an "accurate" test! + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u64; + let return_address2 = 0x50000900u64; + let authenticated_return_address1 = return_address1 | 0x0013_8000_0000_0000; + let authenticated_return_address2 = return_address2 | 0x1110_0000_0000_0000; + + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame0_fp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + let authenticated_frame1_fp = Label::new(); + let authenticated_frame2_fp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame0_fp) // next fp will point to the next value + .D64(&authenticated_frame1_fp) // save current frame pointer + .D64(authenticated_return_address1) // save current link register + .mark(&frame1_sp) + // frame 1 + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame1_fp) + .D64(&authenticated_frame2_fp) + .D64(authenticated_return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 64) // Whatever values on the stack. + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address. 
+ .mark(&frame2_fp) // next fp will point to the next value + .D64(0) + .D64(0); + + authenticated_frame1_fp.set_const(frame1_fp.value().unwrap() | 0xa310_0000_0000_0000); + authenticated_frame2_fp.set_const(frame2_fp.value().unwrap() | 0xf31e_8000_0000_0000); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", 0x1fe0fe10); + f.raw.set_register("fp", frame0_fp.value().unwrap()); + f.raw.set_register("sp", stack.start().value().unwrap()); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame1_fp.value().unwrap() + ); + } else { + unreachable!(); + } + } + + { + // Frame 2 + let frame = &s.frames[2]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame2_fp.value().unwrap() + ); + } else { + unreachable!(); + } + } +} + +const CALLEE_SAVE_REGS: &[&str] = &[ + "pc", "sp", "fp", "x19", "x20", 
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", +]; + +fn init_cfi_state_high_module() -> (TestFixture, Section, Context, MinidumpContextValidity) { + init_cfi_state_common(TestFixture::high_module()) +} + +fn init_cfi_state() -> (TestFixture, Section, Context, MinidumpContextValidity) { + init_cfi_state_common(TestFixture::new()) +} + +fn init_cfi_state_common( + mut f: TestFixture, +) -> (TestFixture, Section, Context, MinidumpContextValidity) { + let symbols = [ + // The youngest frame's function. + "FUNC 4000 1000 10 enchiridion\n", + // Initially, nothing has been pushed on the stack, + // and the return address is still in the link + // register (x30). + "STACK CFI INIT 4000 100 .cfa: sp 0 + .ra: x30\n", + // Push x19, x20, the frame pointer and the link register. + "STACK CFI 4001 .cfa: sp 32 + .ra: .cfa -8 + ^", + " x19: .cfa -32 + ^ x20: .cfa -24 + ^ ", + " x29: .cfa -16 + ^\n", + // Save x19..x22 in x0..x3: verify that we populate + // the youngest frame with all the values we have. + "STACK CFI 4002 x19: x0 x20: x1 x21: x2 x22: x3\n", + // Restore x19..x22. Save the non-callee-saves register x1. + "STACK CFI 4003 .cfa: sp 40 + x1: .cfa 40 - ^", + " x19: x19 x20: x20 x21: x21 x22: x22\n", + // Move the .cfa back eight bytes, to point at the return + // address, and restore the sp explicitly. + "STACK CFI 4005 .cfa: sp 32 + x1: .cfa 32 - ^", + " x29: .cfa 8 - ^ .ra: .cfa ^ sp: .cfa 8 +\n", + // Recover the PC explicitly from a new stack slot; + // provide garbage for the .ra. + "STACK CFI 4006 .cfa: sp 40 + pc: .cfa 40 - ^\n", + // The calling function. + "FUNC 5000 1000 10 epictetus\n", + // Mark it as end of stack. + "STACK CFI INIT 5000 1000 .cfa: 0 .ra: 0\n", + // A function whose CFI makes the stack pointer + // go backwards. + "FUNC 6000 1000 20 palinal\n", + "STACK CFI INIT 6000 1000 .cfa: sp 8 - .ra: x30\n", + // A function with CFI expressions that can't be + // evaluated. 
+ "FUNC 7000 1000 20 rhetorical\n", + "STACK CFI INIT 7000 1000 .cfa: moot .ra: ambiguous\n", + ]; + f.add_symbols(String::from("module1"), symbols.concat()); + + f.raw.set_register("pc", 0x0000_0000_4000_5510); + f.raw.set_register("sp", 0x0000_0000_8000_0000); + f.raw.set_register("fp", 0x0000_00a2_8112_e110); + f.raw.set_register("x19", 0x5e68b5d5b5d55e68); + f.raw.set_register("x20", 0x34f3ebd1ebd134f3); + f.raw.set_register("x21", 0x74bca31ea31e74bc); + f.raw.set_register("x22", 0x16b32dcb2dcb16b3); + f.raw.set_register("x23", 0x21372ada2ada2137); + f.raw.set_register("x24", 0x557dbbbbbbbb557d); + f.raw.set_register("x25", 0x8ca748bf48bf8ca7); + f.raw.set_register("x26", 0x21f0ab46ab4621f0); + f.raw.set_register("x27", 0x146732b732b71467); + f.raw.set_register("x28", 0xa673645fa673645f); + + let raw_valid = MinidumpContextValidity::All; + + let expected = f.raw.clone(); + let expected_regs = CALLEE_SAVE_REGS; + let expected_valid = MinidumpContextValidity::Some(expected_regs.iter().copied().collect()); + + let stack = Section::new(); + stack + .start() + .set_const(f.raw.get_register("sp", &raw_valid).unwrap()); + + (f, stack, expected, expected_valid) +} + +async fn check_cfi( + f: TestFixture, + stack: Section, + expected: Context, + expected_valid: MinidumpContextValidity, +) { + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + if let MinidumpContextValidity::Some(ref expected_regs) = expected_valid { + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::CallFrameInfo); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), expected_regs.len()); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + for reg in expected_regs { + 
assert_eq!( + ctx.get_register(reg, valid), + expected.get_register(reg, &expected_valid), + "{reg} registers didn't match!" + ); + } + return; + } else { + unreachable!() + } + } + } + unreachable!(); +} + +#[tokio::test] +async fn test_cfi_at_4000() { + let (mut f, mut stack, expected, expected_valid) = init_cfi_state(); + + stack = stack.append_repeated(0, 120); + + f.raw.set_register("pc", 0x0000000040004000); + f.raw.set_register("lr", 0x0000000040005510); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4001() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0x5e68b5d5b5d55e68) // saved x19 + .D64(0x34f3ebd1ebd134f3) // saved x20 + .D64(0x0000_00a2_8112_e110) // saved fp + .D64(0x0000_0000_4000_5510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + f.raw.set_register("pc", 0x0000000040004001); + f.raw.set_register("x19", 0xadc9f635a635adc9); + f.raw.set_register("x20", 0x623135ac35ac6231); + f.raw.set_register("fp", 0x5fc4be14be145fc4); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4002() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0xff3dfb81fb81ff3d) // no longer saved x19 + .D64(0x34f3ebd1ebd134f3) // no longer saved x20 + .D64(0x0000_00a2_8112_e110) // saved fp + .D64(0x0000_0000_4000_5510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + f.raw.set_register("pc", 0x0000000040004002); + f.raw.iregs[0] = 0x5e68b5d5b5d55e68; // saved x19 + f.raw.iregs[1] = 0x34f3ebd1ebd134f3; // saved x20 + f.raw.iregs[2] = 0x74bca31ea31e74bc; // saved x21 + f.raw.iregs[3] = 0x16b32dcb2dcb16b3; // saved x22 + f.raw.iregs[19] = 
0xadc9f635a635adc9; // distinct callee x19 + f.raw.iregs[20] = 0x623135ac35ac6231; // distinct callee x20 + f.raw.iregs[21] = 0xac4543564356ac45; // distinct callee x21 + f.raw.iregs[22] = 0x2561562f562f2561; // distinct callee x22 + f.raw.set_register("fp", 0x5fc4be14be145fc4); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4003() { + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0xdd5a48c848c8dd5a) // saved x1 (even though it's not callee-saves) + .D64(0xff3dfb81fb81ff3d) // no longer saved x19 + .D64(0x34f3ebd1ebd134f3) // no longer saved x20 + .D64(0x0000_00a2_8112_e110) // saved fp + .D64(0x0000_0000_4000_5510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + expected.iregs[1] = 0xdd5a48c848c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("x1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x0000000040004003); + f.raw.iregs[1] = 0xfb756319fb756319; + f.raw.set_register("fp", 0x5fc4be14be145fc4); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4004() { + // Should just be the same as 4003 + + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0xdd5a48c848c8dd5a) // saved x1 (even though it's not callee-saves) + .D64(0xff3dfb81fb81ff3d) // no longer saved x19 + .D64(0x34f3ebd1ebd134f3) // no longer saved x20 + .D64(0x0000_00a2_8112_e110) // saved fp + .D64(0x0000_0000_4000_5510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + expected.iregs[1] = 0xdd5a48c848c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("x1"); + } else { + 
unreachable!(); + } + + f.raw.set_register("pc", 0x0000000040004004); + f.raw.iregs[1] = 0xfb756319fb756319; + f.raw.set_register("fp", 0x5fc4be14be145fc4); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4005_ptr_auth_strip_apple() { + // This is the same as the normal 4005 test but with extra garabage (auth) bits + // set in the high 24 bits. This emulates what apple platforms looks like. + + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0xdd5a48c848c8dd5a) // saved x1 (even though it's not callee-saves) + .D64(0xff3dfb81fb81ff3d) // no longer saved x19 + .D64(0x34f3ebd1ebd134f3) // no longer saved x20 + .D64(0xae23_80a2_8112_e110) // saved fp WITH AUTH + .D64(0xae1d_0000_4000_5510) // return address WITH AUTH + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + expected.iregs[1] = 0xdd5a48c848c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("x1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x0000000040004005); + f.raw.iregs[1] = 0xfb756319fb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4005_ptr_auth_strip_high() { + // This is the same as the normal 4005 test but with extra garabage (auth) bits + // set in the **extra** high bits. This emulates what android platforms look like. 
+ + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state_high_module(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0xdd5a48c848c8dd5a) // saved x1 (even though it's not callee-saves) + .D64(0xff3dfb81fb81ff3d) // no longer saved x19 + .D64(0x34f3ebd1ebd134f3) // no longer saved x20 + .D64(0x1003_45a2_8112_e110) // saved fp WITH AUTH + .D64(0x100d_f700_4000_5510) // return address WITH AUTH + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + expected.set_register("fp", 0x0003_45a2_8112_e110); + expected.set_register("pc", 0x000d_f700_4000_5510); + expected.iregs[1] = 0xdd5a48c848c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("x1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x0000000040004005); + f.raw.iregs[1] = 0xfb756319fb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4005() { + // Here we move the .cfa, but provide an explicit rule to recover the SP, + // so again there should be no change in the registers recovered. 
+ + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0xdd5a48c848c8dd5a) // saved x1 (even though it's not callee-saves) + .D64(0xff3dfb81fb81ff3d) // no longer saved x19 + .D64(0x34f3ebd1ebd134f3) // no longer saved x20 + .D64(0x0000_00a2_8112_e110) // saved fp + .D64(0x0000_0000_4000_5510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + expected.iregs[1] = 0xdd5a48c848c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("x1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x0000000040004005); + f.raw.iregs[1] = 0xfb756319fb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4006() { + // Here we provide an explicit rule for the PC, and have the saved .ra be + // bogus. + + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D64(0x0000000040005510) // saved pc + .D64(0xdd5a48c848c8dd5a) // saved x1 (even though it's not callee-saves) + .D64(0xff3dfb81fb81ff3d) // no longer saved x19 + .D64(0x34f3ebd1ebd134f3) // no longer saved x20 + .D64(0x0000_00a2_8112_e110) // saved fp + .D64(0xf8d157835783f8d1) // .ra rule recovers this, which is garbage + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap()); + expected.iregs[1] = 0xdd5a48c848c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("x1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x0000000040004006); + f.raw.iregs[1] = 0xfb756319fb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_reject_backwards() { + // Check that we reject rules that would cause the stack pointer to + // move in the wrong 
direction. + + let (mut f, mut stack, _expected, _expected_valid) = init_cfi_state(); + + stack = stack.append_repeated(0, 120); + + f.raw.set_register("pc", 0x0000000040006000); + f.raw.set_register("sp", 0x0000000080000000); + f.raw.set_register("lr", 0x0000000040005510); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); +} + +#[tokio::test] +async fn test_cfi_reject_bad_exprs() { + // Check that we reject rules whose expressions' evaluation fails. + + let (mut f, mut stack, _expected, _expected_valid) = init_cfi_state(); + + stack = stack.append_repeated(0, 120); + + f.raw.set_register("pc", 0x0000000040007000); + f.raw.set_register("sp", 0x0000000080000000); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); +} + +#[tokio::test] +async fn test_frame_pointer_overflow() { + // Make sure we don't explode when trying frame pointer analysis on a value + // that will overflow. + + type Pointer = u64; + let stack_max: Pointer = Pointer::MAX; + let stack_size: Pointer = 1000; + let bad_frame_ptr: Pointer = stack_max; + + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start: Pointer = stack_max - stack_size; + stack.start().set_const(stack_start); + + stack = stack + // frame 0 + .append_repeated(0, stack_size as usize); // junk, not important to the test + + f.raw.set_register("pc", 0x00007400c0000200); + f.raw.set_register("fp", bad_frame_ptr); + f.raw + .set_register("sp", stack.start().value().unwrap() as Pointer); + f.raw.set_register("lr", 0x00007500b0000110); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + + // As long as we don't panic, we're good! +} + +#[tokio::test] +async fn test_frame_pointer_barely_no_overflow() { + // This is a simple frame pointer test but with the all the values pushed + // as close to the upper memory boundary as possible, to confirm that + // our code doesn't randomly overflow *AND* isn't overzealous in + // its overflow guards. 
+ + // We set the highest module here to bypass ptr auth stripping entirely and stress overflows + let mut f = TestFixture::highest_module(); + + let mut stack = Section::new(); + + type Pointer = u64; + let stack_max: Pointer = Pointer::MAX; + let pointer_size: Pointer = std::mem::size_of::() as Pointer; + let stack_size: Pointer = pointer_size * 3; + + let stack_start: Pointer = stack_max - stack_size; + let return_address: Pointer = 0x00007500b0000110; + stack.start().set_const(stack_start); + + let frame0_fp = Label::new(); + let frame1_sp = Label::new(); + let frame1_fp = Label::new(); + + stack = stack + // frame 0 + .mark(&frame0_fp) + .D64(&frame1_fp) // + .D64(return_address) // actual return address + // frame 1 + .mark(&frame1_sp) + .mark(&frame1_fp) // end of stack + .D64(0); + + f.raw.set_register("pc", 0x00007400c0000200); + f.raw + .set_register("fp", frame0_fp.value().unwrap() as Pointer); + f.raw + .set_register("sp", stack.start().value().unwrap() as Pointer); + f.raw.set_register("lr", return_address); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame0_fp.value().unwrap() as Pointer + ); + } else { + unreachable!(); + } + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() as Pointer + 
); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame1_fp.value().unwrap() as Pointer + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer_infinite_equality() { + // Leaf functions on Arm are allowed to not update the stack pointer, so + // it's valid for the frame pointer analysis to conclude that the stack + // pointer doesn't change. However we must only provide this allowance + // to the first stack frame, or else we're vulnerable to infinite loops. + // + // One of the CFI tests already checks that we allow the leaf case to work, + // so here we test that we don't get stuck in an infinite loop for the + // non-leaf case. + // + // This is just a copy-paste of test_frame_pointer except for the line + // "EVIL INFINITE FRAME POINTER" has been changed from frame2_fp to frame1_fp. + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u64; + let return_address2 = 0x50000900u64; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame0_fp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame0_fp) // next fp will point to the next value + .D64(&frame0_fp) // EVIL INFINITE FRAME POINTER + .D64(return_address1) // save current link register + .mark(&frame1_sp) + // frame 1 + .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame1_fp) + .D64(&frame2_fp) + .D64(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 64) // Whatever values on the stack. + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address. 
+ .mark(&frame2_fp) // next fp will point to the next value + .D64(0) + .D64(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", 0x1fe0fe10); + f.raw.set_register("fp", frame0_fp.value().unwrap()); + f.raw.set_register("sp", stack.start().value().unwrap()); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 (a messed up hybrid of frame0 and frame1) + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm64(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame0_fp.value().unwrap() + ); + } else { + unreachable!(); + } + } + + // Never get to frame 2, alas! +} diff --git a/third_party/rust/minidump-unwind/src/arm_unittest.rs b/third_party/rust/minidump-unwind/src/arm_unittest.rs new file mode 100644 index 000000000000..66f64d0c4bf7 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/arm_unittest.rs @@ -0,0 +1,1184 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. 
+ +use crate::*; +use minidump::format::CONTEXT_ARM; +use minidump::system_info::{Cpu, Os}; +use std::collections::HashMap; +use test_assembler::*; + +struct TestFixture { + pub raw: CONTEXT_ARM, + pub modules: MinidumpModuleList, + pub system_info: SystemInfo, + pub symbols: HashMap, +} + +impl TestFixture { + pub fn new() -> TestFixture { + TestFixture { + raw: CONTEXT_ARM::default(), + // Give the two modules reasonable standard locations and names + // for tests to play with. + modules: MinidumpModuleList::from_modules(vec![ + MinidumpModule::new(0x40000000, 0x10000, "module1"), + MinidumpModule::new(0x50000000, 0x10000, "module2"), + ]), + system_info: SystemInfo { + os: Os::Ios, + os_version: None, + os_build: None, + cpu: Cpu::Arm, + cpu_info: None, + cpu_microcode_version: None, + cpu_count: 1, + }, + symbols: HashMap::new(), + } + } + + pub async fn walk_stack(&self, stack: Section) -> CallStack { + let context = MinidumpContext { + raw: MinidumpRawContext::Arm(self.raw.clone()), + valid: MinidumpContextValidity::All, + }; + let base = stack.start().value().unwrap(); + let size = stack.size(); + let stack = stack.get_contents().unwrap(); + let stack_memory = MinidumpMemory { + desc: Default::default(), + base_address: base, + size, + bytes: &stack, + endian: scroll::LE, + }; + let symbolizer = Symbolizer::new(string_symbol_supplier(self.symbols.clone())); + let mut stack = CallStack::with_context(context); + + walk_stack( + 0, + (), + &mut stack, + Some(UnifiedMemory::Memory(&stack_memory)), + &self.modules, + &self.system_info, + &symbolizer, + ) + .await; + + stack + } + + pub fn add_symbols(&mut self, name: String, symbols: String) { + self.symbols.insert(name, symbols); + } +} + +#[tokio::test] +async fn test_simple() { + let mut f = TestFixture::new(); + let stack = Section::new(); + stack.start().set_const(0x80000000); + // There should be no references to the stack in this walk: we don't + // provide any call frame information, so trying to 
reconstruct the + // context frame's caller should fail. So there's no need for us to + // provide stack contents. + f.raw.set_register("pc", 0x4000c020); + f.raw.set_register("fp", 0x80000000); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + let f = &s.frames[0]; + let m = f.module.as_ref().unwrap(); + assert_eq!(m.code_file(), "module1"); +} + +#[tokio::test] +async fn test_scan_without_symbols() { + // Scanning should work without any symbols + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u32; + let return_address2 = 0x50000900u32; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D32(0x40090000) // junk that's not + .D32(0x60000000) // a return address + .D32(return_address1) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 16) // space + .D32(0xF0000000) // more junk + .D32(0x0000000D) + .D32(return_address2) // actual return address + // frame 2 + .mark(&frame2_sp) + .append_repeated(0, 32); // end of stack + + f.raw.set_register("pc", 0x40005510); + // set an invalid non-zero value for the frame pointer + // to force stack scanning + f.raw.set_register("fp", 0x00000001); + f.raw + .set_register("sp", stack.start().value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", 
valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } + + { + // Frame 2 + let frame = &s.frames[2]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_scan_first_frame() { + // The first (context) frame gets extra long scans, this test checks that. + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u32; + let return_address2 = 0x50000900u32; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 16) // space + .D32(0x40090000) // junk that's not + .D32(0x60000000) // a return address + .append_repeated(0, 96) // more space + .D32(return_address1) // actual return address + // frame 1 + .mark(&frame1_sp) + .append_repeated(0, 32) // space + .D32(0xF0000000) // more junk + .D32(0x0000000D) + .append_repeated(0, 336) // more space + .D32(return_address2) // actual return address (won't be found) + // frame 2 + .mark(&frame2_sp) + .append_repeated(0, 64); // end of stack + + f.raw.set_register("pc", 0x40005510); + // set an invalid non-zero value for the frame pointer + // to force stack scanning + f.raw.set_register("fp", 0x00000001); + f.raw + .set_register("sp", stack.start().value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, 
FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_invalid_lr() { + let mut f = TestFixture::new(); + f.system_info.os = Os::Linux; + + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let lr = Label::new(); + let return_address1 = 0x50000100u32; + let return_address2 = 0x50000900u32; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 32) // space + .mark(&lr) // the LR points to something on the stack + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address + .mark(&frame1_fp) // next fp will point to the next value + .D32(&frame2_fp) // save current frame pointer + .D32(return_address1) // save current link register + .mark(&frame1_sp) + // frame 1 + .append_repeated(0, 32) // space + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address + .mark(&frame2_fp) + .D32(0) + .D32(return_address2) + .mark(&frame2_sp); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", lr.value().unwrap() as u32); + f.raw.set_register("fp", frame1_fp.value().unwrap() as u32); + f.raw + .set_register("sp", stack.start().value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, 
FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } + + { + // Frame 2 + let frame = &s.frames[2]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 2); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer() { + // Frame-pointer-based unwinding + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u32; + let return_address2 = 0x50000900u32; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame0_fp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 32) // space + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address + .mark(&frame0_fp) // next fp will point to the next value + .D32(&frame1_fp) // save current frame pointer + .D32(return_address1) // save current link register + .mark(&frame1_sp) + // frame 1 + .append_repeated(0, 32) // space + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // 
a return address + .mark(&frame1_fp) + .D32(&frame2_fp) + .D32(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 32) // Whatever values on the stack. + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address. + .mark(&frame2_fp) + .D32(0) + .D32(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", return_address1); + f.raw.set_register("fp", frame0_fp.value().unwrap() as u32); + f.raw + .set_register("sp", stack.start().value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() as u32 + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame1_fp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } + + { + // Frame 2 + let frame = &s.frames[2]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() as u32 + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame2_fp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } +} + 
+#[tokio::test] +async fn test_frame_pointer_stackless_leaf() { + // Same as test_frame_pointer but frame0 is a stackless leaf. + // + // In the current implementation we will misunderstand this slightly + // and basically "lose" frame 1, but still properly recover frame 2. + // THIS TEST BREAKING MIGHT MEAN YOU'VE MADE THINGS WORK BETTER! + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u32; + let return_address2 = 0x50000900u32; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 (literally nothing!) + .mark(&frame1_sp) + // frame 1 (this is sadly dropped) + .append_repeated(0, 32) // space + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address + .mark(&frame1_fp) + .D32(&frame2_fp) + .D32(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 32) // Whatever values on the stack. + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address. 
+ .mark(&frame2_fp) + .D32(0) + .D32(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", return_address1); // we will sadly ignore this + f.raw.set_register("fp", frame1_fp.value().unwrap() as u32); + f.raw + .set_register("sp", stack.start().value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 2 (Found as Frame 1) + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() as u32 + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame2_fp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer_stackful_leaf() { + // Same as test_frame_pointer but frame0 is a stackful leaf. + // + // In the current implementation we will misunderstand this slightly + // and basically "lose" frame 1, but still properly recover frame 2. + // THIS TEST BREAKING MIGHT MEAN YOU'VE MADE THINGS WORK BETTER! + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u32; + let return_address2 = 0x50000900u32; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 (all junk!) 
+ .append_repeated(0, 64) // space + .D64(0x0000000D) // junk that's not + .D64(0xF0000000) // a return address + .mark(&frame1_sp) + // frame 1 (this is sadly dropped) + .append_repeated(0, 32) // space + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address + .mark(&frame1_fp) + .D32(&frame2_fp) + .D32(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 32) // Whatever values on the stack. + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address. + .mark(&frame2_fp) + .D32(0) + .D32(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", return_address1); // we will sadly ignore this + f.raw.set_register("fp", frame1_fp.value().unwrap() as u32); + f.raw + .set_register("sp", stack.start().value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 2 (Found as Frame 1) + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address2); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame2_sp.value().unwrap() as u32 + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame2_fp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer_infinite_equality() { + // Leaf functions on Arm are allowed to not update the stack pointer, so + // it's valid for the frame pointer analysis to conclude that the stack + // pointer doesn't change. 
However we must only provide this allowance + // to the first stack frame, or else we're vulnerable to infinite loops. + // + // One of the CFI tests already checks that we allow the leaf case to work, + // so here we test that we don't get stuck in an infinite loop for the + // non-leaf case. + // + // This is just a copy-paste of test_frame_pointer, except that the line marked + // "EVIL INFINITE FRAME POINTER" has been changed from frame1_fp to frame0_fp. + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + + let return_address1 = 0x50000100u32; + let return_address2 = 0x50000900u32; + let frame1_sp = Label::new(); + let frame2_sp = Label::new(); + let frame0_fp = Label::new(); + let frame1_fp = Label::new(); + let frame2_fp = Label::new(); + + stack = stack + // frame 0 + .append_repeated(0, 32) // space + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address + .mark(&frame0_fp) // next fp will point to the next value + .D32(&frame0_fp) // EVIL INFINITE FRAME POINTER + .D32(return_address1) // save current link register + .mark(&frame1_sp) + // frame 1 + .append_repeated(0, 32) // space + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address + .mark(&frame1_fp) + .D32(&frame2_fp) + .D32(return_address2) + .mark(&frame2_sp) + // frame 2 + .append_repeated(0, 32) // Whatever values on the stack. + .D32(0x0000000D) // junk that's not + .D32(0xF0000000) // a return address. 
+ .mark(&frame2_fp) + .D32(0) + .D32(0); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("lr", return_address1); + f.raw.set_register("fp", frame0_fp.value().unwrap() as u32); + f.raw + .set_register("sp", stack.start().value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 (a messed up combination of frame 0 and 1) + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address1); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() as u32 + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame0_fp.value().unwrap() as u32 + ); + } else { + unreachable!(); + } + } + + // Never get to frame 2, alas! +} + +const CALLEE_SAVE_REGS: &[&str] = &["pc", "sp", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "fp"]; + +fn init_cfi_state() -> (TestFixture, Section, CONTEXT_ARM, MinidumpContextValidity) { + let mut f = TestFixture::new(); + let symbols = [ + // The youngest frame's function. + "FUNC 4000 1000 10 enchiridion\n", + // Initially, nothing has been pushed on the stack, + // and the return address is still in the link register. + "STACK CFI INIT 4000 100 .cfa: sp .ra: lr\n", + // Push r4, the frame pointer, and the link register. + "STACK CFI 4001 .cfa: sp 12 + r4: .cfa 12 - ^", + " r11: .cfa 8 - ^ .ra: .cfa 4 - ^\n", + // Save r4..r7 in r0..r3: verify that we populate + // the youngest frame with all the values we have. + "STACK CFI 4002 r4: r0 r5: r1 r6: r2 r7: r3\n", + // Restore r4..r7. 
Save the non-callee-saves register r1. + "STACK CFI 4003 .cfa: sp 16 + r1: .cfa 16 - ^", + " r4: r4 r5: r5 r6: r6 r7: r7\n", + // Move the .cfa back four bytes, to point at the return + // address, and restore the sp explicitly. + "STACK CFI 4005 .cfa: sp 12 + r1: .cfa 12 - ^", + " r11: .cfa 4 - ^ .ra: .cfa ^ sp: .cfa 4 +\n", + // Recover the PC explicitly from a new stack slot; + // provide garbage for the .ra. + "STACK CFI 4006 .cfa: sp 16 + pc: .cfa 16 - ^\n", + // The calling function. + "FUNC 5000 1000 10 epictetus\n", + // Mark it as end of stack. + "STACK CFI INIT 5000 1000 .cfa: 0 .ra: 0\n", + // A function whose CFI makes the stack pointer + // go backwards. + "FUNC 6000 1000 20 palinal\n", + "STACK CFI INIT 6000 1000 .cfa: sp 4 - .ra: lr\n", + // A function with CFI expressions that can't be + // evaluated. + "FUNC 7000 1000 20 rhetorical\n", + "STACK CFI INIT 7000 1000 .cfa: moot .ra: ambiguous\n", + ]; + f.add_symbols(String::from("module1"), symbols.concat()); + + f.raw.set_register("pc", 0x40005510); + f.raw.set_register("sp", 0x80000000); + f.raw.set_register("fp", 0x8112e110); + f.raw.iregs[4] = 0xb5d55e68; + f.raw.iregs[5] = 0xebd134f3; + f.raw.iregs[6] = 0xa31e74bc; + f.raw.iregs[7] = 0x2dcb16b3; + f.raw.iregs[8] = 0x2ada2137; + f.raw.iregs[9] = 0xbbbb557d; + f.raw.iregs[10] = 0x48bf8ca7; + + let raw_valid = MinidumpContextValidity::All; + + let expected = f.raw.clone(); + let expected_regs = CALLEE_SAVE_REGS; + let expected_valid = MinidumpContextValidity::Some(expected_regs.iter().copied().collect()); + + let stack = Section::new(); + stack + .start() + .set_const(f.raw.get_register("sp", &raw_valid).unwrap() as u64); + + (f, stack, expected, expected_valid) +} + +async fn check_cfi( + f: TestFixture, + stack: Section, + expected: CONTEXT_ARM, + expected_valid: MinidumpContextValidity, +) { + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, 
FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + if let MinidumpContextValidity::Some(ref expected_regs) = expected_valid { + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::CallFrameInfo); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), expected_regs.len()); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + for reg in expected_regs { + assert_eq!( + ctx.get_register(reg, valid), + expected.get_register(reg, &expected_valid), + "{reg} registers didn't match!" + ); + } + return; + } + } + } + unreachable!(); +} + +#[tokio::test] +async fn test_cfi_at_4000() { + let (mut f, mut stack, expected, expected_valid) = init_cfi_state(); + + stack = stack.append_repeated(0, 120); + + f.raw.set_register("pc", 0x40004000); + f.raw.set_register("lr", 0x40005510); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4001() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D32(0xb5d55e68) // saved r4 + .D32(0x8112e110) // saved fp + .D32(0x40005510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap() as u32); + f.raw.set_register("pc", 0x40004001); + f.raw.iregs[4] = 0x635adc9f; + f.raw.set_register("fp", 0xbe145fc4); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4002() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D32(0xfb81ff3d) // no longer saved r4 + .D32(0x8112e110) // saved fp + .D32(0x40005510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap() as u32); + 
f.raw.set_register("pc", 0x40004002); + f.raw.iregs[0] = 0xb5d55e68; // saved r4 + f.raw.iregs[1] = 0xebd134f3; // saved r5 + f.raw.iregs[2] = 0xa31e74bc; // saved r6 + f.raw.iregs[3] = 0x2dcb16b3; // saved r7 + f.raw.iregs[4] = 0xfdd35466; // distinct callee r4 + f.raw.iregs[5] = 0xf18c946c; // distinct callee r5 + f.raw.iregs[6] = 0xac2079e8; // distinct callee r6 + f.raw.iregs[7] = 0xa449829f; // distinct callee r7 + f.raw.set_register("fp", 0xbe145fc4); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4003() { + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D32(0x48c8dd5a) // saved r1 (even though it's not callee-saves) + .D32(0xcb78040e) // no longer saved r4 + .D32(0x8112e110) // saved fp + .D32(0x40005510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap() as u32); + expected.iregs[1] = 0x48c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("r1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x40004003); + f.raw.iregs[1] = 0xfb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4004() { + // Should be the same as 4003 + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D32(0x48c8dd5a) // saved r1 (even though it's not callee-saves) + .D32(0xcb78040e) // no longer saved r4 + .D32(0x8112e110) // saved fp + .D32(0x40005510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap() as u32); + expected.iregs[1] = 0x48c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("r1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x40004004); + 
f.raw.iregs[1] = 0xfb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4005() { + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D32(0x48c8dd5a) // saved r1 (even though it's not callee-saves) + .D32(0xf013f841) // no longer saved r4 + .D32(0x8112e110) // saved fp + .D32(0x40005510) // return address + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap() as u32); + expected.iregs[1] = 0x48c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("r1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x40004005); + f.raw.iregs[1] = 0xfb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4006() { + // Here we provide an explicit rule for the PC, and have the saved .ra be + // bogus. + + let (mut f, mut stack, mut expected, mut expected_valid) = init_cfi_state(); + + let frame1_sp = Label::new(); + stack = stack + .D32(0x40005510) // saved pc + .D32(0x48c8dd5a) // saved r1 (even though it's not callee-saves) + .D32(0xf013f841) // no longer saved r4 + .D32(0x8112e110) // saved fp + .D32(0xf8d15783) // .ra rule recovers this, which is garbage + .mark(&frame1_sp) + .append_repeated(0, 120); + + expected.set_register("sp", frame1_sp.value().unwrap() as u32); + expected.iregs[1] = 0x48c8dd5a; + if let MinidumpContextValidity::Some(ref mut which) = expected_valid { + which.insert("r1"); + } else { + unreachable!(); + } + + f.raw.set_register("pc", 0x40004006); + f.raw.iregs[1] = 0xfb756319; + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_reject_backwards() { + // Check that we reject rules that would cause the stack pointer to + // move in the wrong direction. 
+ + let (mut f, mut stack, _expected, _expected_valid) = init_cfi_state(); + + stack = stack.append_repeated(0, 120); + + f.raw.set_register("pc", 0x40006000); + f.raw.set_register("sp", 0x80000000); + f.raw.set_register("lr", 0x40005510); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); +} + +#[tokio::test] +async fn test_cfi_reject_bad_exprs() { + // Check that we reject rules whose expressions' evaluation fails. + + let (mut f, mut stack, _expected, _expected_valid) = init_cfi_state(); + + stack = stack.append_repeated(0, 120); + + f.raw.set_register("pc", 0x40007000); + f.raw.set_register("sp", 0x80000000); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); +} + +#[tokio::test] +async fn test_frame_pointer_overflow() { + // Make sure we don't explode when trying frame pointer analysis on a value + // that will overflow. + + type Pointer = u32; + let stack_max: Pointer = Pointer::MAX; + let stack_size: Pointer = 1000; + let bad_frame_ptr: Pointer = stack_max; + + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start: Pointer = stack_max - stack_size; + stack.start().set_const(stack_start as u64); + + stack = stack + // frame 0 + .append_repeated(0, stack_size as usize); // junk, not important to the test + + f.raw.set_register("pc", 0x7a100000); + f.raw.set_register("fp", bad_frame_ptr); + f.raw + .set_register("sp", stack.start().value().unwrap() as Pointer); + f.raw.set_register("lr", 0x7b302000); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + + // As long as we don't panic, we're good! +} + +#[tokio::test] +async fn test_frame_pointer_overflow_nonsense_32bit_stack() { + // same as test_frame_pointer_overflow, but we're going to abuse the fact + // that rust-minidump prefers representing things in 64-bit to create + // impossible stack addresses that overflow 32-bit integers but appear + // valid in 64-bit. 
By doing this memory reads will "succeed" but + // pointer math done in the native pointer width will overflow and + // everything will be sad. + + type Pointer = u32; + let pointer_size: u64 = std::mem::size_of::<Pointer>() as u64; + let stack_max: u64 = Pointer::MAX as u64 + pointer_size * 2; + let stack_size: u64 = 1000; + let bad_frame_ptr: u64 = Pointer::MAX as u64 - pointer_size; + + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start: u64 = stack_max - stack_size; + stack.start().set_const(stack_start); + + stack = stack + // frame 0 + .append_repeated(0, 1000); // junk, not important to the test + + f.raw.set_register("pc", 0x7a100000); + f.raw.set_register("fp", bad_frame_ptr as u32); + f.raw + .set_register("sp", stack.start().value().unwrap() as Pointer); + f.raw.set_register("lr", 0x7b302000); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + + // As long as we don't panic, we're good! +} + +#[tokio::test] +async fn test_frame_pointer_barely_no_overflow() { + // This is a simple frame pointer test but with all the values pushed + // as close to the upper memory boundary as possible, to confirm that + // our code doesn't randomly overflow *AND* isn't overzealous in + // its overflow guards. 
+ + let mut f = TestFixture::new(); + let mut stack = Section::new(); + + type Pointer = u32; + let stack_max: Pointer = Pointer::MAX; + let pointer_size: Pointer = std::mem::size_of::<Pointer>() as Pointer; + let stack_size: Pointer = pointer_size * 3; + + let stack_start: Pointer = stack_max - stack_size; + let return_address: Pointer = 0x7b302000; + stack.start().set_const(stack_start as u64); + + let frame0_fp = Label::new(); + let frame1_sp = Label::new(); + let frame1_fp = Label::new(); + + stack = stack + // frame 0 + .mark(&frame0_fp) + .D32(&frame1_fp) // saved caller frame pointer + .D32(return_address) // actual return address + // frame 1 + .mark(&frame1_sp) + .mark(&frame1_fp) // end of stack + .D32(0); + + f.raw.set_register("pc", 0x7a100000); + f.raw + .set_register("fp", frame0_fp.value().unwrap() as Pointer); + f.raw + .set_register("sp", stack.start().value().unwrap() as Pointer); + f.raw.set_register("lr", return_address); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame0_fp.value().unwrap() as Pointer + ); + } else { + unreachable!(); + } + } + + { + // Frame 1 + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), 3); + } else { + unreachable!(); + } + + if let MinidumpRawContext::Arm(ctx) = &frame.context.raw { + assert_eq!(ctx.get_register("pc", valid).unwrap(), return_address); + assert_eq!( + ctx.get_register("sp", valid).unwrap(), + frame1_sp.value().unwrap() as Pointer + ); + assert_eq!( + ctx.get_register("fp", valid).unwrap(), + frame1_fp.value().unwrap() as Pointer + 
); + } else { + unreachable!(); + } + } +} diff --git a/third_party/rust/minidump-unwind/src/lib.rs b/third_party/rust/minidump-unwind/src/lib.rs new file mode 100644 index 000000000000..77fb45a19f66 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/lib.rs @@ -0,0 +1,898 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +//! Unwind stack frames for a thread. + +#[cfg(all(doctest, feature = "http"))] +doc_comment::doctest!("../README.md"); + +mod amd64; +mod arm; +mod arm64; +mod arm64_old; +mod mips; +pub mod symbols; +pub mod system_info; +mod x86; + +use minidump::*; +use minidump_common::utils::basename; +use scroll::ctx::{SizeWith, TryFromCtx}; +use std::borrow::Cow; +use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::convert::TryFrom; +use std::io::{self, Write}; +use tracing::trace; + +pub use crate::symbols::*; +pub use crate::system_info::*; + +#[derive(Clone, Copy)] +struct GetCallerFrameArgs<'a, P> { + callee_frame: &'a StackFrame, + grand_callee_frame: Option<&'a StackFrame>, + stack_memory: UnifiedMemory<'a, 'a>, + modules: &'a MinidumpModuleList, + system_info: &'a SystemInfo, + symbol_provider: &'a P, +} + +impl

<P> GetCallerFrameArgs<'_, P> { + fn valid(&self) -> &MinidumpContextValidity { + &self.callee_frame.context.valid + } +} + +mod impl_prelude { + pub(crate) use super::{ + CfiStackWalker, FrameTrust, GetCallerFrameArgs, StackFrame, SymbolProvider, + }; +} + +/// Indicates how well the instruction pointer derived during +/// stack walking is trusted. Since the stack walker can resort to +/// stack scanning, it can wind up with dubious frames. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum FrameTrust { + /// Unknown + None, + /// Scanned the stack, found this. + Scan, + /// Found while scanning stack using call frame info. + CfiScan, + /// Derived from frame pointer. + FramePointer, + /// Derived from call frame info. + CallFrameInfo, + /// Explicitly provided by some external stack walker. + PreWalked, + /// Given as instruction pointer in a context. + Context, +} + +impl FrameTrust { + /// Return a string describing how a stack frame was found + /// by the stackwalker. + pub fn description(&self) -> &'static str { + match *self { + FrameTrust::Context => "given as instruction pointer in context", + FrameTrust::PreWalked => "recovered by external stack walker", + FrameTrust::CallFrameInfo => "call frame info", + FrameTrust::CfiScan => "call frame info with scanning", + FrameTrust::FramePointer => "previous frame's frame pointer", + FrameTrust::Scan => "stack scanning", + FrameTrust::None => "unknown", + } + } + + pub fn as_str(&self) -> &'static str { + match *self { + FrameTrust::Context => "context", + FrameTrust::PreWalked => "prewalked", + FrameTrust::CallFrameInfo => "cfi", + FrameTrust::CfiScan => "cfi_scan", + FrameTrust::FramePointer => "frame_pointer", + FrameTrust::Scan => "scan", + FrameTrust::None => "non", + } + } +} + +/// The calling convention of a function. 
+#[derive(Debug, Clone)] +pub enum CallingConvention { + Cdecl, + WindowsThisCall, + OtherThisCall, +} + +/// Arguments for this function +#[derive(Debug, Clone)] +pub struct FunctionArgs { + /// What we assumed the calling convention was. + pub calling_convention: CallingConvention, + + /// The actual arguments. + pub args: Vec<FunctionArg>, +} + +/// A function argument. +#[derive(Debug, Clone)] +pub struct FunctionArg { + /// The name of the argument (usually actually just the type). + pub name: String, + /// The value of the argument. + pub value: Option<u64>, +} + +/// A stack frame for an inlined function. +/// +/// See [`StackFrame::inlines`][] for more details. +#[derive(Debug, Clone)] +pub struct InlineFrame { + /// The name of the function + pub function_name: String, + /// The file name of the stack frame + pub source_file_name: Option<String>, + /// The line number of the stack frame + pub source_line: Option<u32>, +} + +/// A single stack frame produced from unwinding a thread's stack. +#[derive(Debug, Clone)] +pub struct StackFrame { + /// The program counter location as an absolute virtual address. + /// + /// - For the innermost called frame in a stack, this will be an exact + /// program counter or instruction pointer value. + /// + /// - For all other frames, this address is within the instruction that + /// caused execution to branch to this frame's callee (although it may + /// not point to the exact beginning of that instruction). This ensures + /// that, when we look up the source code location for this frame, we + /// get the source location of the call, not of the point at which + /// control will resume when the call returns, which may be on the next + /// line. 
(If the compiler knows the callee never returns, it may even + /// place the call instruction at the very end of the caller's machine + /// code, such that the "return address" (which will never be used) + /// immediately after the call instruction is in an entirely different + /// function, perhaps even from a different source file.) + /// + /// On some architectures, the return address as saved on the stack or in + /// a register is fine for looking up the point of the call. On others, it + /// requires adjustment. + pub instruction: u64, + + /// The instruction address (program counter) that execution of this function + /// would resume at, if the callee returns. + /// + /// This is exactly **the return address of the callee**. We use this + /// nonstandard terminology because just calling this "return address" + /// would be ambiguous and too easy to mix up. + /// + /// **Note:** you should strongly prefer using [`StackFrame::instruction`][], which should + /// be the address of the instruction before this one which called the callee. + /// That is the instruction that this function was logically "executing" when the + /// program's state was captured, and therefore what people expect from + /// backtraces. + /// + /// This is more than a matter of user expectations: **there are situations + /// where this value is nonsensical but the [`StackFrame::instruction`][] is valid.** + /// + /// Specifically, if the callee is "noreturn" then *this function should + /// never resume execution*. The compiler has no obligation to emit any + /// instructions after such a CALL, but CALL still implicitly pushes the + /// instruction after itself to the stack. Such a return address may + /// therefore be outside the "bounds" of this function!!! 
+ /// + /// Yes, compilers *can* just immediately jump into the callee for + /// noreturn calls, but it's genuinely very helpful for them to emit a + /// CALL because it keeps the stack reasonable for backtraces and + /// debuggers, which are more interested in [`StackFrame::instruction`][] anyway! + /// + /// (If this is the top frame of the call stack, then `resume_address` + /// and `instruction` are exactly equal and should reflect the actual + /// program counter of this thread.) + pub resume_address: u64, + + /// The module in which the instruction resides. + pub module: Option, + + /// Any unloaded modules which overlap with this address. + /// + /// This is currently only populated if `module` is None. + /// + /// Since unloaded modules may overlap, there may be more than + /// one module. Since a module may be unloaded and reloaded at + /// multiple positions, we keep track of all the offsets that + /// apply. BTrees are used to produce a more stable output. + /// + /// So this is a `BTreeMap>`. + pub unloaded_modules: BTreeMap>, + + /// The function name, may be omitted if debug symbols are not available. + pub function_name: Option, + + /// The start address of the function, may be omitted if debug symbols + /// are not available. + pub function_base: Option, + + /// The size, in bytes, of the arguments pushed on the stack for this function. + /// WIN STACK unwinding needs this value to work; it's otherwise uninteresting. + pub parameter_size: Option, + + /// The source file name, may be omitted if debug symbols are not available. + pub source_file_name: Option, + + /// The (1-based) source line number, may be omitted if debug symbols are + /// not available. + pub source_line: Option, + + /// The start address of the source line, may be omitted if debug symbols + /// are not available. + pub source_line_base: Option, + + /// Any inline frames that cover the frame address, ordered "inside to outside", + /// or "deepest callee to shallowest callee". 
This is the same order that StackFrames + /// appear in. + /// + /// These frames are "fake" in that they don't actually exist at runtime, and are only + /// known because the compiler added debuginfo saying they exist. + /// + /// As a result, many properties of these frames either don't exist or are + /// in some sense "inherited" from the parent real frame. For instance they + /// have the same instruction/module by definiton. + /// + /// If you were to print frames you would want to do something like: + /// + /// ```ignore + /// let mut frame_num = 0; + /// for frame in &thread.frames { + /// // Inlines come first + /// for inline in &frame.inlines { + /// print_inline(frame_num, frame, inline); + /// frame_num += 1; + /// } + /// print_frame(frame_num, frame); + /// frame_num += 1; + /// } + /// ``` + pub inlines: Vec, + + /// Amount of trust the stack walker has in the instruction pointer + /// of this frame. + pub trust: FrameTrust, + + /// The CPU context containing register state for this frame. + pub context: MinidumpContext, + + /// Any function args we recovered. + pub arguments: Option, +} + +impl StackFrame { + /// Create a `StackFrame` from a `MinidumpContext`. + pub fn from_context(context: MinidumpContext, trust: FrameTrust) -> StackFrame { + StackFrame { + instruction: context.get_instruction_pointer(), + // Initialized the same as `instruction`, but left unmodified during stack walking. 
+ resume_address: context.get_instruction_pointer(), + module: None, + unloaded_modules: BTreeMap::new(), + function_name: None, + function_base: None, + parameter_size: None, + source_file_name: None, + source_line: None, + source_line_base: None, + inlines: Vec::new(), + arguments: None, + trust, + context, + } + } +} + +impl FrameSymbolizer for StackFrame { + fn get_instruction(&self) -> u64 { + self.instruction + } + fn set_function(&mut self, name: &str, base: u64, parameter_size: u32) { + self.function_name = Some(String::from(name)); + self.function_base = Some(base); + self.parameter_size = Some(parameter_size); + } + fn set_source_file(&mut self, file: &str, line: u32, base: u64) { + self.source_file_name = Some(String::from(file)); + self.source_line = Some(line); + self.source_line_base = Some(base); + } + /// This function can be called multiple times, for the inlines that cover the + /// address at various levels of inlining. The call order is from outside to + /// inside. + fn add_inline_frame(&mut self, name: &str, file: Option<&str>, line: Option) { + self.inlines.push(InlineFrame { + function_name: name.to_string(), + source_file_name: file.map(ToString::to_string), + source_line: line, + }) + } +} + +/// Information about the results of unwinding a thread's stack. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CallStackInfo { + /// Everything went great. + Ok, + /// No `MinidumpContext` was provided, couldn't do anything. + MissingContext, + /// No stack memory was provided, couldn't unwind past the top frame. + MissingMemory, + /// The CPU type is unsupported. + UnsupportedCpu, + /// This thread wrote the minidump, it was skipped. + DumpThreadSkipped, +} + +/// A stack of `StackFrame`s produced as a result of unwinding a thread. +#[derive(Debug, Clone)] +pub struct CallStack { + /// The stack frames. 
+ /// By convention, the stack frame at index 0 is the innermost callee frame, + /// and the frame at the highest index in a call stack is the outermost + /// caller. + pub frames: Vec, + /// Information about this `CallStack`. + pub info: CallStackInfo, + /// The identifier of the thread. + pub thread_id: u32, + /// The name of the thread, if known. + pub thread_name: Option, + /// The GetLastError() value stored in the TEB. + pub last_error_value: Option, +} + +impl CallStack { + /// Construct a CallStack that just has the unsymbolicated context frame. + /// + /// This is the desired input for the stack walker. + pub fn with_context(context: MinidumpContext) -> Self { + Self { + frames: vec![StackFrame::from_context(context, FrameTrust::Context)], + info: CallStackInfo::Ok, + thread_id: 0, + thread_name: None, + last_error_value: None, + } + } + + /// Create a `CallStack` with `info` and no frames. + pub fn with_info(id: u32, info: CallStackInfo) -> CallStack { + CallStack { + info, + frames: vec![], + thread_id: id, + thread_name: None, + last_error_value: None, + } + } + + /// Write a human-readable description of the call stack to `f`. + /// + /// This is very verbose, it implements the output format used by + /// minidump_stackwalk. + pub fn print(&self, f: &mut T) -> io::Result<()> { + fn print_registers(f: &mut T, ctx: &MinidumpContext) -> io::Result<()> { + let registers: Cow> = match ctx.valid { + MinidumpContextValidity::All => { + let gpr = ctx.general_purpose_registers(); + let set: HashSet<&str> = gpr.iter().cloned().collect(); + Cow::Owned(set) + } + MinidumpContextValidity::Some(ref which) => Cow::Borrowed(which), + }; + + // Iterate over registers in a known order. + let mut output = String::new(); + for reg in ctx.general_purpose_registers() { + if registers.contains(reg) { + let reg_val = ctx.format_register(reg); + let next = format!(" {reg: >6} = {reg_val}"); + if output.chars().count() + next.chars().count() > 80 { + // Flush the buffer. 
+ writeln!(f, " {output}")?; + output.truncate(0); + } + output.push_str(&next); + } + } + if !output.is_empty() { + writeln!(f, " {output}")?; + } + Ok(()) + } + + if self.frames.is_empty() { + writeln!(f, "")?; + } + let mut frame_count = 0; + for frame in &self.frames { + // First print out inlines + for inline in &frame.inlines { + // Frame number + let frame_idx = frame_count; + frame_count += 1; + write!(f, "{frame_idx:2} ")?; + + // Module name + if let Some(ref module) = frame.module { + write!(f, "{}", basename(&module.code_file()))?; + } + + // Function name + write!(f, "!{}", inline.function_name)?; + + // Source file and line + if let (Some(source_file), Some(source_line)) = + (&inline.source_file_name, &inline.source_line) + { + write!(f, " [{} : {}]", basename(source_file), source_line,)?; + } + writeln!(f)?; + // A fake `trust` + writeln!(f, " Found by: inlining")?; + } + + // Now print out the "real frame" + let frame_idx = frame_count; + frame_count += 1; + let addr = frame.instruction; + + // Frame number + write!(f, "{frame_idx:2} ")?; + if let Some(module) = &frame.module { + // Module name + write!(f, "{}", basename(&module.code_file()))?; + + if let (Some(func_name), Some(func_base)) = + (&frame.function_name, &frame.function_base) + { + // Function name + write!(f, "!{func_name}")?; + + if let (Some(src_file), Some(src_line), Some(src_base)) = ( + &frame.source_file_name, + &frame.source_line, + &frame.source_line_base, + ) { + // Source file, line, and offset + write!( + f, + " [{} : {} + {:#x}]", + basename(src_file), + src_line, + addr - src_base + )?; + } else { + // We didn't have source info, so just give a byte offset from the func + write!(f, " + {:#x}", addr - func_base)?; + } + } else { + // We didn't have a function name, so just give a byte offset from the module + write!(f, " + {:#x}", addr - module.base_address())?; + } + } else { + // We didn't even find a module, so just print the raw address + write!(f, "{addr:#x}")?; + + // 
List off overlapping unloaded modules. + + // First we need to collect them up by name so that we can print + // all the overlaps from one module together and dedupe them. + // (!!! was that code deleted?) + for (name, offsets) in &frame.unloaded_modules { + write!(f, " (unloaded {name}@")?; + let mut first = true; + for offset in offsets { + if first { + write!(f, "{offset:#x}")?; + } else { + // `|` is our separator for multiple entries + write!(f, "|{offset:#x}")?; + } + first = false; + } + write!(f, ")")?; + } + } + + // Print the valid registers + writeln!(f)?; + print_registers(f, &frame.context)?; + + // And the trust we have of this result + writeln!(f, " Found by: {}", frame.trust.description())?; + + // Now print out recovered args + if let Some(args) = &frame.arguments { + use MinidumpRawContext::*; + let pointer_width = match &frame.context.raw { + X86(_) | Ppc(_) | Sparc(_) | Arm(_) | Mips(_) => 4, + Ppc64(_) | Amd64(_) | Arm64(_) | OldArm64(_) => 8, + }; + + let cc_summary = match args.calling_convention { + CallingConvention::Cdecl => "cdecl [static function]", + CallingConvention::WindowsThisCall => "windows thiscall [C++ member function]", + CallingConvention::OtherThisCall => { + "non-windows thiscall [C++ member function]" + } + }; + + writeln!(f, " Arguments (assuming {cc_summary})")?; + for (idx, arg) in args.args.iter().enumerate() { + if let Some(val) = arg.value { + if pointer_width == 4 { + writeln!(f, " arg {} ({}) = 0x{:08x}", idx, arg.name, val)?; + } else { + writeln!(f, " arg {} ({}) = 0x{:016x}", idx, arg.name, val)?; + } + } else { + writeln!(f, " arg {} ({}) = ", idx, arg.name)?; + } + } + // Add an extra new-line between frames when there's function arguments to make + // it more readable. 
+ writeln!(f)?; + } + } + Ok(()) + } +} + +struct CfiStackWalker<'a, C: CpuContext> { + instruction: u64, + has_grand_callee: bool, + grand_callee_parameter_size: u32, + + callee_ctx: &'a C, + callee_validity: &'a MinidumpContextValidity, + + caller_ctx: C, + caller_validity: HashSet<&'static str>, + + module: &'a MinidumpModule, + stack_memory: UnifiedMemory<'a, 'a>, +} + +impl<'a, C> CfiStackWalker<'a, C> +where + C: CpuContext + Clone, +{ + fn from_ctx_and_args( + ctx: &'a C, + args: &'a GetCallerFrameArgs<'a, P>, + callee_forwarded_regs: R, + ) -> Option + where + R: Fn(&MinidumpContextValidity) -> HashSet<&'static str>, + { + let module = args + .modules + .module_at_address(args.callee_frame.instruction)?; + let grand_callee = args.grand_callee_frame; + Some(Self { + instruction: args.callee_frame.instruction, + has_grand_callee: grand_callee.is_some(), + grand_callee_parameter_size: grand_callee.and_then(|f| f.parameter_size).unwrap_or(0), + + callee_ctx: ctx, + callee_validity: args.valid(), + + // Default to forwarding all callee-saved regs verbatim. + // The CFI evaluator may clear or overwrite these values. + // The stack pointer and instruction pointer are not included. 
+ caller_ctx: ctx.clone(), + caller_validity: callee_forwarded_regs(args.valid()), + + module, + stack_memory: args.stack_memory, + }) + } +} + +impl<'a, C> FrameWalker for CfiStackWalker<'a, C> +where + C: CpuContext, + C::Register: TryFrom, + u64: TryFrom, + C::Register: TryFromCtx<'a, Endian, [u8], Error = scroll::Error> + SizeWith, +{ + fn get_instruction(&self) -> u64 { + self.instruction + } + fn has_grand_callee(&self) -> bool { + self.has_grand_callee + } + fn get_grand_callee_parameter_size(&self) -> u32 { + self.grand_callee_parameter_size + } + fn get_register_at_address(&self, address: u64) -> Option { + let result: Option = self.stack_memory.get_memory_at_address(address); + result.and_then(|val| u64::try_from(val).ok()) + } + fn get_callee_register(&self, name: &str) -> Option { + self.callee_ctx + .get_register(name, self.callee_validity) + .and_then(|val| u64::try_from(val).ok()) + } + fn set_caller_register(&mut self, name: &str, val: u64) -> Option<()> { + let memoized = self.caller_ctx.memoize_register(name)?; + let val = C::Register::try_from(val).ok()?; + self.caller_validity.insert(memoized); + self.caller_ctx.set_register(name, val) + } + fn clear_caller_register(&mut self, name: &str) { + self.caller_validity.remove(name); + } + fn set_cfa(&mut self, val: u64) -> Option<()> { + // NOTE: some things have alluded to architectures where this isn't + // how the CFA should be handled, but we apparently don't support them yet? 
+ let stack_pointer_reg = self.caller_ctx.stack_pointer_register_name(); + let val = C::Register::try_from(val).ok()?; + self.caller_validity.insert(stack_pointer_reg); + self.caller_ctx.set_register(stack_pointer_reg, val) + } + fn set_ra(&mut self, val: u64) -> Option<()> { + let instruction_pointer_reg = self.caller_ctx.instruction_pointer_register_name(); + let val = C::Register::try_from(val).ok()?; + self.caller_validity.insert(instruction_pointer_reg); + self.caller_ctx.set_register(instruction_pointer_reg, val) + } +} + +#[tracing::instrument(name = "unwind_frame", level = "trace", skip_all, fields(idx = _frame_idx, fname = args.callee_frame.function_name.as_deref().unwrap_or("")))] +async fn get_caller_frame

( + _frame_idx: usize, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + match args.callee_frame.context.raw { + /* + MinidumpRawContext::PPC(ctx) => ctx.get_caller_frame(stack_memory), + MinidumpRawContext::PPC64(ctx) => ctx.get_caller_frame(stack_memory), + MinidumpRawContext::SPARC(ctx) => ctx.get_caller_frame(stack_memory), + */ + MinidumpRawContext::Arm(ref ctx) => arm::get_caller_frame(ctx, args).await, + MinidumpRawContext::Arm64(ref ctx) => arm64::get_caller_frame(ctx, args).await, + MinidumpRawContext::OldArm64(ref ctx) => arm64_old::get_caller_frame(ctx, args).await, + MinidumpRawContext::Amd64(ref ctx) => amd64::get_caller_frame(ctx, args).await, + MinidumpRawContext::X86(ref ctx) => x86::get_caller_frame(ctx, args).await, + MinidumpRawContext::Mips(ref ctx) => mips::get_caller_frame(ctx, args).await, + _ => None, + } +} + +async fn fill_source_line_info

( + frame: &mut StackFrame, + modules: &MinidumpModuleList, + symbol_provider: &P, +) where + P: SymbolProvider + Sync, +{ + // Find the module whose address range covers this frame's instruction. + if let Some(module) = modules.module_at_address(frame.instruction) { + // FIXME: this shouldn't need to clone, we should be able to use + // the same lifetime as the module list that's passed in. + frame.module = Some(module.clone()); + + // This is best effort, so ignore any errors. + let _ = symbol_provider.fill_symbol(module, frame).await; + + // If we got any inlines, reverse them! The symbol format makes it simplest to + // emit inlines from the shallowest callee to the deepest one ("inner to outer"), + // but we want inlines to be in the same order as the stackwalk itself, which means + // we want the deepest frame first (the callee-est frame). + frame.inlines.reverse(); + } +} + +/// An optional callback when walking frames. +/// +/// One may convert from other types to this callback type: +/// `FnMut(frame_idx: usize, frame: &StackFrame)` types can be converted to a +/// callback, and `()` can be converted to no callback (do nothing). +pub enum OnWalkedFrame<'a> { + None, + #[allow(clippy::type_complexity)] + Some(Box), +} + +impl From<()> for OnWalkedFrame<'_> { + fn from(_: ()) -> Self { + Self::None + } +} + +impl<'a, F: FnMut(usize, &StackFrame) + Send + 'a> From for OnWalkedFrame<'a> { + fn from(f: F) -> Self { + Self::Some(Box::new(f)) + } +} + +#[tracing::instrument(name = "unwind_thread", level = "trace", skip_all, fields(idx = _thread_idx, tid = stack.thread_id, tname = stack.thread_name.as_deref().unwrap_or("")))] +pub async fn walk_stack

( + _thread_idx: usize, + on_walked_frame: impl Into>, + stack: &mut CallStack, + stack_memory: Option>, + modules: &MinidumpModuleList, + system_info: &SystemInfo, + symbol_provider: &P, +) where + P: SymbolProvider + Sync, +{ + trace!( + "starting stack unwind of thread {} {}", + stack.thread_id, + stack.thread_name.as_deref().unwrap_or(""), + ); + + // All the unwinder code down below in `get_caller_frame` requires a valid `stack_memory`, + // where _valid_ means that we can actually read something from it. A call to `memory_range` will validate that, + // as it will reject empty stack memory or one with an overflowing `size`. + let stack_memory = + stack_memory.and_then(|stack_memory| stack_memory.memory_range().map(|_| stack_memory)); + + // Begin with the context frame, and keep getting callers until there are no more. + let mut has_new_frame = !stack.frames.is_empty(); + let mut on_walked_frame = on_walked_frame.into(); + while has_new_frame { + // Symbolicate the new frame + let frame_idx = stack.frames.len() - 1; + let frame = stack.frames.last_mut().unwrap(); + + fill_source_line_info(frame, modules, symbol_provider).await; + + // Report the frame as walked and symbolicated + if let OnWalkedFrame::Some(on_walked_frame) = &mut on_walked_frame { + on_walked_frame(frame_idx, frame); + } + + let Some(stack_memory) = stack_memory else { + break; + }; + + // Walk the new frame + let callee_frame = &stack.frames.last().unwrap(); + let grand_callee_frame = stack + .frames + .len() + .checked_sub(2) + .and_then(|idx| stack.frames.get(idx)); + match callee_frame.function_name.as_ref() { + Some(name) => trace!("unwinding {}", name), + None => trace!("unwinding 0x{:016x}", callee_frame.instruction), + } + let new_frame = get_caller_frame( + frame_idx, + &GetCallerFrameArgs { + callee_frame, + grand_callee_frame, + stack_memory, + modules, + system_info, + symbol_provider, + }, + ) + .await; + + // Check if we're done + if let Some(new_frame) = new_frame { + 
stack.frames.push(new_frame); + } else { + has_new_frame = false; + } + } + trace!( + "finished stack unwind of thread {} {}\n", + stack.thread_id, + stack.thread_name.as_deref().unwrap_or(""), + ); +} + +/// Checks if we can dismiss the validity of an instruction based on our symbols, +/// to refine the quality of each unwinder's instruction_seems_valid implementation. +async fn instruction_seems_valid_by_symbols

( + instruction: u64, + modules: &MinidumpModuleList, + symbol_provider: &P, +) -> bool +where + P: SymbolProvider + Sync, +{ + // Our input is a candidate return address, but we *really* want to validate the address + // of the call instruction *before* the return address. In theory this symbol-based + // analysis shouldn't *care* whether we're looking at the call or the instruction + // after it, but there is one corner case where the return address can be invalid + // but the instruction before it isn't: noreturn. + // + // If the *callee* is noreturn, then the caller has no obligation to have any instructions + // after the call! So e.g. on x86 if you CALL a noreturn function, the return address + // that's implicitly pushed *could* be one-past-the-end of the "function". + // + // This has been observed in practice with `+[NSThread exit]`! + // + // We don't otherwise need the instruction pointer to be terribly precise, so + // subtracting 1 from the address should be sufficient to handle this corner case. + let instruction = instruction.saturating_sub(1); + + // NULL pointer is definitely not valid + if instruction == 0 { + return false; + } + + if let Some(module) = modules.module_at_address(instruction) { + // Create a dummy frame symbolizing implementation to feed into + // our symbol provider with the address we're interested in. If + // it tries to set a non-empty function name, then we can reasonably + // assume the instruction address is valid. 
+ //use crate::FrameSymbolizer; + + struct DummyFrame { + instruction: u64, + has_name: bool, + } + impl FrameSymbolizer for DummyFrame { + fn get_instruction(&self) -> u64 { + self.instruction + } + fn set_function(&mut self, name: &str, _base: u64, _parameter_size: u32) { + self.has_name = !name.is_empty(); + } + fn set_source_file(&mut self, _file: &str, _line: u32, _base: u64) { + // Do nothing + } + } + + let mut frame = DummyFrame { + instruction, + has_name: false, + }; + + if symbol_provider + .fill_symbol(module, &mut frame) + .await + .is_ok() + { + frame.has_name + } else { + // If the symbol provider returns an Error, this means that we + // didn't have any symbols for the *module*. Just assume the + // instruction is valid in this case so that scanning works + // when we have no symbols. + true + } + } else { + // We couldn't even map this address to a module. Reject the pointer + // so that we have *some* way to distinguish "normal" pointers + // from instruction address. + // + // FIXME: this will reject any pointer into JITed code which otherwise + // isn't part of a normal well-defined module. We can potentially use + // MemoryInfoListStream (windows) and /proc/self/maps (linux) to refine + // this analysis and allow scans to walk through JITed code. 
+ false + } +} + +#[cfg(test)] +mod amd64_unittest; +#[cfg(test)] +mod arm64_unittest; +#[cfg(test)] +mod arm_unittest; +#[cfg(test)] +mod x86_unittest; diff --git a/third_party/rust/minidump-unwind/src/mips.rs b/third_party/rust/minidump-unwind/src/mips.rs new file mode 100644 index 000000000000..530529a677b7 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/mips.rs @@ -0,0 +1,327 @@ +use super::impl_prelude::*; +use minidump::format::ContextFlagsCpu; +use minidump::{ + CpuContext, Endian, MinidumpContext, MinidumpContextValidity, MinidumpModuleList, + MinidumpRawContext, +}; +use scroll::ctx::{SizeWith, TryFromCtx}; +use std::collections::HashSet; +use std::convert::TryFrom; +use tracing::trace; + +type MipsContext = minidump::format::CONTEXT_MIPS; +type Pointer = ::Register; + +const STACK_POINTER: &str = "sp"; +const PROGRAM_COUNTER: &str = "pc"; +const CALLEE_SAVED_REGS: &[&str] = &[ + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "gp", "sp", "fp", +]; + +async fn get_caller_by_cfi<'a, C, P>( + ctx: &'a C, + args: &'a GetCallerFrameArgs<'a, P>, +) -> Option +where + P: SymbolProvider + Sync, + // all these bounds are essentially duplicated from `CfiStackWalker` :-( + C: CpuContext + IntoRawContext + Clone + Send + Sync, + C::Register: TryFrom, + u64: TryFrom, + C::Register: TryFromCtx<'a, Endian, [u8], Error = scroll::Error> + SizeWith, +{ + trace!("trying cfi"); + + let _last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + let mut stack_walker = CfiStackWalker::from_ctx_and_args(ctx, args, callee_forwarded_regs)?; + + args.symbol_provider + .walk_frame(stack_walker.module, &mut stack_walker) + .await?; + let caller_pc = stack_walker.caller_ctx.get_register_always(PROGRAM_COUNTER); + let caller_sp = stack_walker.caller_ctx.get_register_always(STACK_POINTER); + + trace!( + "cfi evaluation was successful -- caller_pc: 0x{caller_pc:016x}, caller_sp: 0x{caller_sp:016x}" + ); + + // Do absolutely NO validation! Yep! 
As long as CFI evaluation succeeds + // (which does include pc and sp resolving), just blindly assume the + // values are correct. I Don't Like This, but it's what breakpad does and + // we should start with a baseline of parity. + + let context = MinidumpContext { + raw: stack_walker.caller_ctx.into_ctx(), + valid: MinidumpContextValidity::Some(stack_walker.caller_validity), + }; + Some(StackFrame::from_context(context, FrameTrust::CallFrameInfo)) +} + +fn callee_forwarded_regs(valid: &MinidumpContextValidity) -> HashSet<&'static str> { + match valid { + MinidumpContextValidity::All => CALLEE_SAVED_REGS.iter().copied().collect(), + MinidumpContextValidity::Some(ref which) => CALLEE_SAVED_REGS + .iter() + .filter(|®| which.contains(reg)) + .copied() + .collect(), + } +} + +async fn get_caller_by_scan32

( + ctx: &Mips32Context, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + const MAX_STACK_SIZE: u32 = 1024; + const MIN_ARGS: u32 = 4; + const POINTER_WIDTH: u32 = 4; + trace!("trying scan"); + // Stack scanning is just walking from the end of the frame until we encounter + // a value on the stack that looks like a pointer into some code (it's an address + // in a range covered by one of our modules). If we find such an instruction, + // we assume it's a `ra` value that was saved on the stack by the callee in + // its function prologue, following a `jal` (call) instruction of the caller. + // The next frame is then assumed to end just before that `ra` value. + let mut last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + let mut count = MAX_STACK_SIZE / POINTER_WIDTH; + // In case of mips32 ABI the stack frame of a non-leaf function + // must have a minimum stack frame size for 4 arguments (4 words). + // Move stack pointer for 4 words to avoid reporting non-existing frames + // for all frames except the topmost one. + // There is no way of knowing if topmost frame belongs to a leaf or + // a non-leaf function. + if args.callee_frame.trust != FrameTrust::Context { + last_sp = last_sp.checked_add(MIN_ARGS * POINTER_WIDTH)?; + count -= MIN_ARGS; + } + + for i in 0..count { + let address_of_pc = last_sp.checked_add(i * POINTER_WIDTH)?; + let caller_pc: u32 = args + .stack_memory + .get_memory_at_address(address_of_pc as u64)?; + //trace!("unwind: trying addr 0x{address_of_pc:08x}: 0x{caller_pc:08x}"); + if instruction_seems_valid(caller_pc as u64, args.modules, args.symbol_provider).await { + // `ra` is usually saved directly at the bottom of the frame, + // so sp is just address_of_pc + ptr + let caller_sp = address_of_pc.checked_add(POINTER_WIDTH)?; + + // Don't do any more validation, and don't try to restore fp + // (that's what breakpad does!) 
+ + trace!( + "scan seems valid -- caller_pc: 0x{caller_pc:016x}, caller_sp: 0x{caller_sp:016x}" + ); + + let mut caller_ctx = MipsContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc as u64); + caller_ctx.set_register(STACK_POINTER, caller_sp as u64); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::Mips(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + return Some(StackFrame::from_context(context, FrameTrust::Scan)); + } + } + + None +} + +async fn get_caller_by_scan64

( + ctx: &MipsContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + const MAX_STACK_SIZE: u64 = 1024; + const POINTER_WIDTH: u64 = 8; + trace!("trying scan"); + // Stack scanning is just walking from the end of the frame until we encounter + // a value on the stack that looks like a pointer into some code (it's an address + // in a range covered by one of our modules). If we find such an instruction, + // we assume it's a `ra` value that was saved on the stack by the callee in + // its function prologue, following a `jal` (call) instruction of the caller. + // The next frame is then assumed to end just before that `ra` value. + let last_sp = ctx.get_register(STACK_POINTER, args.valid())?; + + let count = MAX_STACK_SIZE / POINTER_WIDTH; + + for i in 0..count { + let address_of_pc = last_sp.checked_add(i * POINTER_WIDTH)?; + let caller_pc = args.stack_memory.get_memory_at_address(address_of_pc)?; + if instruction_seems_valid(caller_pc, args.modules, args.symbol_provider).await { + // `ra` is usually saved directly at the bottom of the frame, + // so sp is just address_of_pc + ptr + let caller_sp = address_of_pc.checked_add(POINTER_WIDTH)?; + + // Don't do any more validation, and don't try to restore fp + // (that's what breakpad does!) + + trace!( + "scan seems valid -- caller_pc: 0x{caller_pc:016x}, caller_sp: 0x{caller_sp:016x}" + ); + + let mut caller_ctx = MipsContext::default(); + caller_ctx.set_register(PROGRAM_COUNTER, caller_pc); + caller_ctx.set_register(STACK_POINTER, caller_sp); + + let mut valid = HashSet::new(); + valid.insert(PROGRAM_COUNTER); + valid.insert(STACK_POINTER); + + let context = MinidumpContext { + raw: MinidumpRawContext::Mips(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + return Some(StackFrame::from_context(context, FrameTrust::Scan)); + } + } + + None +} + +async fn instruction_seems_valid

( + instruction: Pointer, + modules: &MinidumpModuleList, + symbol_provider: &P, +) -> bool +where + P: SymbolProvider + Sync, +{ + if instruction < 0x1000 { + return false; + } + + super::instruction_seems_valid_by_symbols(instruction, modules, symbol_provider).await +} + +pub async fn get_caller_frame

( + ctx: &MipsContext, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + let ctx32 = Mips32Context::try_from(ctx.clone()); + + // .await doesn't like closures, so don't use Option chaining + let mut frame = None; + if frame.is_none() { + match &ctx32 { + Ok(mips32) => frame = get_caller_by_cfi(mips32, args).await, + Err(mips64) => frame = get_caller_by_cfi(mips64, args).await, + } + } + if frame.is_none() { + match &ctx32 { + Ok(mips32) => frame = get_caller_by_scan32(mips32, args).await, + Err(mips64) => frame = get_caller_by_scan64(mips64, args).await, + } + } + let mut frame = frame?; + + // We now check the frame to see if it looks like unwinding is complete, + // based on the frame we computed having a nonsense value. Returning + // None signals to the unwinder to stop unwinding. + + // if the instruction is within the first ~page of memory, it's basically + // null, and we can assume unwinding is complete. + if frame.context.get_instruction_pointer() < 4096 { + trace!("instruction pointer was nullish, assuming unwind complete"); + return None; + } + + // If the new stack pointer is at a lower address than the old, + // then that's clearly incorrect. Treat this as end-of-stack to + // enforce progress and avoid infinite loops. + + let sp = frame.context.get_stack_pointer(); + let last_sp = ctx.get_register_always(STACK_POINTER); + if sp <= last_sp { + // Mips leaf functions may not actually touch the stack (thanks + // to the return address register allowing you to "push" the return address + // to a register), so we need to permit the stack pointer to not + // change for the first frame of the unwind. After that we need + // more strict validation to avoid infinite loops. 
+ let is_leaf = args.callee_frame.trust == FrameTrust::Context && sp == last_sp; + if !is_leaf { + trace!("stack pointer went backwards, assuming unwind complete"); + return None; + } + } + + // Ok, the frame now seems well and truly valid, do final cleanup. + + // The Mips `jal` instruction always sets $ra to PC + 8 + let ip = frame.context.get_instruction_pointer(); + frame.instruction = ip - 8; + + Some(frame) +} + +/// This is a hack to have a different [`CpuContext`] type/impl depending on the +/// context flags of the inner [`MipsContext`] +#[derive(Clone)] +struct Mips32Context(MipsContext); + +impl CpuContext for Mips32Context { + type Register = u32; + + const REGISTERS: &'static [&'static str] = ::REGISTERS; + + fn get_register_always(&self, reg: &str) -> Self::Register { + self.0.get_register_always(reg) as u32 + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + self.0.set_register(reg, val.into()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + self.0.stack_pointer_register_name() + } + + fn instruction_pointer_register_name(&self) -> &'static str { + self.0.instruction_pointer_register_name() + } +} + +impl IntoRawContext for Mips32Context { + fn into_ctx(self) -> MinidumpRawContext { + MinidumpRawContext::Mips(self.0) + } +} + +trait IntoRawContext { + fn into_ctx(self) -> MinidumpRawContext; +} + +impl IntoRawContext for MipsContext { + fn into_ctx(self) -> MinidumpRawContext { + MinidumpRawContext::Mips(self) + } +} + +impl TryFrom for Mips32Context { + type Error = MipsContext; + + fn try_from(ctx: MipsContext) -> Result { + if ContextFlagsCpu::from_flags(ctx.context_flags).contains(ContextFlagsCpu::CONTEXT_MIPS64) + { + Err(ctx) + } else { + Ok(Self(ctx)) + } + } +} diff --git a/third_party/rust/minidump-unwind/src/symbols/debuginfo.rs b/third_party/rust/minidump-unwind/src/symbols/debuginfo.rs new file mode 100644 index 000000000000..a8e1462f0a24 --- /dev/null +++ 
b/third_party/rust/minidump-unwind/src/symbols/debuginfo.rs @@ -0,0 +1,538 @@ +//! This module provides a `SymbolProvider` which uses local binary debuginfo. + +use super::{async_trait, FileError, FileKind, FillSymbolError, FrameSymbolizer, FrameWalker}; +use cachemap2::CacheMap; +use framehop::Unwinder; +use memmap2::Mmap; +use minidump::{MinidumpModuleList, MinidumpSystemInfo, Module}; +use std::cell::UnsafeCell; +use std::fs::File; +use std::path::{Path, PathBuf}; + +/// A symbol provider which gets information from the minidump modules on the local system. +/// +/// Note: this symbol provider will currently only restore the registers necessary for unwinding +/// the given platform. In the future this may be extended to restore all registers. +pub struct DebugInfoSymbolProvider { + unwinder: Box, + symbols: Box, + /// The caches and unwinder operate on the memory held by the mapped modules, so this field + /// must not be dropped until after they are dropped. + _mapped_modules: Box<[Mmap]>, +} + +pub struct DebugInfoSymbolProviderBuilder { + #[cfg(feature = "debuginfo-symbols")] + enable_symbols: bool, +} + +type ModuleData = std::borrow::Cow<'static, [u8]>; +type FHModule = framehop::Module; + +struct UnwinderImpl { + unwinder: U, + unwind_cache: PerThread, +} + +impl Default for UnwinderImpl { + fn default() -> Self { + UnwinderImpl { + unwinder: Default::default(), + unwind_cache: Default::default(), + } + } +} + +impl UnwinderImpl> { + pub fn x86_64() -> Box { + Box::::default() + } +} + +impl UnwinderImpl> { + pub fn aarch64() -> Box { + Box::::default() + } +} + +trait WalkerRegs: Sized { + fn regs_from_walker(walker: &(dyn FrameWalker + Send)) -> Option; + fn update_walker(self, walker: &mut (dyn FrameWalker + Send)) -> Option<()>; +} + +impl WalkerRegs for framehop::x86_64::UnwindRegsX86_64 { + fn regs_from_walker(walker: &(dyn FrameWalker + Send)) -> Option { + let sp = walker.get_callee_register("rsp")?; + let bp = walker.get_callee_register("rbp")?; + 
let ip = walker.get_callee_register("rip")?; + Some(Self::new(ip, sp, bp)) + } + + fn update_walker(self, walker: &mut (dyn FrameWalker + Send)) -> Option<()> { + walker.set_cfa(self.sp())?; + walker.set_caller_register("rbp", self.bp())?; + Some(()) + } +} + +impl WalkerRegs for framehop::aarch64::UnwindRegsAarch64 { + fn regs_from_walker(walker: &(dyn FrameWalker + Send)) -> Option { + let lr = walker.get_callee_register("lr")?; + let sp = walker.get_callee_register("sp")?; + let fp = walker.get_callee_register("fp")?; + // TODO PtrAuthMask on MacOS? + Some(Self::new(lr, sp, fp)) + } + + fn update_walker(self, walker: &mut (dyn FrameWalker + Send)) -> Option<()> { + walker.set_cfa(self.sp())?; + walker.set_caller_register("lr", self.lr())?; + walker.set_caller_register("fp", self.fp())?; + Some(()) + } +} + +trait UnwinderInterface { + fn add_module(&mut self, module: FHModule); + fn unwind_frame(&self, walker: &mut (dyn FrameWalker + Send)) -> Option<()>; +} + +impl> UnwinderInterface for UnwinderImpl +where + U::UnwindRegs: WalkerRegs, + U::Cache: Default, +{ + fn add_module(&mut self, module: FHModule) { + self.unwinder.add_module(module); + } + + fn unwind_frame(&self, walker: &mut (dyn FrameWalker + Send)) -> Option<()> { + let mut regs = U::UnwindRegs::regs_from_walker(walker)?; + let instruction = walker.get_instruction(); + let result = self.unwind_cache.with(|cache| { + self.unwinder.unwind_frame( + if walker.has_grand_callee() { + framehop::FrameAddress::from_return_address(instruction + 1).unwrap() + } else { + framehop::FrameAddress::from_instruction_pointer(instruction) + }, + &mut regs, + cache, + &mut |addr| walker.get_register_at_address(addr).ok_or(()), + ) + }); + let ra = match result { + Ok(ra) => ra, + Err(e) => { + tracing::error!("failed to unwind frame: {e}"); + return None; + } + }; + if let Some(ra) = ra { + walker.set_ra(ra); + } + regs.update_walker(walker)?; + Some(()) + } +} + +#[async_trait] +trait SymbolInterface { + async fn 
fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError>; +} + +/// A SymbolInterface that always returns `Ok(())` without doing anything. +struct NoSymbols; + +#[async_trait] +impl SymbolInterface for NoSymbols { + async fn fill_symbol( + &self, + _module: &(dyn Module + Sync), + _frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError> { + Ok(()) + } +} + +#[cfg(feature = "debuginfo-symbols")] +mod wholesym_symbol_interface { + use super::*; + use futures_util::lock::Mutex; + use std::collections::HashMap; + use wholesym::{LookupAddress, SymbolManager, SymbolManagerConfig, SymbolMap}; + + pub struct Impl { + /// Indexed by module base address. + symbols: HashMap>, + } + + impl Impl { + pub async fn new(modules: &MinidumpModuleList) -> Self { + let mut symbols = HashMap::new(); + let symbol_manager = SymbolManager::with_config(SymbolManagerConfig::new()); + for module in modules.iter() { + let path = effective_debug_file(module, false); + if let Ok(sm) = symbol_manager + .load_symbol_map_for_binary_at_path(&path, None) + .await + { + symbols.insert(module.into(), Mutex::new(sm)); + } + } + Impl { symbols } + } + } + + #[async_trait] + impl SymbolInterface for Impl { + async fn fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError> { + let key = ModuleKey::for_module(module); + let symbol_map = self.symbols.get(&key).ok_or(FillSymbolError {})?; + + use std::convert::TryInto; + let addr = match (frame.get_instruction() - module.base_address()).try_into() { + Ok(a) => a, + Err(e) => { + tracing::error!("failed to downcast relative address offset: {e}"); + return Ok(()); + } + }; + + let address_info = symbol_map + .lock() + .await + .lookup(LookupAddress::Relative(addr)) + .await; + + if let Some(address_info) = address_info { + frame.set_function( + &address_info.symbol.name, + 
module.base_address() + address_info.symbol.address as u64, + 0, + ); + + if let Some(frames) = address_info.frames { + let mut iter = frames.into_iter().rev(); + if let Some(f) = iter.next() { + if let Some(path) = f.file_path { + frame.set_source_file( + path.raw_path(), + f.line_number.unwrap_or(0), + module.base_address() + address_info.symbol.address as u64, + ); + } + } + for f in iter { + frame.add_inline_frame( + f.function.as_deref().unwrap_or(""), + f.file_path.as_ref().map(|p| p.raw_path()), + f.line_number, + ); + } + } + } + Ok(()) + } + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct ModuleKey(u64); + +impl ModuleKey { + /// Create a module key for the given module. + pub fn for_module(module: &dyn Module) -> Self { + ModuleKey(module.base_address()) + } +} + +impl From<&dyn Module> for ModuleKey { + fn from(module: &dyn Module) -> Self { + Self::for_module(module) + } +} + +impl From<&minidump::MinidumpModule> for ModuleKey { + fn from(module: &minidump::MinidumpModule) -> Self { + Self::for_module(module) + } +} + +struct PerThread { + inner: CacheMap>, +} + +impl Default for PerThread { + fn default() -> Self { + PerThread { + inner: Default::default(), + } + } +} + +impl PerThread { + pub fn with(&self, f: F) -> R + where + F: FnOnce(&mut T) -> R, + { + // # Safety + // We guarantee unique access to the mutable reference because the values are indexed by + // thread id: each thread gets its own value which it can freely mutate. We prevent + // multiple mutable aliases from being created by requiring a callback function. 
+ f(unsafe { &mut *self.inner.cache_default(std::thread::current().id()).get() }) + } +} + +mod object_section_info { + use framehop::ModuleSectionInfo; + use object::read::{Object, ObjectSection, ObjectSegment}; + use std::ops::Range; + + #[repr(transparent)] + pub struct ObjectSectionInfo<'a, O>(pub &'a O); + + impl<'a, O> std::ops::Deref for ObjectSectionInfo<'a, O> { + type Target = O; + + fn deref(&self) -> &Self::Target { + self.0 + } + } + + impl<'data: 'file, 'file, O, D> ModuleSectionInfo for ObjectSectionInfo<'file, O> + where + O: Object<'data>, + D: From<&'data [u8]>, + { + fn base_svma(&self) -> u64 { + if let Some(text_segment) = self.segments().find(|s| s.name() == Ok(Some("__TEXT"))) { + // This is a mach-O image. "Relative addresses" are relative to the + // vmaddr of the __TEXT segment. + return text_segment.address(); + } + + // For PE binaries, relative_address_base() returns the image base address. + // Otherwise it returns zero. This gives regular ELF images a base address of zero, + // which is what we want. + self.relative_address_base() + } + + fn section_svma_range(&mut self, name: &[u8]) -> Option> { + let section = self.section_by_name_bytes(name)?; + Some(section.address()..section.address() + section.size()) + } + + fn section_data(&mut self, name: &[u8]) -> Option { + let section = self.section_by_name_bytes(name)?; + section.data().ok().map(|data| data.into()) + } + + fn segment_svma_range(&mut self, name: &[u8]) -> Option> { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + Some(segment.address()..segment.address() + segment.size()) + } + + fn segment_data(&mut self, name: &[u8]) -> Option { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + segment.data().ok().map(|data| data.into()) + } + } +} + +/// Get the file path with debug information for the given module. +/// +/// If `unwind_info` is true, returns the path that should contain unwind information. 
+fn effective_debug_file(module: &dyn Module, unwind_info: bool) -> PathBuf { + // Windows x86_64 always stores the unwind info _only_ in the binary. + let ignore_debug_file = unwind_info && cfg!(all(windows, target_arch = "x86_64")); + + let code_file = module.code_file(); + let code_file_path: &Path = code_file.as_ref().as_ref(); + + if !ignore_debug_file { + if let Some(file) = module.debug_file() { + let file_path: &Path = file.as_ref().as_ref(); + // Anchor relative paths in the code file parent. + if file_path.is_relative() { + if let Some(parent) = code_file_path.parent() { + let path = parent.join(file_path); + if path.exists() { + return path; + } + } + } + if file_path.exists() { + return file_path.to_owned(); + } + } + // else fall back to code file below + } + + code_file_path.to_owned() +} + +fn load_unwind_module(module: &dyn Module) -> Option<(Mmap, framehop::Module)> { + let path = effective_debug_file(module, true); + let file = match File::open(&path) { + Ok(file) => file, + Err(e) => { + tracing::warn!("failed to open {} for debug info: {e}", path.display()); + return None; + } + }; + // # Safety + // The file is presumably read-only (being some binary or debug info file). + let mapped = match unsafe { Mmap::map(&file) } { + Ok(m) => m, + Err(e) => { + tracing::error!("failed to map {} for debug info: {e}", path.display()); + return None; + } + }; + + let objfile = match object::read::File::parse( + // # Safety + // We broaden the lifetime to static, but ensure that the Mmap which provides the data + // outlives all references. 
+ unsafe { std::mem::transmute::<&[u8], &'static [u8]>(mapped.as_ref()) }, + ) { + Ok(o) => o, + Err(e) => { + tracing::error!("failed to parse object file {}: {e}", path.display()); + return None; + } + }; + + let base = module.base_address(); + let end = base + module.size(); + let fhmodule = framehop::Module::new( + path.display().to_string(), + base..end, + base, + object_section_info::ObjectSectionInfo(&objfile), + ); + + Some((mapped, fhmodule)) +} + +impl Default for DebugInfoSymbolProviderBuilder { + fn default() -> Self { + DebugInfoSymbolProviderBuilder { + #[cfg(feature = "debuginfo-symbols")] + enable_symbols: true, + } + } +} + +impl DebugInfoSymbolProviderBuilder { + /// Create a new builder. + /// + /// This returns the default builder. + pub fn new() -> Self { + Self::default() + } + + /// Enable or disable symbolication. + /// + /// This saves processing time if desired, only doing unwinding if symbols are disabled. This + /// option is only available when the `wholesym` feature (usually through the `debuginfo` + /// feature) is enabled, and defaults to `true`. + #[cfg(feature = "debuginfo-symbols")] + pub fn symbols(mut self, enable: bool) -> Self { + self.enable_symbols = enable; + self + } + + /// Create the DebugInfoSymbolProvider. 
+ pub async fn build( + self, + system_info: &MinidumpSystemInfo, + modules: &MinidumpModuleList, + ) -> DebugInfoSymbolProvider { + let mut mapped_modules = Vec::new(); + use minidump::system_info::Cpu; + let mut unwinder = match system_info.cpu { + Cpu::X86_64 => UnwinderImpl::x86_64(), + Cpu::Arm64 => UnwinderImpl::aarch64(), + _ => unimplemented!(), + }; + + #[cfg(not(feature = "debuginfo-symbols"))] + let symbols: Box = Box::new(NoSymbols); + + #[cfg(feature = "debuginfo-symbols")] + let symbols: Box = if self.enable_symbols { + Box::new(wholesym_symbol_interface::Impl::new(modules).await) + } else { + Box::new(NoSymbols) + }; + + for module in modules.iter() { + if let Some((mapped, fhmodule)) = load_unwind_module(module) { + mapped_modules.push(mapped); + unwinder.add_module(fhmodule); + } + } + DebugInfoSymbolProvider { + unwinder, + symbols, + _mapped_modules: mapped_modules.into(), + } + } +} + +impl DebugInfoSymbolProvider { + /// Create a builder for the DebugInfoSymbolProvider. + pub fn builder() -> DebugInfoSymbolProviderBuilder { + Default::default() + } + + /// Create a new DebugInfoSymbolProvider with the default builder settings. 
+ pub async fn new(system_info: &MinidumpSystemInfo, modules: &MinidumpModuleList) -> Self { + Self::builder().build(system_info, modules).await + } +} + +#[async_trait] +impl super::SymbolProvider for DebugInfoSymbolProvider { + async fn fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError> { + self.symbols.fill_symbol(module, frame).await + } + + async fn walk_frame( + &self, + _module: &(dyn Module + Sync), + walker: &mut (dyn FrameWalker + Send), + ) -> Option<()> { + self.unwinder.unwind_frame(walker) + } + + async fn get_file_path( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result { + let path = match file_kind { + FileKind::BreakpadSym => None, + FileKind::Binary => Some(PathBuf::from(module.code_file().as_ref())), + FileKind::ExtraDebugInfo => module.debug_file().map(|p| PathBuf::from(p.as_ref())), + }; + match path { + Some(path) if path.exists() => Ok(path), + _ => Err(FileError::NotFound), + } + } +} diff --git a/third_party/rust/minidump-unwind/src/symbols/mod.rs b/third_party/rust/minidump-unwind/src/symbols/mod.rs new file mode 100644 index 000000000000..9f909d37e859 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/symbols/mod.rs @@ -0,0 +1,348 @@ +//! This module defines the interface used by minidump-unwind to symbolize stack traces. +//! +//! minidump-unwind uses a series of traits to represent symbolizing functionality and interfaces: +//! +//! * [SymbolProvider][] - provides symbolication, cfi evaluation, and debug statistics +//! * Implemented by [Symbolizer][] and [debuginfo::DebugInfoSymbolProvider][] (requires the +//! `debuginfo` feature to be enabled). +//! +//! * [SymbolSupplier][] - maps a [Module][] to a [SymbolFile][] +//! * minidump-unwind does not directly use this, it's just there so the Symbolizer can +//! generically handle different symbol fetching strategies. +//! +//! 
* [FrameSymbolizer][] - callbacks that symbolication uses to return its results. +//! * Implemented by [StackFrame][crate::StackFrame] +//! * Implemented by DummyFrame (private, for a stack scanning heuristic) +//! * [FrameWalker][] - callbacks that cfi eval uses to read callee state and write caller state. +//! * Implemented by CfiStackWalker (private) +//! +//! +//! The following concrete functions are provided to allow configuration of the symbol fetching +//! strategy: +//! +//! * [http_symbol_supplier][] - a [SymbolSupplier][] that can find symbols over HTTP (and cache). +//! Requires the `http` feature to be enabled. +//! * [simple_symbol_supplier][] - a [SymbolSupplier][] that can find symbols on disk. +//! * [string_symbol_supplier][] - a mock [SymbolSupplier][] for tests. +//! +//! +//! The following concrete types are provided: +//! +//! * [Symbolizer][] - the main interface of the symbolizer, implementing [SymbolProvider][]. +//! * Wraps the [SymbolSupplier][] implementation that is selected. +//! * Queries the [SymbolSupplier] and manages the SymbolFiles however it pleases. +//! * [SymbolStats][] - debug statistic output. +//! * [SymbolFile][] - part of [LocateSymbolsResult][] that a [SymbolProvider][] returns to the +//! Symbolizer. +//! * Never handled by minidump-unwind, public for the trait. +//! * [SymbolError][] - possible errors a [SymbolProvider][] can yield. +//! * Never handled by minidump-unwind, public for the trait. +//! * [FillSymbolError][] - possible errors for `fill_symbol`. +//! * While this *is* handled by minidump-unwind, it doesn't actually look at the value. It's +//! just there to be an Error type for the sake of API design. +//! * [LocateSymbolsResult][] - a payload that a [SymbolProvider][] returns to the Symbolizer. +//! * Never handled by minidump-unwind, public for the trait. +//! * [DebugInfoResult][] - part of [LocateSymbolsResult][] that a [SymbolProvider][] returns to +//! the Symbolizer. +//! 
* Never handled by minidump-unwind, public for the trait. + +use std::collections::HashMap; +use std::path::PathBuf; + +use async_trait::async_trait; +use minidump::Module; + +pub use breakpad_symbols::{ + DebugInfoResult, FileError, FileKind, FillSymbolError, FrameSymbolizer, FrameWalker, + LocateSymbolsResult, PendingSymbolStats, SymbolError, SymbolFile, SymbolStats, SymbolSupplier, + Symbolizer, +}; + +#[cfg(feature = "debuginfo-unwind")] +pub mod debuginfo; + +/// The [`SymbolProvider`] is the main extension point for minidump processing. +/// +/// It is primarily used by the `process_minidump` function to do stack +/// unwinding via CFI (call frame information) of a [`Module`] using the +/// `walk_frame` function. +/// +/// The `fill_symbol` function is responsible for filling in the source location +/// (function, file, line triple) corresponding to an instruction address, as +/// well as a dual purpose of informing the stack scanning heuristic whether a +/// given instruction address might be valid inside of a [`Module`]. +/// +/// All the asynchronous trait methods can be called concurrently and need to +/// handle synchronization and request coalescing (based on the [`Module`]). +#[async_trait] +pub trait SymbolProvider { + /// Fill symbol information in [`FrameSymbolizer`] using the instruction + /// address from `frame`, and the module information from [`Module`]. + /// + /// An Error indicates that no symbols could be found for the relevant + /// module. + /// + /// This is used for filling in the resulting source location of the + /// frame as a (function, file, line) triple, as well as providing the + /// `parameter_size` which is used during CFI evaluation and stack walking. + /// + /// This function also serves a dual purpose in informing the stack scanning + /// heuristic whether a potential instruction address points to a valid + /// function or not. 
+ async fn fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError>; + + /// Tries to use CFI to walk the stack frame of the [`FrameWalker`] + /// using the symbols of the given [`Module`]. + /// + /// Output should be written using the [`FrameWalker`]'s `set_caller_*` APIs. + async fn walk_frame( + &self, + module: &(dyn Module + Sync), + walker: &mut (dyn FrameWalker + Send), + ) -> Option<()>; + + /// Gets the path to the binary code file for a given module (or an Error). + /// + /// This might be used later on to inspect the assembly instructions of + /// a module. + async fn get_file_path( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result; + + /// Collect various statistics on the symbols. + /// + /// Keys are implementation dependent. + /// For example the file name of the module (code_file's file name). + /// + /// This is only really intended to be queried after processing an + /// entire minidump, and may have non-trivial overhead to compute. + /// It's also possible we'd want it to also be able to contain stats + /// that don't really make sense in intermediate states. + /// + /// In a world where you might want to have one SymbolSupplier shared + /// by multiple instances of `process` running in parallel, it's unclear + /// if this is the right abstraction. Perhaps we should have some kind + /// of "session" abstraction so you can get stats about each individual + /// processing task? Of course all pooling/caching between the tasks + /// muddies things too. + fn stats(&self) -> HashMap { + HashMap::new() + } + + /// Collect various pending statistics on the symbols. + /// + /// This is intended to be queried during processing to give some + /// interactive feedback to the user, and so is fine to poll as + /// much as you want, whenever you want. 
+ fn pending_stats(&self) -> PendingSymbolStats { + PendingSymbolStats::default() + } +} + +#[async_trait] +impl SymbolProvider for &(dyn SymbolProvider + Sync) { + async fn fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError> { + (*self).fill_symbol(module, frame).await + } + + async fn walk_frame( + &self, + module: &(dyn Module + Sync), + walker: &mut (dyn FrameWalker + Send), + ) -> Option<()> { + (*self).walk_frame(module, walker).await + } + + async fn get_file_path( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result { + (*self).get_file_path(module, file_kind).await + } + + fn stats(&self) -> HashMap { + (*self).stats() + } + + fn pending_stats(&self) -> PendingSymbolStats { + (*self).pending_stats() + } +} + +#[derive(Default)] +pub struct MultiSymbolProvider { + providers: Vec>, +} + +impl MultiSymbolProvider { + pub fn new() -> MultiSymbolProvider { + Default::default() + } + + pub fn add(&mut self, provider: Box) { + self.providers.push(provider); + } +} + +#[async_trait] +impl SymbolProvider for MultiSymbolProvider { + async fn fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError> { + // Return Ok if *any* symbol provider came back with Ok, so that the user can + // distinguish between having no symbols at all and just not being able to + // symbolize this particular frame. 
+ let mut best_result = Err(FillSymbolError {}); + for p in self.providers.iter() { + let new_result = p.fill_symbol(module, frame).await; + best_result = best_result.or(new_result); + } + best_result + } + + async fn walk_frame( + &self, + module: &(dyn Module + Sync), + walker: &mut (dyn FrameWalker + Send), + ) -> Option<()> { + for p in self.providers.iter() { + let result = p.walk_frame(module, walker).await; + if result.is_some() { + return result; + } + } + None + } + + async fn get_file_path( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result { + // Return Ok if *any* symbol provider came back with Ok + let mut best_result = Err(FileError::NotFound); + for p in self.providers.iter() { + let new_result = p.get_file_path(module, file_kind).await; + best_result = best_result.or(new_result); + } + best_result + } + + fn stats(&self) -> HashMap { + let mut result = HashMap::new(); + for p in self.providers.iter() { + // FIXME: do more intelligent merging of the stats + // (currently doesn't matter as only one provider reports non-empty stats). + result.extend(p.stats()); + } + result + } + + fn pending_stats(&self) -> PendingSymbolStats { + let mut result = PendingSymbolStats::default(); + for p in self.providers.iter() { + // FIXME: do more intelligent merging of the stats + // (currently doesn't matter as only one provider reports non-empty stats). 
+ result = p.pending_stats(); + } + result + } +} + +#[async_trait] +impl SymbolProvider for Symbolizer { + async fn fill_symbol( + &self, + module: &(dyn Module + Sync), + frame: &mut (dyn FrameSymbolizer + Send), + ) -> Result<(), FillSymbolError> { + self.fill_symbol(module, frame).await + } + async fn walk_frame( + &self, + module: &(dyn Module + Sync), + walker: &mut (dyn FrameWalker + Send), + ) -> Option<()> { + self.walk_frame(module, walker).await + } + async fn get_file_path( + &self, + module: &(dyn Module + Sync), + file_kind: FileKind, + ) -> Result { + self.get_file_path(module, file_kind).await + } + fn stats(&self) -> HashMap { + self.stats() + } + fn pending_stats(&self) -> PendingSymbolStats { + self.pending_stats() + } +} + +/// Gets a SymbolSupplier that looks up symbols by path or with urls. +/// +/// * `symbol_paths` is a list of paths to check for symbol files. Paths +/// are searched in order until one returns a payload. If none do, then +/// urls are used. +/// +/// * `symbol_urls` is a list of "base urls" that should all point to Tecken +/// servers. urls are queried in order until one returns a payload. If none +/// do, then it's an error. +/// +/// * `symbols_cache` is a directory where an on-disk cache should be located. +/// This should be assumed to be a "temp" directory that another process +/// you don't control is garbage-collecting old files from (to provide an LRU cache). +/// The cache is queried before paths and urls (otherwise it wouldn't be much of a cache). +/// +/// * `symbols_tmp` is a directory where symbol files should be downloaded to +/// before atomically swapping them into the cache. Has the same "temp" +/// assumptions as symbols_cache. +/// +/// * `timeout` is a maximum time limit for a symbol file download. This +/// is primarily defined to avoid getting stuck on buggy infinite downloads. +/// As of this writing, minidump-stackwalk defaults this to 1000 seconds.
In +/// the event of a timeout, the supplier may still try to parse the truncated +/// download. +#[cfg(feature = "http")] +pub fn http_symbol_supplier( + symbol_paths: Vec, + symbol_urls: Vec, + symbols_cache: PathBuf, + symbols_tmp: PathBuf, + timeout: std::time::Duration, +) -> impl SymbolSupplier { + breakpad_symbols::HttpSymbolSupplier::new( + symbol_urls, + symbols_cache, + symbols_tmp, + symbol_paths, + timeout, + ) +} + +/// Gets a SymbolSupplier that looks up symbols by path. +/// +/// Paths are queried in order until one returns a payload. +pub fn simple_symbol_supplier(symbol_paths: Vec) -> impl SymbolSupplier { + breakpad_symbols::SimpleSymbolSupplier::new(symbol_paths) +} + +/// Gets a mock SymbolSupplier that just maps module names +/// to a string containing an entire breakpad .sym file, for tests. +pub fn string_symbol_supplier(modules: HashMap) -> impl SymbolSupplier { + breakpad_symbols::StringSymbolSupplier::new(modules) +} diff --git a/third_party/rust/minidump-unwind/src/system_info.rs b/third_party/rust/minidump-unwind/src/system_info.rs new file mode 100644 index 000000000000..6966d4e5f288 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/system_info.rs @@ -0,0 +1,46 @@ +use std::borrow::Cow; + +use minidump::system_info::{Cpu, Os}; + +/// Information about the system that produced a `Minidump`. +#[derive(Debug, Clone)] +pub struct SystemInfo { + /// The operating system that produced the minidump + pub os: Os, + /// A string identifying the version of the operating system. + /// + /// This may look like "5.1.2600" or "10.4.8", if present + pub os_version: Option, + /// A string identifying the exact build of the operating system. + /// + /// This may look like "Service Pack 2" or "8L2127", if present. On Windows, + /// this is the CSD version, on Linux, extended build information and macOS, + /// the product build version. 
+ pub os_build: Option, + /// The CPU on which the dump was produced + pub cpu: Cpu, + /// A string further identifying the specific CPU + /// + /// For example, "GenuineIntel level 6 model 13 stepping 8", if present. + pub cpu_info: Option, + /// The microcode version of the cpu + pub cpu_microcode_version: Option, + /// The number of processors in the system + /// + /// Will be greater than one for multi-core systems. + pub cpu_count: usize, +} + +impl SystemInfo { + /// Returns the full available operating system version. + /// + /// Returns the version and the build, if available, otherwise just the version. + pub fn format_os_version(&self) -> Option> { + match (&self.os_version, &self.os_build) { + (Some(v), Some(b)) => Some(format!("{v} {b}").into()), + (Some(v), None) => Some(Cow::Borrowed(v)), + (None, Some(b)) => Some(Cow::Borrowed(b)), + (None, None) => None, + } + } +} diff --git a/third_party/rust/minidump-unwind/src/x86.rs b/third_party/rust/minidump-unwind/src/x86.rs new file mode 100644 index 000000000000..6a977509bab7 --- /dev/null +++ b/third_party/rust/minidump-unwind/src/x86.rs @@ -0,0 +1,409 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +// Note since x86 and Amd64 have basically the same ABI, this implementation +// is written to largely erase the details of the two wherever possible, +// so that it can be copied between the two with minimal changes. It's not +// worth the effort to *actually* unify the implementations. 
+ +use super::impl_prelude::*; +use minidump::format::CONTEXT_X86; +use minidump::{MinidumpContext, MinidumpContextValidity, MinidumpModuleList, MinidumpRawContext}; +use std::collections::HashSet; +use tracing::trace; + +type Pointer = u32; +const POINTER_WIDTH: Pointer = 4; +const INSTRUCTION_REGISTER: &str = "eip"; +const STACK_POINTER_REGISTER: &str = "esp"; +const FRAME_POINTER_REGISTER: &str = "ebp"; +const CALLEE_SAVED_REGS: &[&str] = &["ebp", "ebx", "edi", "esi"]; + +async fn get_caller_by_cfi

( + ctx: &CONTEXT_X86, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying cfi"); + + if let MinidumpContextValidity::Some(ref which) = args.valid() { + if !which.contains(STACK_POINTER_REGISTER) { + return None; + } + } + + let mut stack_walker = CfiStackWalker::from_ctx_and_args(ctx, args, callee_forwarded_regs)?; + + args.symbol_provider + .walk_frame(stack_walker.module, &mut stack_walker) + .await?; + let caller_ip = stack_walker.caller_ctx.eip; + let caller_sp = stack_walker.caller_ctx.esp; + + trace!( + "cfi evaluation was successful -- caller_ip: 0x{:08x}, caller_sp: 0x{:08x}", + caller_ip, + caller_sp, + ); + + // Do absolutely NO validation! Yep! As long as CFI evaluation succeeds + // (which does include ip and sp resolving), just blindly assume the + // values are correct. I Don't Like This, but it's what breakpad does and + // we should start with a baseline of parity. + + // FIXME?: breakpad is actually a little wary of the output of STACK WIN + // cfi, and does check that instruction_seems_valid() for eip. However, + // it doesn't immediately discard the results. It tentatively tries to + // scan, and then if that doesn't return anything compelling, it just goes + // forward with whatever STACK WIN came up with. + // + // The current layering of this code means that we don't actually know what + // kind of cfi was used here, and the code that *does* can't do scanning. + // For now let's just trust the results unconditionally. We can do something + // more hacky/robust if we find a compelling need to. + // + // It also has some weird scanning to try to adjust the computed bp? 
+ + trace!("cfi result seems valid"); + + let context = MinidumpContext { + raw: MinidumpRawContext::X86(stack_walker.caller_ctx), + valid: MinidumpContextValidity::Some(stack_walker.caller_validity), + }; + Some(StackFrame::from_context(context, FrameTrust::CallFrameInfo)) +} + +fn callee_forwarded_regs(valid: &MinidumpContextValidity) -> HashSet<&'static str> { + match valid { + MinidumpContextValidity::All => CALLEE_SAVED_REGS.iter().copied().collect(), + MinidumpContextValidity::Some(ref which) => CALLEE_SAVED_REGS + .iter() + .filter(|®| which.contains(reg)) + .copied() + .collect(), + } +} + +fn get_caller_by_frame_pointer

( + ctx: &CONTEXT_X86, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying frame pointer"); + if let MinidumpContextValidity::Some(ref which) = args.valid() { + if !which.contains(FRAME_POINTER_REGISTER) { + return None; + } + } + + let last_bp = ctx.ebp; + // Assume that the standard %bp-using x86 calling convention is in + // use. + // + // The typical x86 calling convention, when frame pointers are present, + // is for the calling procedure to use CALL, which pushes the return + // address onto the stack and sets the instruction pointer (%ip) to + // the entry point of the called routine. The called routine then + // PUSHes the calling routine's frame pointer (%bp) onto the stack + // before copying the stack pointer (%sp) to the frame pointer (%bp). + // Therefore, the calling procedure's frame pointer is always available + // by dereferencing the called procedure's frame pointer, and the return + // address is always available at the memory location immediately above + // the address pointed to by the called procedure's frame pointer. The + // calling procedure's stack pointer (%sp) is 2 pointers higher than the + // value of the called procedure's frame pointer at the time the calling + // procedure made the CALL: 1 pointer for the return address pushed by the + // CALL itself, and 1 pointer for the callee's PUSH of the caller's frame + // pointer. + // + // %ip_new = *(%bp_old + ptr) + // %bp_new = *(%bp_old) + // %sp_new = %bp_old + ptr*2 + + if last_bp >= u32::MAX - POINTER_WIDTH * 2 { + // Although this code generally works fine if the pointer math overflows, + // debug builds will still panic, and this guard protects against it without + // drowning the rest of the code in checked_add. 
+ return None; + } + let caller_ip = args + .stack_memory + .get_memory_at_address(last_bp as u64 + POINTER_WIDTH as u64)?; + let caller_bp = args.stack_memory.get_memory_at_address(last_bp as u64)?; + let caller_sp = last_bp + POINTER_WIDTH * 2; + + // NOTE: minor divergence from x64 impl here: doing extra validation on the + // value of `caller_sp` and `caller_bp` here encourages the stack scanner + // to kick in and start outputting extra frames for `/testdata/test.dmp`. + // Since breakpad also doesn't output those frames, let's assume that's + // desirable. + + trace!( + "frame pointer seems valid -- caller_ip: 0x{:08x}, caller_sp: 0x{:08x}", + caller_ip, + caller_sp, + ); + + let caller_ctx = CONTEXT_X86 { + eip: caller_ip, + esp: caller_sp, + ebp: caller_bp, + ..CONTEXT_X86::default() + }; + let mut valid = HashSet::new(); + valid.insert(INSTRUCTION_REGISTER); + valid.insert(STACK_POINTER_REGISTER); + valid.insert(FRAME_POINTER_REGISTER); + let context = MinidumpContext { + raw: MinidumpRawContext::X86(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + Some(StackFrame::from_context(context, FrameTrust::FramePointer)) +} + +async fn get_caller_by_scan

( + ctx: &CONTEXT_X86, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + trace!("trying scan"); + // Stack scanning is just walking from the end of the frame until we encounter + // a value on the stack that looks like a pointer into some code (it's an address + // in a range covered by one of our modules). If we find such an instruction, + // we assume it's an ip value that was pushed by the CALL instruction that created + // the current frame. The next frame is then assumed to end just before that + // ip value. + let last_bp = match args.valid() { + MinidumpContextValidity::All => Some(ctx.ebp), + MinidumpContextValidity::Some(ref which) => { + if !which.contains(STACK_POINTER_REGISTER) { + trace!("cannot scan without stack pointer"); + return None; + } + if which.contains(FRAME_POINTER_REGISTER) { + Some(ctx.ebp) + } else { + None + } + } + }; + let last_sp = ctx.esp; + + // Number of pointer-sized values to scan through in our search. + let default_scan_range = 40; + let extended_scan_range = default_scan_range * 4; + + // Breakpad devs found that the first frame of an unwind can be really messed up, + // and therefore benefits from a longer scan. Let's do it too. + let scan_range = if let FrameTrust::Context = args.callee_frame.trust { + extended_scan_range + } else { + default_scan_range + }; + + for i in 0..scan_range { + let address_of_ip = last_sp.checked_add(i * POINTER_WIDTH)?; + let caller_ip = args + .stack_memory + .get_memory_at_address(address_of_ip as u64)?; + if instruction_seems_valid(caller_ip, args.modules, args.symbol_provider).await { + // ip is pushed by CALL, so sp is just address_of_ip + ptr + let caller_sp = address_of_ip.checked_add(POINTER_WIDTH)?; + + // Try to restore bp as well. This can be possible in two cases: + // + // 1. This function has the standard prologue that pushes bp and + // sets bp = sp. 
If this is the case, then the current bp should be + // immediately after (before in memory) address_of_ip. + // + // 2. This function does not use bp, and has just preserved it + // from the caller. If this is the case, bp should be before + // (after in memory) address_of_ip. + // + // We then try our best to eliminate bogus-looking bp's with some + // simple heuristics like "is a valid stack address". + let mut caller_bp = None; + + // Max reasonable size for a single x86 frame is 128 KB. This value is used in + // a heuristic for recovering of the EBP chain after a scan for return address. + // This value is based on a stack frame size histogram built for a set of + // popular third party libraries which suggests that 99.5% of all frames are + // smaller than 128 KB. + const MAX_REASONABLE_GAP_BETWEEN_FRAMES: Pointer = 128 * 1024; + + // If we're on the first iteration of the scan, there can't possibly be a frame pointer, + // because the entire stack frame is taken up by the return pointer. And if we're + // not on the first iteration, then the last iteration already loaded the location + // we expect the frame pointer to be in, so we can unconditionally load it here. + if i > 0 { + let address_of_bp = address_of_ip - POINTER_WIDTH; + let bp = args + .stack_memory + .get_memory_at_address(address_of_bp as u64)?; + + if bp > address_of_ip && bp - address_of_bp <= MAX_REASONABLE_GAP_BETWEEN_FRAMES { + // Sanity check that resulting bp is still inside stack memory. + if args + .stack_memory + .get_memory_at_address::(bp as u64) + .is_some() + { + caller_bp = Some(bp); + } + } else if let Some(last_bp) = last_bp { + if last_bp >= caller_sp { + // Sanity check that resulting bp is still inside stack memory. 
+ if args + .stack_memory + .get_memory_at_address::(last_bp as u64) + .is_some() + { + caller_bp = Some(last_bp); + } + } + } + } + + trace!( + "scan seems valid -- caller_ip: 0x{:08x}, caller_sp: 0x{:08x}", + caller_ip, + caller_sp, + ); + + let caller_ctx = CONTEXT_X86 { + eip: caller_ip, + esp: caller_sp, + ebp: caller_bp.unwrap_or(0), + ..CONTEXT_X86::default() + }; + let mut valid = HashSet::new(); + valid.insert(INSTRUCTION_REGISTER); + valid.insert(STACK_POINTER_REGISTER); + if caller_bp.is_some() { + valid.insert(FRAME_POINTER_REGISTER); + } + let context = MinidumpContext { + raw: MinidumpRawContext::X86(caller_ctx), + valid: MinidumpContextValidity::Some(valid), + }; + return Some(StackFrame::from_context(context, FrameTrust::Scan)); + } + } + + None +} + +/// The most strict validation we have for instruction pointers. +/// +/// This is only used for stack-scanning, because it's explicitly +/// trying to distinguish between total garbage and correct values. +/// cfi and frame_pointer approaches do not use this validation +/// because by default they're working with plausible/trustworthy +/// data. +/// +/// Specifically, not using this validation allows cfi/fp methods +/// to unwind through frames we don't have mapped modules for (such as +/// OS APIs). This may seem confusing since we obviously don't have cfi +/// for unmapped modules! +/// +/// The way this works is that we will use cfi to unwind some frame we +/// know about and *end up* in a function we know nothing about, but with +/// all the right register values. At this point, frame pointers will +/// often do the correct thing even though we don't know what code we're +/// in -- until we get back into code we do know about and cfi kicks back in. +/// At worst, this sets scanning up in a better position for success! 
+/// +/// If we applied this more rigorous validation to cfi/fp methods, we +/// would just discard the correct register values from the known frame +/// and immediately start doing unreliable scans. +async fn instruction_seems_valid

( + instruction: Pointer, + modules: &MinidumpModuleList, + symbol_provider: &P, +) -> bool +where + P: SymbolProvider + Sync, +{ + if instruction == 0 { + return false; + } + + super::instruction_seems_valid_by_symbols(instruction as u64, modules, symbol_provider).await +} + +/* +// x86 is currently hyper-permissive, so we don't use this, +// but here it is in case we change our minds! +fn stack_seems_valid( + caller_sp: Pointer, + callee_sp: Pointer, + stack_memory: UnifiedMemory<'_, '_>, +) -> bool { + // The stack shouldn't *grow* when we unwind + if caller_sp <= callee_sp { + return false; + } + + // The stack pointer should be in the stack + stack_memory + .get_memory_at_address::(caller_sp as u64) + .is_some() +} +*/ + +pub async fn get_caller_frame

( + ctx: &CONTEXT_X86, + args: &GetCallerFrameArgs<'_, P>, +) -> Option +where + P: SymbolProvider + Sync, +{ + // .await doesn't like closures, so don't use Option chaining + let mut frame = None; + if frame.is_none() { + frame = get_caller_by_cfi(ctx, args).await; + } + if frame.is_none() { + frame = get_caller_by_frame_pointer(ctx, args); + } + if frame.is_none() { + frame = get_caller_by_scan(ctx, args).await; + } + let mut frame = frame?; + + // We now check the frame to see if it looks like unwinding is complete, + // based on the frame we computed having a nonsense value. Returning + // None signals to the unwinder to stop unwinding. + + // if the instruction is within the first ~page of memory, it's basically + // null, and we can assume unwinding is complete. + if frame.context.get_instruction_pointer() < 4096 { + trace!("instruction pointer was nullish, assuming unwind complete"); + return None; + } + // If the new stack pointer is at a lower address than the old, + // then that's clearly incorrect. Treat this as end-of-stack to + // enforce progress and avoid infinite loops. + if frame.context.get_stack_pointer() <= ctx.esp as u64 { + trace!("stack pointer went backwards, assuming unwind complete"); + return None; + } + + // Ok, the frame now seems well and truly valid, do final cleanup. + + // A caller's ip is the return address, which is the instruction + // *after* the CALL that caused us to arrive at the callee. Set + // the value to one less than that, so it points within the + // CALL instruction. This is important because we use this value + // to lookup the CFI we need to unwind the next frame. 
+ let ip = frame.context.get_instruction_pointer(); + frame.instruction = ip - 1; + + Some(frame) +} diff --git a/third_party/rust/minidump-unwind/src/x86_unittest.rs b/third_party/rust/minidump-unwind/src/x86_unittest.rs new file mode 100644 index 000000000000..163d5027f22e --- /dev/null +++ b/third_party/rust/minidump-unwind/src/x86_unittest.rs @@ -0,0 +1,975 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +use crate::*; +use minidump::format::CONTEXT_X86; +use minidump::system_info::{Cpu, Os}; +use std::collections::HashMap; +use test_assembler::*; + +struct TestFixture { + pub raw: CONTEXT_X86, + pub modules: MinidumpModuleList, + pub symbols: HashMap, +} + +impl TestFixture { + pub fn new() -> TestFixture { + TestFixture { + raw: CONTEXT_X86::default(), + // Give the two modules reasonable standard locations and names + // for tests to play with. + modules: MinidumpModuleList::from_modules(vec![ + MinidumpModule::new(0x40000000, 0x10000, "module1"), + MinidumpModule::new(0x50000000, 0x10000, "module2"), + ]), + symbols: HashMap::new(), + } + } + + pub async fn walk_stack(&self, stack: Section) -> CallStack { + let context = MinidumpContext { + raw: MinidumpRawContext::X86(self.raw.clone()), + valid: MinidumpContextValidity::All, + }; + let base = stack.start().value().unwrap(); + let size = stack.size(); + let stack = stack.get_contents().unwrap(); + let stack_memory = MinidumpMemory { + desc: Default::default(), + base_address: base, + size, + bytes: &stack, + endian: scroll::LE, + }; + let system_info = SystemInfo { + os: Os::Windows, + os_version: None, + os_build: None, + cpu: Cpu::X86, + cpu_info: None, + cpu_microcode_version: None, + cpu_count: 1, + }; + let symbolizer = Symbolizer::new(string_symbol_supplier(self.symbols.clone())); + let mut stack = CallStack::with_context(context); + + walk_stack( + 0, + (), + &mut stack, + Some(UnifiedMemory::Memory(&stack_memory)), + &self.modules, 
+ &system_info, + &symbolizer, + ) + .await; + + stack + } + + pub fn add_symbols(&mut self, name: String, symbols: String) { + self.symbols.insert(name, symbols); + } +} + +#[tokio::test] +async fn test_simple() { + let mut f = TestFixture::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + stack = stack.D32(0).D32(0); // end-of-stack marker + f.raw.eip = 0x40000200; + f.raw.ebp = 0x80000000; + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + let f = &s.frames[0]; + let m = f.module.as_ref().unwrap(); + assert_eq!(m.code_file(), "module1"); +} + +// Walk a traditional frame. A traditional frame saves the caller's +// %ebp just below the return address, and has its own %ebp pointing +// at the saved %ebp. +#[tokio::test] +async fn test_traditional() { + let mut f = TestFixture::new(); + let frame0_ebp = Label::new(); + let frame1_ebp = Label::new(); + let mut stack = Section::new(); + stack.start().set_const(0x80000000); + stack = stack + .append_repeated(12, 0) // frame 0: space + .mark(&frame0_ebp) // frame 0 %ebp points here + .D32(&frame1_ebp) // frame 0: saved %ebp + .D32(0x40008679) // frame 0: resume address + .append_repeated(8, 0) // frame 1: space + .mark(&frame1_ebp) // frame 1 %ebp points here + .D32(0) // frame 1: saved %ebp (stack end) + .D32(0); // frame 1: return address (stack end) + f.raw.eip = 0x4000c7a5; + f.raw.esp = stack.start().value().unwrap() as u32; + f.raw.ebp = frame0_ebp.value().unwrap() as u32; + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + { + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + assert_eq!(f0.instruction, 0x4000c7a5); + assert_eq!(f0.resume_address, 0x4000c7a5); + // eip + // ebp + } + { + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::FramePointer); + // ContextValidity + assert_eq!(f1.instruction, 0x40008678); + assert_eq!(f1.resume_address, 
0x40008679); + // eip + // ebp + } +} + +// Walk a traditional frame, but use a bogus %ebp value, forcing a scan +// of the stack for something that looks like a return address. +#[tokio::test] +async fn test_traditional_scan() { + let mut f = TestFixture::new(); + let frame1_esp = Label::new(); + let frame1_ebp = Label::new(); + let mut stack = Section::new(); + let stack_start = 0x80000000; + stack.start().set_const(stack_start); + stack = stack + // frame 0 + .D32(0xf065dc76) // locals area: + .D32(0x46ee2167) // garbage that doesn't look like + .D32(0xbab023ec) // a return address + .D32(&frame1_ebp) // saved %ebp (%ebp fails to point here, forcing scan) + .D32(0x4000129d) // return address + // frame 1 + .mark(&frame1_esp) + .append_repeated(8, 0) // space + .mark(&frame1_ebp) // %ebp points here + .D32(0) // saved %ebp (stack end) + .D32(0); // return address (stack end) + + f.raw.eip = 0x4000f49d; + f.raw.esp = stack.start().value().unwrap() as u32; + // Make the frame pointer bogus, to make the stackwalker scan the stack + // for something that looks like a return address. 
+ f.raw.ebp = 0xd43eed6e; + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + assert_eq!(f0.instruction, 0x4000f49d); + assert_eq!(f0.resume_address, 0x4000f49d); + + if let MinidumpRawContext::X86(ctx) = &f0.context.raw { + assert_eq!(ctx.eip, 0x4000f49d); + assert_eq!(ctx.esp, stack_start as u32); + assert_eq!(ctx.ebp, 0xd43eed6e); + } else { + unreachable!(); + } + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("eip")); + assert!(which.contains("esp")); + assert!(which.contains("ebp")); + } else { + unreachable!(); + } + assert_eq!(f1.instruction + 1, 0x4000129d); + assert_eq!(f1.resume_address, 0x4000129d); + + if let MinidumpRawContext::X86(ctx) = &f1.context.raw { + assert_eq!(ctx.eip, 0x4000129d); + assert_eq!(ctx.esp, frame1_esp.value().unwrap() as u32); + assert_eq!(ctx.ebp, frame1_ebp.value().unwrap() as u32); + } else { + unreachable!(); + } + } +} + +// Force scanning for a return address a long way down the stack +#[tokio::test] +async fn test_traditional_scan_long_way() { + let mut f = TestFixture::new(); + let frame1_esp = Label::new(); + let frame1_ebp = Label::new(); + let mut stack = Section::new(); + let stack_start = 0x80000000; + stack.start().set_const(stack_start); + + stack = stack + // frame 0 + .D32(0xf065dc76) // locals area: + .D32(0x46ee2167) // garbage that doesn't look like + .D32(0xbab023ec) // a return address + .append_repeated(20 * 4, 0) // a bunch of space + .D32(&frame1_ebp) // saved %ebp (%ebp fails to point here, forcing scan) + .D32(0x4000129d) // return address + // frame 1 + .mark(&frame1_esp) + .append_repeated(8, 0) // space + .mark(&frame1_ebp) // %ebp points here 
+ .D32(0) // saved %ebp (stack end) + .D32(0); // return address (stack end) + + f.raw.eip = 0x4000f49d; + f.raw.esp = stack.start().value().unwrap() as u32; + // Make the frame pointer bogus, to make the stackwalker scan the stack + // for something that looks like a return address. + f.raw.ebp = 0xd43eed6e; + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + assert_eq!(f0.instruction, 0x4000f49d); + + if let MinidumpRawContext::X86(ctx) = &f0.context.raw { + assert_eq!(ctx.eip, 0x4000f49d); + assert_eq!(ctx.esp, stack_start as u32); + assert_eq!(ctx.ebp, 0xd43eed6e); + } else { + unreachable!(); + } + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::Scan); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("eip")); + assert!(which.contains("esp")); + assert!(which.contains("ebp")); + } else { + unreachable!(); + } + assert_eq!(f1.instruction + 1, 0x4000129d); + + if let MinidumpRawContext::X86(ctx) = &f1.context.raw { + assert_eq!(ctx.eip, 0x4000129d); + assert_eq!(ctx.esp, frame1_esp.value().unwrap() as u32); + assert_eq!(ctx.ebp, frame1_ebp.value().unwrap() as u32); + } else { + unreachable!(); + } + } +} + +const CALLEE_SAVE_REGS: &[&str] = &["eip", "esp", "ebp", "ebx", "edi", "esi"]; + +fn init_cfi_state() -> (TestFixture, Section, CONTEXT_X86, MinidumpContextValidity) { + let mut f = TestFixture::new(); + let symbols = [ + // The youngest frame's function. + "FUNC 4000 1000 10 enchiridion\n", + // Initially, just a return address. + "STACK CFI INIT 4000 100 .cfa: $esp 4 + .ra: .cfa 4 - ^\n", + // Push %ebx. + "STACK CFI 4001 .cfa: $esp 8 + $ebx: .cfa 8 - ^\n", + // Move %esi into %ebx. Weird, but permitted. 
+ "STACK CFI 4002 $esi: $ebx\n", + // Allocate frame space, and save %edi. + "STACK CFI 4003 .cfa: $esp 20 + $edi: .cfa 16 - ^\n", + // Put the return address in %edi. + "STACK CFI 4005 .ra: $edi\n", + // Save %ebp, and use it as a frame pointer. + "STACK CFI 4006 .cfa: $ebp 8 + $ebp: .cfa 12 - ^\n", + // The calling function. + "FUNC 5000 1000 10 epictetus\n", + // Mark it as end of stack. + "STACK CFI INIT 5000 1000 .cfa: $esp .ra 0\n", + ]; + f.add_symbols(String::from("module1"), symbols.concat()); + + f.raw.set_register("esp", 0x80000000); + f.raw.set_register("eip", 0x40005510); + f.raw.set_register("ebp", 0xc0d4aab9); + f.raw.set_register("ebx", 0x60f20ce6); + f.raw.set_register("esi", 0x53d1379d); + f.raw.set_register("edi", 0xafbae234); + + let raw_valid = MinidumpContextValidity::All; + + let expected = f.raw.clone(); + let expected_regs = CALLEE_SAVE_REGS; + let expected_valid = MinidumpContextValidity::Some(expected_regs.iter().copied().collect()); + + let stack = Section::new(); + stack + .start() + .set_const(f.raw.get_register("esp", &raw_valid).unwrap() as u64); + + (f, stack, expected, expected_valid) +} + +async fn check_cfi( + f: TestFixture, + stack: Section, + expected: CONTEXT_X86, + expected_valid: MinidumpContextValidity, +) { + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // Frame 0 + let frame = &s.frames[0]; + assert_eq!(frame.trust, FrameTrust::Context); + assert_eq!(frame.context.valid, MinidumpContextValidity::All); + } + + { + // Frame 1 + if let MinidumpContextValidity::Some(ref expected_regs) = expected_valid { + let frame = &s.frames[1]; + let valid = &frame.context.valid; + assert_eq!(frame.trust, FrameTrust::CallFrameInfo); + if let MinidumpContextValidity::Some(ref which) = valid { + assert_eq!(which.len(), expected_regs.len()); + } else { + unreachable!(); + } + + if let MinidumpRawContext::X86(ctx) = &frame.context.raw { + for reg in expected_regs { + assert_eq!( + ctx.get_register(reg, valid), + 
expected.get_register(reg, &expected_valid), + "{reg} registers didn't match!" + ); + } + return; + } + } + } + unreachable!(); +} + +#[tokio::test] +async fn test_cfi_at_4000() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D32(0x40005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("esp", frame1_rsp.value().unwrap() as u32); + f.raw.set_register("eip", 0x40004000); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4001() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D32(0x60f20ce6) // saved %ebx + .D32(0x40005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("esp", frame1_rsp.value().unwrap() as u32); + f.raw.set_register("eip", 0x40004001); + f.raw.set_register("ebx", 0x91aa9a8b); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4002() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D32(0x60f20ce6) // saved %ebx + .D32(0x40005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("esp", frame1_rsp.value().unwrap() as u32); + f.raw.set_register("eip", 0x40004002); + f.raw.set_register("ebx", 0x53d1379d); + f.raw.set_register("esi", 0xa5c790ed); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4003() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D32(0x56ec3db7) // garbage + .D32(0xafbae234) // saved %edi + .D32(0x53d67131) // garbage + .D32(0x60f20ce6) // saved %ebx + .D32(0x40005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 
1000); + + expected.set_register("esp", frame1_rsp.value().unwrap() as u32); + f.raw.set_register("eip", 0x40004003); + f.raw.set_register("ebx", 0x53d1379d); + f.raw.set_register("esi", 0xa97f229d); + f.raw.set_register("edi", 0xb05cc997); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4004() { + // Should be the same as 4003 + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D32(0x56ec3db7) // garbage + .D32(0xafbae234) // saved %edi + .D32(0x53d67131) // garbage + .D32(0x60f20ce6) // saved %ebx + .D32(0x40005510) // return address + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("esp", frame1_rsp.value().unwrap() as u32); + f.raw.set_register("eip", 0x40004004); + f.raw.set_register("ebx", 0x53d1379d); + f.raw.set_register("esi", 0xa97f229d); + f.raw.set_register("edi", 0xb05cc997); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4005() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame1_rsp = Label::new(); + stack = stack + .D32(0xe29782c2) // garbage + .D32(0xafbae234) // saved %edi + .D32(0x5ba29ce9) // garbage + .D32(0x60f20ce6) // saved %ebx + .D32(0x8036cc02) // garbage + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("esp", frame1_rsp.value().unwrap() as u32); + f.raw.set_register("eip", 0x40004005); + f.raw.set_register("ebx", 0x53d1379d); + f.raw.set_register("esi", 0x0fb7dc4e); + f.raw.set_register("edi", 0x40005510); + + check_cfi(f, stack, expected, expected_valid).await; +} + +#[tokio::test] +async fn test_cfi_at_4006() { + let (mut f, mut stack, mut expected, expected_valid) = init_cfi_state(); + + let frame0_ebp = Label::new(); + let frame1_rsp = Label::new(); + stack = stack + .D32(0xdcdd25cd) // garbage + .D32(0xafbae234) // saved %edi + .D32(0xc0d4aab9) // saved %ebp + 
.mark(&frame0_ebp) // frame pointer points here + .D32(0x60f20ce6) // saved %ebx + .D32(0x8036cc02) // garbage + .mark(&frame1_rsp) + .append_repeated(0, 1000); + + expected.set_register("esp", frame1_rsp.value().unwrap() as u32); + f.raw + .set_register("ebp", frame0_ebp.value().unwrap() as u32); + f.raw.set_register("eip", 0x40004006); + f.raw.set_register("ebx", 0x53d1379d); + f.raw.set_register("esi", 0x743833c9); + f.raw.set_register("edi", 0x40005510); + + check_cfi(f, stack, expected, expected_valid).await; +} + +// Totally basic STACK WIN frame data, no weird stuff. +#[tokio::test] +async fn test_stack_win_frame_data_basic() { + let mut f = TestFixture::new(); + let symbols = [ + "STACK WIN 4 aa85 176 0 0 4 10 4 0 1", + " $T2 $esp .cbSavedRegs + =", + " $T0 .raSearchStart =", + " $eip $T0 ^ =", + " $esp $T0 4 + =", + " $ebx $T2 4 - ^ =", + " $edi $T2 8 - ^ =", + " $esi $T2 12 - ^ =", + " $ebp $T2 16 - ^ =\n", + ]; + f.add_symbols(String::from("module1"), symbols.concat()); + + let frame1_esp = Label::new(); + let frame1_ebp = Label::new(); + + let mut stack = Section::new(); + let stack_start = 0x80000000; + stack.start().set_const(stack_start); + + stack = stack + // frame 0 + .D32(&frame1_ebp) // saved regs: %ebp + .D32(0xa7120d1a) // %esi + .D32(0x630891be) // %edi + .D32(0x9068a878) // %ebx + .D32(0xa08ea45f) // locals: unused + .D32(0x40001350) // return address + // frame 1 + .mark(&frame1_esp) + .append_repeated(0, 12) // empty space + .mark(&frame1_ebp) + .D32(0) // saved %ebp (stack end) + .D32(0); // saved %eip (stack end) + + f.raw.set_register("eip", 0x4000aa85); + f.raw + .set_register("esp", stack.start().value().unwrap() as u32); + f.raw.set_register("ebp", 0xf052c1de); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + assert_eq!(f0.instruction, 0x4000aa85); + + if let 
MinidumpRawContext::X86(ctx) = &f0.context.raw { + assert_eq!(ctx.eip, 0x4000aa85); + assert_eq!(ctx.esp, stack_start as u32); + assert_eq!(ctx.ebp, 0xf052c1de); + } else { + unreachable!(); + } + } + + { + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::CallFrameInfo); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("eip")); + assert!(which.contains("esp")); + assert!(which.contains("ebp")); + assert!(which.contains("ebx")); + assert!(which.contains("esi")); + assert!(which.contains("edi")); + } else { + unreachable!(); + } + assert_eq!(f1.instruction + 1, 0x40001350); + + if let MinidumpRawContext::X86(ctx) = &f1.context.raw { + assert_eq!(ctx.eip, 0x40001350); + assert_eq!(ctx.esp, frame1_esp.value().unwrap() as u32); + assert_eq!(ctx.ebp, frame1_ebp.value().unwrap() as u32); + assert_eq!(ctx.ebx, 0x9068a878); + assert_eq!(ctx.esi, 0xa7120d1a); + assert_eq!(ctx.edi, 0x630891be); + } else { + unreachable!(); + } + } +} + +// Totally basic STACK WIN frame data, no weird stuff. +#[tokio::test] +async fn test_stack_win_frame_data_overlapping() { + // Same as frame_data_basic but there are extra entries which technically overlap + // with this one, but in a way that is easily disambiguated by preferring the + // one with the higher base address. This happens frequently in real symbol files. + let mut f = TestFixture::new(); + let symbols = [ + // Entry that covers the "whole" function (junk!) + "STACK WIN 4 aa80 181 0 0 4 10 4 0 1", + " $eip .raSearchStart =\n", + // More precise (still junk!) + "STACK WIN 4 aa84 177 0 0 4 10 4 0 1", + " $eip .raSearchStart =\n", + // This is the one we want!!! + "STACK WIN 4 aa85 176 0 0 4 10 4 0 1", + " $T2 $esp .cbSavedRegs + =", + " $T0 .raSearchStart =", + " $eip $T0 ^ =", + " $esp $T0 4 + =", + " $ebx $T2 4 - ^ =", + " $edi $T2 8 - ^ =", + " $esi $T2 12 - ^ =", + " $ebp $T2 16 - ^ =\n", + // An even more precise one but past the address we care about (junk!) 
+ "STACK WIN 4 aa86 175 0 0 4 10 4 0 1", + " $eip .raSearchStart =\n", + ]; + f.add_symbols(String::from("module1"), symbols.concat()); + + let frame1_esp = Label::new(); + let frame1_ebp = Label::new(); + + let mut stack = Section::new(); + let stack_start = 0x80000000; + stack.start().set_const(stack_start); + + stack = stack + // frame 0 + .D32(&frame1_ebp) // saved regs: %ebp + .D32(0xa7120d1a) // %esi + .D32(0x630891be) // %edi + .D32(0x9068a878) // %ebx + .D32(0xa08ea45f) // locals: unused + .D32(0x40001350) // return address + // frame 1 + .mark(&frame1_esp) + .append_repeated(0, 12) // empty space + .mark(&frame1_ebp) + .D32(0) // saved %ebp (stack end) + .D32(0); // saved %eip (stack end) + + f.raw.set_register("eip", 0x4000aa85); + f.raw + .set_register("esp", stack.start().value().unwrap() as u32); + f.raw.set_register("ebp", 0xf052c1de); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + assert_eq!(f0.instruction, 0x4000aa85); + + if let MinidumpRawContext::X86(ctx) = &f0.context.raw { + assert_eq!(ctx.eip, 0x4000aa85); + assert_eq!(ctx.esp, stack_start as u32); + assert_eq!(ctx.ebp, 0xf052c1de); + } else { + unreachable!(); + } + } + + { + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::CallFrameInfo); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("eip")); + assert!(which.contains("esp")); + assert!(which.contains("ebp")); + assert!(which.contains("ebx")); + assert!(which.contains("esi")); + assert!(which.contains("edi")); + } else { + unreachable!(); + } + assert_eq!(f1.instruction + 1, 0x40001350); + + if let MinidumpRawContext::X86(ctx) = &f1.context.raw { + assert_eq!(ctx.eip, 0x40001350); + assert_eq!(ctx.esp, frame1_esp.value().unwrap() as u32); + assert_eq!(ctx.ebp, frame1_ebp.value().unwrap() as u32); + assert_eq!(ctx.ebx, 
0x9068a878); + assert_eq!(ctx.esi, 0xa7120d1a); + assert_eq!(ctx.edi, 0x630891be); + } else { + unreachable!(); + } + } +} + +// Testing that grand_callee_parameter_size is properly computed. +#[tokio::test] +async fn test_stack_win_frame_data_parameter_size() { + let mut f = TestFixture::new(); + + let module1_symbols = ["FUNC 1000 100 c module1::wheedle\n"]; + + let module2_symbols = [ + // Note bogus parameter size in FUNC record; the stack walker + // should prefer the STACK WIN record, and see '4' below. + "FUNC aa85 176 beef module2::whine\n", + "STACK WIN 4 aa85 176 0 0 4 10 4 0 1", + " $T2 $esp .cbLocals + .cbSavedRegs + =", + " $T0 .raSearchStart =", + " $eip $T0 ^ =", + " $esp $T0 4 + =", + " $ebp $T0 20 - ^ =", + " $ebx $T0 8 - ^ =\n", + ]; + f.add_symbols(String::from("module1"), module1_symbols.concat()); + f.add_symbols(String::from("module2"), module2_symbols.concat()); + + let frame0_esp = Label::new(); + let frame0_ebp = Label::new(); + let frame1_esp = Label::new(); + let frame2_esp = Label::new(); + let frame2_ebp = Label::new(); + + let mut stack = Section::new(); + let stack_start = 0x80000000; + stack.start().set_const(stack_start); + + stack = stack + // frame 0, in module1::wheedle. Traditional frame. + .mark(&frame0_esp) + .append_repeated(0, 16) // frame space + .mark(&frame0_ebp) + .D32(0x6fa902e0) // saved %ebp. Not a frame pointer. + .D32(0x5000aa95) // return address, in module2::whine + // frame 1, in module2::whine. FrameData frame. 
+ .mark(&frame1_esp) + .D32(0xbaa0cb7a) // argument 3 passed to module1::wheedle + .D32(0xbdc92f9f) // argument 2 + .D32(0x0b1d8442) // argument 1 + .D32(&frame2_ebp) // saved %ebp + .D32(0xb1b90a15) // unused + .D32(0xf18e072d) // unused + .D32(0x2558c7f3) // saved %ebx + .D32(0x0365e25e) // unused + .D32(0x2a179e38) // return address; $T0 points here + // frame 2, in no module + .mark(&frame2_esp) + .append_repeated(0, 12) // empty space + .mark(&frame2_ebp) + .D32(0) // saved %ebp (stack end) + .D32(0); // saved %eip (stack end) + + f.raw.set_register("eip", 0x40001004); + f.raw + .set_register("esp", stack.start().value().unwrap() as u32); + f.raw + .set_register("ebp", frame0_ebp.value().unwrap() as u32); + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 3); + + { + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + assert_eq!(f0.instruction, 0x40001004); + + if let MinidumpRawContext::X86(ctx) = &f0.context.raw { + assert_eq!(ctx.eip, 0x40001004); + assert_eq!(ctx.esp, frame0_esp.value().unwrap() as u32); + assert_eq!(ctx.ebp, frame0_ebp.value().unwrap() as u32); + } else { + unreachable!(); + } + } + + { + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("eip")); + assert!(which.contains("esp")); + assert!(which.contains("ebp")); + } else { + unreachable!(); + } + assert_eq!(f1.instruction + 1, 0x5000aa95); + + if let MinidumpRawContext::X86(ctx) = &f1.context.raw { + assert_eq!(ctx.eip, 0x5000aa95); + assert_eq!(ctx.esp, frame1_esp.value().unwrap() as u32); + assert_eq!(ctx.ebp, 0x6fa902e0); + } else { + unreachable!(); + } + } + + { + let f2 = &s.frames[2]; + assert_eq!(f2.trust, FrameTrust::CallFrameInfo); + if let MinidumpContextValidity::Some(ref which) = f2.context.valid { + assert!(which.contains("eip")); + 
assert!(which.contains("esp")); + assert!(which.contains("ebp")); + assert!(which.contains("ebx")); + } else { + unreachable!(); + } + assert_eq!(f2.instruction + 1, 0x2a179e38); + + if let MinidumpRawContext::X86(ctx) = &f2.context.raw { + assert_eq!(ctx.eip, 0x2a179e38); + assert_eq!(ctx.esp, frame2_esp.value().unwrap() as u32); + assert_eq!(ctx.ebp, frame2_ebp.value().unwrap() as u32); + assert_eq!(ctx.ebx, 0x2558c7f3); + } else { + unreachable!(); + } + } +} + +#[tokio::test] +async fn test_frame_pointer_overflow() { + // Make sure we don't explode when trying frame pointer analysis on a value + // that will overflow. + + type Pointer = u32; + let stack_max: Pointer = Pointer::MAX; + let stack_size: Pointer = 1000; + let bad_frame_ptr: Pointer = stack_max; + + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start: Pointer = stack_max - stack_size; + stack.start().set_const(stack_start as u64); + + stack = stack + // frame 0 + .append_repeated(0, stack_size as usize); // junk, not important to the test + + f.raw.eip = 0x7a100000; + f.raw.ebp = bad_frame_ptr; + f.raw.esp = stack.start().value().unwrap() as Pointer; + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + + // As long as we don't panic, we're good! +} + +#[tokio::test] +async fn test_frame_pointer_overflow_nonsense_32bit_stack() { + // same as test_frame_pointer_overflow, but we're going to abuse the fact + // that rust-minidump prefers representing things in 64-bit to create + // impossible stack addresses that overflow 32-bit integers but appear + // valid in 64-bit. By doing this memory reads will "succeed" but + // pointer math done in the native pointer width will overflow and + // everything will be sad. 
+ + type Pointer = u32; + let pointer_size: u64 = std::mem::size_of::() as u64; + let stack_max: u64 = Pointer::MAX as u64 + pointer_size * 2; + let stack_size: u64 = 1000; + let bad_frame_ptr: u64 = Pointer::MAX as u64 - pointer_size; + + let mut f = TestFixture::new(); + let mut stack = Section::new(); + let stack_start: u64 = stack_max - stack_size; + stack.start().set_const(stack_start); + + stack = stack + // frame 0 + .append_repeated(0, 1000); // junk, not important to the test + + f.raw.eip = 0x7a100000; + f.raw.ebp = bad_frame_ptr as u32; + f.raw.esp = stack.start().value().unwrap() as Pointer; + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 1); + + // As long as we don't panic, we're good! +} + +#[tokio::test] +async fn test_frame_pointer_barely_no_overflow() { + // This is test_tradition but with the all the values pushed + // as close to the upper memory boundary as possible, to confirm that + // our code doesn't randomly overflow *AND* isn't overzealous in + // its overflow guards. 
+ + let mut f = TestFixture::new(); + let mut stack = Section::new(); + + type Pointer = u32; + let pointer_size: Pointer = std::mem::size_of::() as Pointer; + let stack_max: Pointer = Pointer::MAX; + let stack_size: Pointer = pointer_size * 3; + + let stack_start: Pointer = stack_max - stack_size; + let return_address: Pointer = 0x7b302000; + stack.start().set_const(stack_start as u64); + + let frame0_fp = Label::new(); + let frame1_sp = Label::new(); + let frame1_fp = Label::new(); + + stack = stack + // frame 0 + .mark(&frame0_fp) + .D32(&frame1_fp) // caller-pushed %rbp + .D32(return_address) // actual return address + // frame 1 + .mark(&frame1_sp) + .mark(&frame1_fp) // end of stack + .D32(0); + + f.raw.eip = 0x7a100000; + f.raw.ebp = frame0_fp.value().unwrap() as Pointer; + f.raw.esp = stack.start().value().unwrap() as Pointer; + + let s = f.walk_stack(stack).await; + assert_eq!(s.frames.len(), 2); + + { + // To avoid reusing locals by mistake + let f0 = &s.frames[0]; + assert_eq!(f0.trust, FrameTrust::Context); + assert_eq!(f0.context.valid, MinidumpContextValidity::All); + if let MinidumpRawContext::X86(ctx) = &f0.context.raw { + assert_eq!(ctx.ebp, frame0_fp.value().unwrap() as Pointer); + } else { + unreachable!(); + } + } + + { + // To avoid reusing locals by mistake + let f1 = &s.frames[1]; + assert_eq!(f1.trust, FrameTrust::FramePointer); + if let MinidumpContextValidity::Some(ref which) = f1.context.valid { + assert!(which.contains("eip")); + assert!(which.contains("esp")); + assert!(which.contains("ebp")); + } else { + unreachable!(); + } + if let MinidumpRawContext::X86(ctx) = &f1.context.raw { + assert_eq!(ctx.eip, return_address); + assert_eq!(ctx.esp, frame1_sp.value().unwrap() as Pointer); + assert_eq!(ctx.ebp, frame1_fp.value().unwrap() as Pointer); + } else { + unreachable!(); + } + } +} diff --git a/third_party/rust/minidump-writer/.cargo-checksum.json b/third_party/rust/minidump-writer/.cargo-checksum.json index 1036c5f77f3a..a403eecb155d 
100644 --- a/third_party/rust/minidump-writer/.cargo-checksum.json +++ b/third_party/rust/minidump-writer/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CHANGELOG.md":"dd79b025c48c70745cf60d41443191ad6ed92034efe8dd3db4b8c6cef031e4ab","Cargo.lock":"34813428631324758b699a604e0a077c56338cae5312b09f42949e8e341948fc","Cargo.toml":"84688c1ccef09106253992cb34a263e2eee168ad824fc52fe19e845d5169c169","LICENSE":"1ecdd8e8977af83c07c5f97bec87b47d27059b7ea323ca3160fbfa2314f5d99c","README.md":"71742b170ac34ceecf317d6d69456063bf5d8974453075e9cd2838785717fcdb","build.rs":"689cd32a441f5011f694a9f86bc03bc27c2a09bcb4130f47e04fe00bb069b1b5","deny.toml":"872e0c050720257cf85e660834ffe3d9e39f9fb7b3c795c0c44af3702ec2ff21","examples/synthetic.rs":"cd13bd0bba64a1d8c8c326bb1ce1818bead6904e5708418fd09edeaef1437c24","release.toml":"f554067378aec602383b96e5fa63427136533a7dd00137fd0664b279fb8fcc56","src/bin/test.rs":"dc408d74b7d142687b5d450c20ff0e9af7c508bd78af84988472e5dd4e940f29","src/dir_section.rs":"b7c83b16acefb4327b406612e1a667dffb91f86bec40f9330171dba8d10346ff","src/lib.rs":"6f393273ea6b354f412e70c9a4925fe2e646bc477591962c96a61aed9fde4e3b","src/linux.rs":"65d1249e80a3c3b6ab8c4361891b5ef35fe68f91253f6378ae03a0ad59ecd807","src/linux/android.rs":"9f2fd633de0d545b400c479a70f000cb9d04bb6e322f62098f4a7ebe7299f051","src/linux/app_memory.rs":"5f093e4ed0aecc6086366a9c09658761fdd3b0e6e9ff2111690719e56612df64","src/linux/auxv_reader.rs":"e9c75b0093c845da8dd321c6582e65678772c57658358213098aa6267f3679e1","src/linux/crash_context.rs":"069d949c7a9fc23a1e8710f7c018cacf71af2326a2dd6ca0283d492c5253107a","src/linux/crash_context/aarch64.rs":"158d02200b25a7ca029ff5132ba3b7f76a05c9976ff529a5658523aff370e4d2","src/linux/crash_context/arm.rs":"a4f41aae015937d3dccdf759df84bc657cf457d8baaf07711617d17f4b40f6f1","src/linux/crash_context/x86.rs":"d464680ee9df8a7f11dae2bb199cc4efb66dfc790a87c9b773c3a9b44b9339d8","src/linux/crash_context/x86_64.rs":"dceabbcf4d3c4ff5ad7658daa7dec3349cad9b61091385bcd868de6b11eb4957","src/li
nux/dso_debug.rs":"2f117049a71362797af88ce6f8aaa9d43ba4572a3fec46a1c5642c46f78a9abf","src/linux/dumper_cpu_info.rs":"76558ffc85386e416bbfc49adc550f61fb206dfbfb0b6a25f620a5fc91f32bc1","src/linux/dumper_cpu_info/arm.rs":"6fc140181f3a32504c3b66f0235136e730424be830602c87cfbeb1dfb4c1cec3","src/linux/dumper_cpu_info/x86_mips.rs":"1b7f554190c2c0f0575e36995aae5211a0fc9f29f3949ac7521a9d148566f6e9","src/linux/errors.rs":"f0bc2a400a16d322f8d0dd5c76afa61fa32cf8a86f1b4e937298f767de031cb8","src/linux/maps_reader.rs":"0831f0775336eb4123bdc92a734b8c4b1235c0d34a06fb09e363dcc83eda8b5c","src/linux/minidump_writer.rs":"048ffcebb95f899494bbeed84b79a2294375ce240f6293ad95fa5bba41187e95","src/linux/ptrace_dumper.rs":"c35a790e381d96b312bb3ffe864b4a55069da194fc4c6eeb1cf12a89d0a7ca9d","src/linux/sections.rs":"98aabd7e4b0542201783af90cafdc66bb3845585d0ed24af7476c1ea3ab40fb9","src/linux/sections/app_memory.rs":"52fdfe9622a395763fef94c5fea924935fcd86103c3eb1b0fb8d7ea61fd3e783","src/linux/sections/exception_stream.rs":"e1b73b5168506f8804479206ef0941aca5f792500cc56c5c17db55cd6b273a99","src/linux/sections/handle_data_stream.rs":"3aae030c009543142555d493f9273e3445454b18085dfdb3b44bc83d43b510dd","src/linux/sections/mappings.rs":"833f1783a4d9b761f1360ba438d6f7f032f831b8a1b15585dea1c90d7637f58a","src/linux/sections/memory_info_list_stream.rs":"bd927824e859e2fcbc85ef9d5e3d8b7be13c4918a636f70f0712070d6c65b002","src/linux/sections/memory_list_stream.rs":"f2d5b33fb4167b502dc8e74371d50b4ad66e0b48f541fe16bcbf5fb62491bf2d","src/linux/sections/systeminfo_stream.rs":"02373c97d4caeb66f4601c066d430ad2929b5584eaa0af3be1dc51ef818e5cb8","src/linux/sections/thread_list_stream.rs":"9ea966c00a878ef1a6e6781005ddf9909c18177f2c455cb250c4ac191040fc69","src/linux/sections/thread_names_stream.rs":"2a123335626e941cf0a3349f9327c3e7606af593a8370ef5f71325012107b91f","src/linux/thread_info.rs":"3588e6c9deedfd88bd024c30405172c05cea1bd37dcc64df687d6c218d2e4c5a","src/linux/thread_info/aarch64.rs":"fcbdd6b9185f00b4f42eef27076a8c9edab
8afcaf22e1c005e32702be16e2264","src/linux/thread_info/arm.rs":"cf927d9f0ea48e07a874e8d6049e447c0f12a0715ed166448c3b07a410dd958a","src/linux/thread_info/mips.rs":"332d7888bb08a36d3b77023a4a0385193a07b1c6c5bf0e91aacb5d948ba8b903","src/linux/thread_info/x86.rs":"a759b2298f0831298c56e49717b4f188c213ebeb30b38be23af5bcbe6fd96369","src/mac.rs":"4671ad90c433db559ec633c880c3fe083f38a2e185ef4fc99577318526076519","src/mac/errors.rs":"696473d1187a0f003409e3ac0fc83a02cafbad8451a62bda868b08acc36e7443","src/mac/mach.rs":"f5f5b3bde9fd3ea85903b75c80ddb15931a56d5d4425f01fda9643fba8e419d3","src/mac/minidump_writer.rs":"d6f07685779c45f6b773d4163965f5dc5e0a73421937d9ec6b8e4d3320d23e85","src/mac/streams.rs":"82e1b9fefa26d98f1c0efdd98de0aed9a9ee9edf3e4b4f636212c73f8d3e464e","src/mac/streams/breakpad_info.rs":"9aafbc3f376050e39a2295e77a1b764223403ffdd47bded39efa9450a53b5a06","src/mac/streams/exception.rs":"6ad730ff0fbba4ec8a7c8576effb737138ca92e0f676eeff5d8e3c47d20a1434","src/mac/streams/memory_list.rs":"e507a1ada858ef535e3e211d91195901e9976ba0ff6cf601919d7cacb48c5e9f","src/mac/streams/misc_info.rs":"ed9fe394ded145cae62094adf3a9e454e23bfd5b6fab7871f0cf62c6a5e79017","src/mac/streams/module_list.rs":"959239425c1ea4c9766303fd10a12c82d1fed27b462f836a8a317a470ea0ace7","src/mac/streams/system_info.rs":"4bd0d171c9d7c97afbacf6fd144d5554f0e46d47ae78f00b85331d6d1a1d24b3","src/mac/streams/thread_list.rs":"2b003e3409daebbd38ebc8a49a2c24e7a75b296d28f58a2c85114cf611174c1b","src/mac/streams/thread_names.rs":"cc1464c2610e6022046b5322d9b48a72a6b5d35923d75afd7aa4b82d355c7dc4","src/mac/task_dumper.rs":"b22f42b11bb7e9af5976ec04cbf9f8bf4d1d70c3c7738b1072e7bd41e8d5af3c","src/mem_writer.rs":"44d594e4b8a4bedf28c61f89c34f13be42deb9a8820a1d3267dd9572d65d0c97","src/minidump_cpu.rs":"ccb3dc179699159883e539e29285f8f6ba936afb8ce8980dbcb5060a80b6618c","src/minidump_format.rs":"9d5940d71da3a543efa90279e287e0dbbe303de386a4d5aab15e8ccfdd556116","src/windows.rs":"7aae8747519b203f1889c369cb1122ff308d255e9514cf9d4d3f09257a3bb
96c","src/windows/errors.rs":"9b8752122784417ed48b9c3fccc9bdabc6c4e6285cdb77c79723cba56cb0de82","src/windows/ffi.rs":"24a6f99de9f25ae7bc80f2763d8c5b97e65699682a99ec6265ebc435c3310999","src/windows/minidump_writer.rs":"fbbb576161f753d9572c5e8c78a85066070948917e0ee2c111e63f3fc14fd5d3","tests/common/mod.rs":"7f340f28206924af467b09f190393ff81be8f542af1b5f9d8847b2bc409bf828","tests/linux_minidump_writer.rs":"bc81920c3dc48b0026bce0a1da3994c541a712c3e36ea2cf1037887843876511","tests/mac_minidump_writer.rs":"355e28635e9f8ccc831627e3323fe37907915047cd525f07eefd15d658cf4823","tests/ptrace_dumper.rs":"5596925274589697153657a010dd77866f94852ac8db487c86b5a9ca0f5395ea","tests/task_dumper.rs":"3881899e4cfef0dae772d16b1a783497eebd2b58025394e0819cb03a0960d480","tests/windows_minidump_writer.rs":"138551179c4e610141d56ba99305a0fc05531c8e9924805dd071d380338f0f74"},"package":"e2abcd9c8a1e6e1e9d56ce3627851f39a17ea83e17c96bc510f29d7e43d78a7d"} \ No newline at end of file +{"files":{"CHANGELOG.md":"698290ca4be862c49c91531d98bae7d9525a4c7f849dae11c71479cac72f8730","Cargo.lock":"d7f9922e23a90345e3529238931e3ceb9f7a6c52ace656ecd02f5e9a332a9312","Cargo.toml":"b5b42d98a152ff7b6ba51109436b5fffd742069056a9af4db35520e9d5877f72","LICENSE":"1ecdd8e8977af83c07c5f97bec87b47d27059b7ea323ca3160fbfa2314f5d99c","README.md":"71742b170ac34ceecf317d6d69456063bf5d8974453075e9cd2838785717fcdb","build.rs":"689cd32a441f5011f694a9f86bc03bc27c2a09bcb4130f47e04fe00bb069b1b5","deny.toml":"ff818c8c23570207f8c3f5ea6d2840066926d8e39fe9c412b2eae20f8f74d0d9","examples/synthetic.rs":"cd13bd0bba64a1d8c8c326bb1ce1818bead6904e5708418fd09edeaef1437c24","release.toml":"f554067378aec602383b96e5fa63427136533a7dd00137fd0664b279fb8fcc56","src/bin/test.rs":"312f13061583335144f8c69742d7e6ee74ffd62e956637a3ce6b1281eedf230c","src/dir_section.rs":"b7c83b16acefb4327b406612e1a667dffb91f86bec40f9330171dba8d10346ff","src/lib.rs":"6f393273ea6b354f412e70c9a4925fe2e646bc477591962c96a61aed9fde4e3b","src/linux.rs":"47f71e27806ec5041a90aaeaf150b7
41b8cf6a1429d2099c40c3d70859f7e4f1","src/linux/android.rs":"9f2fd633de0d545b400c479a70f000cb9d04bb6e322f62098f4a7ebe7299f051","src/linux/app_memory.rs":"5f093e4ed0aecc6086366a9c09658761fdd3b0e6e9ff2111690719e56612df64","src/linux/auxv_reader.rs":"e9c75b0093c845da8dd321c6582e65678772c57658358213098aa6267f3679e1","src/linux/crash_context.rs":"069d949c7a9fc23a1e8710f7c018cacf71af2326a2dd6ca0283d492c5253107a","src/linux/crash_context/aarch64.rs":"158d02200b25a7ca029ff5132ba3b7f76a05c9976ff529a5658523aff370e4d2","src/linux/crash_context/arm.rs":"a4f41aae015937d3dccdf759df84bc657cf457d8baaf07711617d17f4b40f6f1","src/linux/crash_context/x86.rs":"d464680ee9df8a7f11dae2bb199cc4efb66dfc790a87c9b773c3a9b44b9339d8","src/linux/crash_context/x86_64.rs":"dceabbcf4d3c4ff5ad7658daa7dec3349cad9b61091385bcd868de6b11eb4957","src/linux/dso_debug.rs":"2f117049a71362797af88ce6f8aaa9d43ba4572a3fec46a1c5642c46f78a9abf","src/linux/dumper_cpu_info.rs":"76558ffc85386e416bbfc49adc550f61fb206dfbfb0b6a25f620a5fc91f32bc1","src/linux/dumper_cpu_info/arm.rs":"6fc140181f3a32504c3b66f0235136e730424be830602c87cfbeb1dfb4c1cec3","src/linux/dumper_cpu_info/x86_mips.rs":"181a16ae60f82d15901945b7b586b98a6af23e8a4da27acaad2e25f03330209e","src/linux/errors.rs":"8a0f5bcd41777d2dfc1965226eb1935c03280b19113e46ae67f9e893116ac6b4","src/linux/maps_reader.rs":"82696854d81c216458558267d209875157405a58967d157aa4ccb7aa84434e99","src/linux/minidump_writer.rs":"048ffcebb95f899494bbeed84b79a2294375ce240f6293ad95fa5bba41187e95","src/linux/module_reader.rs":"c37ac0d771208546ebd54e53d5d9310867d280381f3556e1ebd361f89f97512c","src/linux/ptrace_dumper.rs":"760fa971a57ea47f4aac43f2a1f449e2e6d645d49020df73fc03472d9a30346b","src/linux/sections.rs":"98aabd7e4b0542201783af90cafdc66bb3845585d0ed24af7476c1ea3ab40fb9","src/linux/sections/app_memory.rs":"52fdfe9622a395763fef94c5fea924935fcd86103c3eb1b0fb8d7ea61fd3e783","src/linux/sections/exception_stream.rs":"e1b73b5168506f8804479206ef0941aca5f792500cc56c5c17db55cd6b273a99","src/linux/
sections/handle_data_stream.rs":"3aae030c009543142555d493f9273e3445454b18085dfdb3b44bc83d43b510dd","src/linux/sections/mappings.rs":"89787be2eb34a5f98de753890f05894ad8e8b29bc2996481c5865f147e03ffb6","src/linux/sections/memory_info_list_stream.rs":"bd927824e859e2fcbc85ef9d5e3d8b7be13c4918a636f70f0712070d6c65b002","src/linux/sections/memory_list_stream.rs":"f2d5b33fb4167b502dc8e74371d50b4ad66e0b48f541fe16bcbf5fb62491bf2d","src/linux/sections/systeminfo_stream.rs":"02373c97d4caeb66f4601c066d430ad2929b5584eaa0af3be1dc51ef818e5cb8","src/linux/sections/thread_list_stream.rs":"9ea966c00a878ef1a6e6781005ddf9909c18177f2c455cb250c4ac191040fc69","src/linux/sections/thread_names_stream.rs":"2a123335626e941cf0a3349f9327c3e7606af593a8370ef5f71325012107b91f","src/linux/thread_info.rs":"3588e6c9deedfd88bd024c30405172c05cea1bd37dcc64df687d6c218d2e4c5a","src/linux/thread_info/aarch64.rs":"fcbdd6b9185f00b4f42eef27076a8c9edab8afcaf22e1c005e32702be16e2264","src/linux/thread_info/arm.rs":"cf927d9f0ea48e07a874e8d6049e447c0f12a0715ed166448c3b07a410dd958a","src/linux/thread_info/mips.rs":"332d7888bb08a36d3b77023a4a0385193a07b1c6c5bf0e91aacb5d948ba8b903","src/linux/thread_info/x86.rs":"a759b2298f0831298c56e49717b4f188c213ebeb30b38be23af5bcbe6fd96369","src/mac.rs":"4671ad90c433db559ec633c880c3fe083f38a2e185ef4fc99577318526076519","src/mac/errors.rs":"696473d1187a0f003409e3ac0fc83a02cafbad8451a62bda868b08acc36e7443","src/mac/mach.rs":"f5f5b3bde9fd3ea85903b75c80ddb15931a56d5d4425f01fda9643fba8e419d3","src/mac/minidump_writer.rs":"d6f07685779c45f6b773d4163965f5dc5e0a73421937d9ec6b8e4d3320d23e85","src/mac/streams.rs":"82e1b9fefa26d98f1c0efdd98de0aed9a9ee9edf3e4b4f636212c73f8d3e464e","src/mac/streams/breakpad_info.rs":"9aafbc3f376050e39a2295e77a1b764223403ffdd47bded39efa9450a53b5a06","src/mac/streams/exception.rs":"6ad730ff0fbba4ec8a7c8576effb737138ca92e0f676eeff5d8e3c47d20a1434","src/mac/streams/memory_list.rs":"e507a1ada858ef535e3e211d91195901e9976ba0ff6cf601919d7cacb48c5e9f","src/mac/streams/mi
sc_info.rs":"ed9fe394ded145cae62094adf3a9e454e23bfd5b6fab7871f0cf62c6a5e79017","src/mac/streams/module_list.rs":"959239425c1ea4c9766303fd10a12c82d1fed27b462f836a8a317a470ea0ace7","src/mac/streams/system_info.rs":"4bd0d171c9d7c97afbacf6fd144d5554f0e46d47ae78f00b85331d6d1a1d24b3","src/mac/streams/thread_list.rs":"2b003e3409daebbd38ebc8a49a2c24e7a75b296d28f58a2c85114cf611174c1b","src/mac/streams/thread_names.rs":"cc1464c2610e6022046b5322d9b48a72a6b5d35923d75afd7aa4b82d355c7dc4","src/mac/task_dumper.rs":"b22f42b11bb7e9af5976ec04cbf9f8bf4d1d70c3c7738b1072e7bd41e8d5af3c","src/mem_writer.rs":"44d594e4b8a4bedf28c61f89c34f13be42deb9a8820a1d3267dd9572d65d0c97","src/minidump_cpu.rs":"ccb3dc179699159883e539e29285f8f6ba936afb8ce8980dbcb5060a80b6618c","src/minidump_format.rs":"9d5940d71da3a543efa90279e287e0dbbe303de386a4d5aab15e8ccfdd556116","src/windows.rs":"7aae8747519b203f1889c369cb1122ff308d255e9514cf9d4d3f09257a3bb96c","src/windows/errors.rs":"9b8752122784417ed48b9c3fccc9bdabc6c4e6285cdb77c79723cba56cb0de82","src/windows/ffi.rs":"24a6f99de9f25ae7bc80f2763d8c5b97e65699682a99ec6265ebc435c3310999","src/windows/minidump_writer.rs":"fbbb576161f753d9572c5e8c78a85066070948917e0ee2c111e63f3fc14fd5d3","tests/common/mod.rs":"8d1f0110b74e7a31231d3767731070a345e95672945ff602b161f5fa5f355e0d","tests/linux_minidump_writer.rs":"5a389f8d3c11c16675ace41d00562419720e4a0f7a93232d063977b992eae1d4","tests/mac_minidump_writer.rs":"355e28635e9f8ccc831627e3323fe37907915047cd525f07eefd15d658cf4823","tests/ptrace_dumper.rs":"f3a19db33e2d1b8b4b88a58e5fa39b99aa2a41ea091ada0d90b5f1a97e0fcc1c","tests/task_dumper.rs":"3881899e4cfef0dae772d16b1a783497eebd2b58025394e0819cb03a0960d480","tests/windows_minidump_writer.rs":"138551179c4e610141d56ba99305a0fc05531c8e9924805dd071d380338f0f74"},"package":"77ef25fdfb9560aa90ad9b64e6901a86e63e7b3f125ca3083dd3d3936c12acc6"} \ No newline at end of file diff --git a/third_party/rust/minidump-writer/CHANGELOG.md b/third_party/rust/minidump-writer/CHANGELOG.md index 
96c4584d78b5..672ce3b8a966 100644 --- a/third_party/rust/minidump-writer/CHANGELOG.md +++ b/third_party/rust/minidump-writer/CHANGELOG.md @@ -8,6 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ReleaseDate +## [0.9.0] - 2024-07-20 +### Fixed +- [PR#117](https://github.com/rust-minidump/minidump-writer/pull/117) resolved [#79](https://github.com/rust-minidump/minidump-writer/issues/79) by enabling reading of a module's build id and soname directly from the mapped process rather than relying on file reading, though that is still used as a fallback. + +### Changed +- [PR#126](https://github.com/rust-minidump/minidump-writer/pull/126) updated `minidump-common` -> 0.22. + ## [0.8.9] - 2024-04-01 ### Fixed - [PR#110](https://github.com/rust-minidump/minidump-writer/pull/110) changed it so that `SIGCONT` is sent regardless if the process was not able to be `SIGSTOP`ed quickly enough. @@ -135,7 +142,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release, including basic support for `x86_64-unknown-linux-gnu/musl` and `x86_64-pc-windows-msvc` -[Unreleased]: https://github.com/rust-minidump/minidump-writer/compare/0.8.9...HEAD +[Unreleased]: https://github.com/rust-minidump/minidump-writer/compare/0.9.0...HEAD +[0.9.0]: https://github.com/rust-minidump/minidump-writer/compare/0.8.9...0.9.0 [0.8.9]: https://github.com/rust-minidump/minidump-writer/compare/0.8.8...0.8.9 [0.8.8]: https://github.com/rust-minidump/minidump-writer/compare/0.8.7...0.8.8 [0.8.7]: https://github.com/rust-minidump/minidump-writer/compare/0.8.6...0.8.7 diff --git a/third_party/rust/minidump-writer/Cargo.lock b/third_party/rust/minidump-writer/Cargo.lock index 0b6c79825c3b..8770dd7c3f4f 100644 --- a/third_party/rust/minidump-writer/Cargo.lock +++ b/third_party/rust/minidump-writer/Cargo.lock @@ -4,12 +4,21 @@ version = 3 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.22.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +dependencies = [ + "gimli 0.29.0", +] + +[[package]] +name = "addr2line" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b9d03130b08257bc8110b0df827d8b137fdf67a95e2459eaace2e13fecf1d72" dependencies = [ "fallible-iterator 0.3.0", - "gimli", + "gimli 0.30.0", ] [[package]] @@ -20,20 +29,21 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.8" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ - "getrandom", + "cfg-if", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -54,10 +64,25 @@ dependencies = [ ] [[package]] -name = "anyhow" -version = "1.0.80" +name = "allocator-api2" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "arbitrary" +version = "1.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" +dependencies = [ + "derive_arbitrary", +] [[package]] name = "arrayvec" @@ -67,13 +92,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "async-compression" -version = "0.4.6" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" +checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5" dependencies = [ "brotli", "flate2", "futures-core", + "futures-io", "memchr", "pin-project-lite", "tokio", @@ -81,28 +107,28 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.77" +version = "0.1.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] name = "autocfg" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ - "addr2line", + "addr2line 0.22.0", "cc", "cfg-if", "libc", @@ -113,9 +139,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.7" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "binary-merge" @@ -131,9 +157,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.2" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "block-buffer" @@ -146,9 +172,9 @@ dependencies = [ [[package]] name = "breakpad-symbols" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14722511e9c032b38689b0e952a633826850873adb10de56ec775b217a519366" +checksum = "7e1ad3f5e2e5c8a42fccedd6792cc05968b39b69c3fe7b5544072ac052f3fe85" dependencies = [ "async-trait", "cachemap2", @@ -164,9 +190,9 @@ dependencies = [ [[package]] name = "brotli" -version = "3.4.0" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -175,9 +201,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.1" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -205,9 +231,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.3" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byteorder" @@ -217,31 +243,19 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cab" -version = "0.4.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae6b4de23c7d39c0631fd3cc952d87951c86c75a13812d7247cb7a896e7b3551" +checksum = "171228650e6721d5acc0868a462cd864f49ac5f64e4a42cde270406e64e404d2" dependencies = [ "byteorder", "flate2", - "lzxd 0.1.4", - "time", -] - -[[package]] -name = "cab" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8636d870cf15918e416b7904f0db1cbd06de0ffe392986c3b16662552df00c" -dependencies = [ - "byteorder", - "flate2", - "lzxd 0.2.5", + "lzxd", "time", ] @@ -253,9 +267,14 @@ checksum = "68ccbd3153aa153b2f5eff557537ffce81e4dd6c50ae0eddc41dc8d0c388436f" [[package]] name = "cc" -version = "1.0.89" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0ba8f7aaa012f30d5b2861462f6708eccd49c3c39863fe083a308035f63d723" +checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] [[package]] name = "cfg-if" @@ -323,9 +342,9 @@ dependencies = [ [[package]] name = "crash-context" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b85cef661eeca0c6675116310936972c520ebb0a33ddef16fd7efc957f4c1288" +checksum = 
"5e8783e947fb92cd6d4f10c644aed05cc8308e916196fb8ad82f63b91df34b87" dependencies = [ "cfg-if", "libc", @@ -334,9 +353,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.0.1" +version = "3.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" dependencies = [ "crc-catalog", ] @@ -349,9 +368,9 @@ checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] @@ -371,9 +390,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.12" +version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" dependencies = [ "crossbeam-utils", ] @@ -408,9 +427,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crypto-common" @@ -448,14 +467,14 @@ dependencies = [ ] [[package]] -name = "derive_more" -version = "0.99.17" +name = "derive_arbitrary" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" 
dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -468,33 +487,13 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "dirs" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" -dependencies = [ - "dirs-sys 0.3.7", -] - [[package]] name = "dirs" version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ - "dirs-sys 0.4.1", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", + "dirs-sys", ] [[package]] @@ -509,6 +508,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dmsort" version = "1.0.2" @@ -517,17 +527,17 @@ checksum = "f0bc8fbe9441c17c9f46f75dfe27fa1ddb6c68a461ccaed0481419219d4f10d3" [[package]] name = "dump_syms" -version = "2.2.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f33fc75a82f02047c9f4cebd1c6f0e12872aea9c217d841a5b57dd42fdf82990" +checksum = "7f422400fd108f0bae551b7f81093d86712926d94376f70f0b679c2ac3a54e7c" dependencies = [ "anyhow", - "bitflags 2.4.2", - "cab 0.4.1", + "bitflags 2.6.0", + "cab", "crossbeam", - "dirs 4.0.0", - "goblin 0.7.1", - "hashbrown 0.12.3", + "dirs", + "goblin", + "hashbrown", "log", "lzma-rs", "num_cpus", @@ -541,6 +551,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "either" +version = "1.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "elementtree" version = "1.2.3" @@ -567,9 +583,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "encoding_rs" -version = "0.8.33" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" dependencies = [ "cfg-if", ] @@ -582,9 +598,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys 0.52.0", @@ -604,15 +620,15 @@ checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fastrand" -version = "2.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "flate2" -version = "1.0.28" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ "crc32fast", "miniz_oxide", @@ -635,29 +651,26 @@ dependencies = [ [[package]] name = "framehop" -version = "0.9.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"38bb3ea0d42943711eafa7a6182b47a21d51247d2ecad6641ff61d9213d099ea" +checksum = "0fd28d2036d4fd99e3629487baca659e5af1c5d554e320168613be79028610fc" dependencies = [ "arrayvec", + "cfg-if", "fallible-iterator 0.3.0", - "gimli", + "gimli 0.30.0", "macho-unwind-info", - "object", "pe-unwind-info", - "thiserror", ] [[package]] name = "fs4" -version = "0.7.0" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f9df8a11882c4e3335eb2d18a0137c505d9ca927470b0cac9c6f0ae07d28f7" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" dependencies = [ - "async-trait", "rustix", - "tokio", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -716,7 +729,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] @@ -761,9 +774,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", @@ -772,72 +785,42 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" + +[[package]] +name = "gimli" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e1d97fbe9722ba9bbd0c97051c2956e726562b61f86a25a4360398a40edfc9" dependencies = [ "fallible-iterator 0.3.0", - "indexmap", "stable_deref_trait", ] [[package]] name = "goblin" -version = "0.7.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f27c1b4369c2cd341b5de549380158b105a04c331be5db9110eef7b6d2742134" -dependencies = [ - "log", - "plain", - "scroll 0.11.0", -] - -[[package]] -name = "goblin" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb07a4ffed2093b118a525b1d8f5204ae274faed5604537caf7135d0f18d9887" +checksum = "1b363a30c165f666402fe6a3024d3bec7ebc898f96a4a23bd1c99f8dbf3f4f47" dependencies = [ "log", "plain", "scroll 0.12.0", ] -[[package]] -name = "h2" -version = "0.3.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", + "allocator-api2", "serde", ] -[[package]] -name = "hashbrown" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" - [[package]] name = "hermit-abi" version = "0.3.9" @@ -852,9 +835,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "0.2.12" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -863,63 +846,88 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.6" +version = "1.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ "bytes", "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http", + "http-body", "pin-project-lite", ] [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" [[package]] name = "hyper" -version = "0.14.28" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +checksum = "c4fe55fb7a772d59a5ff1dfbff4fe0258d19b89fec4b233e75d35d5d2316badc" dependencies = [ "bytes", "futures-channel", - "futures-core", "futures-util", - "h2", "http", "http-body", "httparse", - "httpdate", "itoa", "pin-project-lite", - "socket2", + "smallvec", "tokio", - "tower-service", - "tracing", "want", ] [[package]] name = "hyper-rustls" -version = "0.24.2" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http", "hyper", + "hyper-util", "rustls", + "rustls-pki-types", "tokio", "tokio-rustls", + 
"tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower", + "tower-service", + "tracing", ] [[package]] @@ -940,12 +948,13 @@ checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" [[package]] name = "indexmap" -version = "2.2.5" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", - "hashbrown 0.14.3", + "hashbrown", + "serde", ] [[package]] @@ -964,10 +973,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] -name = "itoa" -version = "1.0.10" +name = "itertools" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] [[package]] name = "joinery" @@ -986,9 +1013,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = 
"1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "leb128" @@ -998,19 +1025,18 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libredox" -version = "0.0.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", "libc", - "redox_syscall", ] [[package]] @@ -1021,24 +1047,26 @@ checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee" [[package]] name = "linux-perf-data" -version = "0.8.2" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e5edc4ed86c4995d6fe86ed4153727d27b4f8be7167d14b05d74e41c16b087" +checksum = "3cd34317b7ef6e67579faf5021099ff15faa873a082e8b2c46335acbd7147935" dependencies = [ "byteorder", "linear-map", "linux-perf-event-reader", "memchr", + "prost", + "prost-derive", "thiserror", ] [[package]] name = "linux-perf-event-reader" -version = "0.9.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e93938b52460a3e539c9465592c5f645c7ca3847da1d7dbee20e3564d55c5a90" +checksum = "a563f3068af081e11c8b03dc1fb8ff5654993ce538f7a4b1806e3461a8b86166" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "byteorder", "memchr", "thiserror", @@ -1046,25 +1074,31 @@ 
dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", ] [[package]] -name = "log" -version = "0.4.21" +name = "lockfree-object-pool" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lzma-rs" @@ -1076,12 +1110,6 @@ dependencies = [ "crc", ] -[[package]] -name = "lzxd" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784462f20dddd9dfdb45de963fa4ad4a288cb10a7889ac5d2c34fb6481c6b213" - [[package]] name = "lzxd" version = "0.2.5" @@ -1116,9 +1144,9 @@ checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" @@ -1131,9 +1159,9 @@ dependencies = [ [[package]] name = "memoffset" -version = "0.9.0" +version = 
"0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" dependencies = [ "autocfg", ] @@ -1146,9 +1174,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "minidump" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cb8353fdcd59c0caaf7525cb83b29ee02e6eac4c85d5a7944a9773d0f33f2e" +checksum = "aefb80650628de087057ed167e3e1ef5bed65dc4b1bd28d47cd707c3848adce2" dependencies = [ "debugid", "encoding_rs", @@ -1166,11 +1194,11 @@ dependencies = [ [[package]] name = "minidump-common" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bb6eaf88cc770fa58e6ae721cf2e40c2ca6a4c942ae8c7aa324d680bd3c6717" +checksum = "95a2b640f80e5514f49509ff1f97fb24693f95ef5be5ed810d70df4283a68acc" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", "debugid", "num-derive", "num-traits", @@ -1181,9 +1209,9 @@ dependencies = [ [[package]] name = "minidump-processor" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bc8e39bb830c2e92ff0f3af8166f3fbc3ad2dc0c843c66de3b918b71190a6c" +checksum = "4d330a92d90c5699e8edd32f8036a1b5afadd6df000eb201fac258d149f8ca78" dependencies = [ "async-trait", "breakpad-symbols", @@ -1201,9 +1229,9 @@ dependencies = [ [[package]] name = "minidump-unwind" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2096876dbb3d5a0bbc5c42a913d44e935cb7f6139dcf9eda043ae1ce702ab529" +checksum = "afb5af4cbb631c54fe8c0c058799e9ac95b31c6e282f1afaaaaad10c2c441fcb" dependencies = [ "async-trait", "breakpad-symbols", @@ -1221,16 +1249,16 @@ dependencies = [ [[package]] name = "minidump-writer" 
-version = "0.8.9" +version = "0.9.0" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", "byteorder", "cfg-if", "crash-context", "current_platform", "dump_syms", "futures", - "goblin 0.8.0", + "goblin", "libc", "log", "mach2", @@ -1257,9 +1285,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", ] @@ -1277,27 +1305,18 @@ dependencies = [ [[package]] name = "msvc-demangler" -version = "0.9.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb67c6dd0fa9b00619c41c5700b6f92d5f418be49b45ddb9970fbd4569df3c8" +checksum = "c4c25a3bb7d880e8eceab4822f3141ad0700d20f025991c1f03bd3d00219a5fc" dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "msvc-demangler" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2588c982e3a7fbfbd73b21f824cacc43fc6392a1103c709ffd6001c0bf33fdb3" -dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", ] [[package]] name = "new_debug_unreachable" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] name = "nix" @@ -1305,7 +1324,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", "cfg-if", "cfg_aliases", "libc", @@ -1348,14 +1367,14 @@ checksum = 
"ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] @@ -1372,9 +1391,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" dependencies = [ "flate2", "memchr", @@ -1395,9 +1414,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", @@ -1405,15 +1424,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -1447,7 +1466,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb51ef7ed9998e108891711812822831daac0b17d67768c3bdc69aa909366123" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", "elsa", 
"maybe-owned", "pdb2", @@ -1457,12 +1476,12 @@ dependencies = [ [[package]] name = "pdb2" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e30e131bcab0d41a2e471cf777ea9b1402f2a0764bcf1780251eab1b0d175d" +checksum = "51690a9810e8a4f711186ec92e4b376089e23c53e51380c340ea197fd4f99fe5" dependencies = [ - "fallible-iterator 0.2.0", - "scroll 0.11.0", + "fallible-iterator 0.3.0", + "scroll 0.12.0", "uuid", ] @@ -1473,7 +1492,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ec3b43050c38ffb9de87e17d874e9956e3a9131b343c9b7b7002597727c3891" dependencies = [ "arrayvec", - "bitflags 2.4.2", + "bitflags 2.6.0", "thiserror", "zerocopy", "zerocopy-derive", @@ -1495,10 +1514,30 @@ dependencies = [ ] [[package]] -name = "pin-project-lite" -version = "0.2.13" +name = "pin-project" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -1506,6 +1545,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + [[package]] name = "plain" version = "0.2.3" @@ -1518,6 +1563,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "precomputed-hash" version = "0.1.1" @@ -1526,9 +1577,9 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -1539,19 +1590,118 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", "hex", ] [[package]] -name = "quote" -version = "1.0.35" +name = "prost" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", +] + +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quinn" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e4ceeeeabace7857413798eb1ffa1e9c905a9946a57d81fb69b4b71c4d8eb3ad" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +dependencies = [ + "bytes", + "rand", + "ring", + "rustc-hash", + "rustls", + "slab", + "thiserror", + "tinyvec", + "tracing", +] + +[[package]] +name = "quinn-udp" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +dependencies = [ + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.52.0", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "range-collections" version = "0.2.4" @@ -1592,18 +1742,18 @@ checksum = "f60fcc7d6849342eff22c4350c8b9a989ee8ceabc4b481253e8946b9fe83d684" [[package]] name 
= "redox_syscall" -version = "0.4.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", ] [[package]] name = "redox_users" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom", "libredox", @@ -1612,33 +1762,33 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" +checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" +checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] name = "regex" -version = "1.10.3" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.6", + "regex-automata 0.4.7", "regex-syntax", ] @@ -1650,9 +1800,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -1661,27 +1811,27 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.11.24" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" +checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" dependencies = [ "async-compression", "base64", "bytes", - "encoding_rs", "futures-core", "futures-util", - "h2", "http", "http-body", + "http-body-util", "hyper", "hyper-rustls", + "hyper-util", "ipnet", "js-sys", "log", @@ -1689,13 +1839,14 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", + "quinn", "rustls", "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", - "system-configuration", "tokio", "tokio-rustls", "tokio-util", @@ -1726,17 +1877,23 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.34" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -1745,75 +1902,85 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.10" +version = "0.23.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" dependencies = [ - "log", + "once_cell", "ring", + "rustls-pki-types", "rustls-webpki", - "sct", + "subtle", + "zeroize", ] [[package]] name = "rustls-pemfile" -version = "1.0.4" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" dependencies = [ "base64", + "rustls-pki-types", ] [[package]] -name = "rustls-webpki" -version = "0.101.7" +name = "rustls-pki-types" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" + +[[package]] +name = "rustls-webpki" +version = "0.102.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" dependencies = [ "ring", + "rustls-pki-types", "untrusted", ] [[package]] name = "ruzstd" -version = "0.5.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58c4eb8a81997cf040a091d1f7e1938aeab6749d3a0dfa73af43cdc32393483d" +checksum = 
"5022b253619b1ba797f243056276bed8ed1a73b0f5a7ce7225d524067644bf8f" dependencies = [ "byteorder", - "derive_more", "twox-hash", ] [[package]] name = "ryu" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "samply-symbols" -version = "0.21.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b02bfab817ed66f35a0afa07216a02325b82faf777e20fdcd0dadd5ebfd8eb" +checksum = "bbcdc8625b92ae1b981f37d435599effed5464dd1f37c01f88ccf83c0e90b54d" dependencies = [ - "addr2line", - "bitflags 2.4.2", + "addr2line 0.23.0", + "bitflags 2.6.0", "cpp_demangle", "debugid", "elsa", "flate2", - "gimli", + "gimli 0.30.0", "linux-perf-data", "lzma-rs", "macho-unwind-info", "memchr", - "msvc-demangler 0.10.0", + "msvc-demangler", "nom", "object", "pdb-addr2line 0.11.0", "rangemap", "rustc-demangle", + "scala-native-demangle", "srcsrv", "thiserror", "uuid", @@ -1823,6 +1990,12 @@ dependencies = [ "zerocopy-derive", ] +[[package]] +name = "scala-native-demangle" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4416eddc0eaf31e04aa4039bd3db4288ea1ba613955d86cf9c310049c5d1e2" + [[package]] name = "scopeguard" version = "1.2.0" @@ -1834,9 +2007,6 @@ name = "scroll" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" -dependencies = [ - "scroll_derive 0.11.1", -] [[package]] name = "scroll" @@ -1844,18 +2014,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" dependencies = [ - "scroll_derive 0.12.0", -] - -[[package]] -name = "scroll_derive" -version = 
"0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1db149f81d46d2deba7cd3c50772474707729550221e69588478ebf9ada425ae" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.52", + "scroll_derive", ] [[package]] @@ -1866,50 +2025,40 @@ checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", -] - -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", + "syn", ] [[package]] name = "semver" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" dependencies = [ "itoa", "ryu", @@ -1940,10 +2089,16 @@ 
dependencies = [ ] [[package]] -name = "similar" -version = "2.4.0" +name = "simd-adler32" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32fea41aca09ee824cc9724996433064c89f7777e60762749a4170a14abbfa21" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "similar" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" dependencies = [ "bstr", "unicode-segmentation", @@ -1976,9 +2131,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "smart-default" @@ -1988,14 +2143,14 @@ checksum = "0eb01866308440fc64d6c44d9e86c5cc17adfe33c4d6eed55da9145044d0ffc1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] name = "socket2" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" dependencies = [ "libc", "windows-sys 0.52.0", @@ -2044,10 +2199,16 @@ dependencies = [ ] [[package]] -name = "symbolic" -version = "12.8.0" +name = "subtle" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ec4f53c56d7ee8809c2322925d362e193bcc7bbe7e777a3304b34ea7e85a36" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "symbolic" +version = "12.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"85aabcf85c883278298596217d678c8d3ca256445d732eac59303ce04863c46f" dependencies = [ "symbolic-cfi", "symbolic-common", @@ -2057,9 +2218,9 @@ dependencies = [ [[package]] name = "symbolic-cfi" -version = "12.8.0" +version = "12.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a3724d1d1a3e531038da32de84725fd93d0afeeee51de4d3871cf657f283ad9" +checksum = "63ed43f6b8769d681296cbbf6f108bed81465f04f3bc3358d0cd76dcc6d8cd27" dependencies = [ "symbolic-common", "symbolic-debuginfo", @@ -2068,9 +2229,9 @@ dependencies = [ [[package]] name = "symbolic-common" -version = "12.8.0" +version = "12.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cccfffbc6bb3bb2d3a26cd2077f4d055f6808d266f9d4d158797a4c60510dfe" +checksum = "71297dc3e250f7dbdf8adb99e235da783d690f5819fdeb4cce39d9cfb0aca9f1" dependencies = [ "debugid", "memmap2", @@ -2080,9 +2241,9 @@ dependencies = [ [[package]] name = "symbolic-debuginfo" -version = "12.8.0" +version = "12.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb52777be67777947c5a159f1b6e8bfe4473d91fad7e5d4aff85ee4d3963cc04" +checksum = "abdc791ca87a69a5d09913d87f1e5ac95229be414ec0ff6c0fe2ddff6199f3b6" dependencies = [ "debugid", "dmsort", @@ -2090,8 +2251,8 @@ dependencies = [ "elsa", "fallible-iterator 0.3.0", "flate2", - "gimli", - "goblin 0.7.1", + "gimli 0.30.0", + "goblin", "lazy_static", "nom", "nom-supreme", @@ -2099,7 +2260,7 @@ dependencies = [ "parking_lot", "pdb-addr2line 0.10.4", "regex", - "scroll 0.11.0", + "scroll 0.12.0", "serde", "serde_json", "smallvec", @@ -2108,26 +2269,27 @@ dependencies = [ "thiserror", "wasmparser", "zip", + "zstd", ] [[package]] name = "symbolic-demangle" -version = "12.8.0" +version = "12.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76a99812da4020a67e76c4eb41f08c87364c14170495ff780f30dd519c221a68" +checksum = "424fa2c9bf2c862891b9cfd354a752751a6730fd838a4691e7f6c2c7957b9daf" 
dependencies = [ "cc", "cpp_demangle", - "msvc-demangler 0.9.0", + "msvc-demangler", "rustc-demangle", "symbolic-common", ] [[package]] name = "symbolic-ppdb" -version = "12.8.0" +version = "12.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dace84623ccc926886fc880c36e2a81af4b17f8276abc4d77dc947ca3c6c8f8c" +checksum = "92ccffa1e6b313c007dddcc3a91166a64055a0a956e1429ee179a808fa3b2c62" dependencies = [ "flate2", "indexmap", @@ -2141,35 +2303,27 @@ dependencies = [ [[package]] name = "symsrv" -version = "0.3.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6a9f40855a5ba9f9008b2c12eef13a67d2655459ba9f74970aea341e0fb5e9" +checksum = "bc9eb3fb03ab32475d1107c2b3d2efcd48cb5e9278e7b39186598df7a049a1da" dependencies = [ - "cab 0.5.0", - "dirs 5.0.1", + "async-compression", + "cab", + "dirs", "fs4", "futures-util", + "http", "reqwest", + "scopeguard", "thiserror", "tokio", ] [[package]] name = "syn" -version = "1.0.109" +version = "2.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.52" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" dependencies = [ "proc-macro2", "quote", @@ -2178,9 +2332,9 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "0.1.2" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "synstructure" @@ -2190,28 +2344,7 @@ checksum = 
"c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", -] - -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", + "syn", ] [[package]] @@ -2228,29 +2361,29 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.57" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.57" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] name = "time" -version = "0.3.34" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", "itoa", @@ -2269,9 +2402,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.17" +version = "0.2.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" dependencies = [ "num-conv", "time-core", @@ -2279,9 +2412,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -2294,9 +2427,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.36.0" +version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", @@ -2309,28 +2442,49 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.24.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ "rustls", + "rustls-pki-types", "tokio", ] [[package]] name = "tokio-util" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", "futures-sink", "pin-project-lite", "tokio", - "tracing", ] +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + [[package]] name = "tower-service" version = "0.3.2" @@ -2357,7 +2511,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] @@ -2426,9 +2580,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna", @@ -2437,9 +2591,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" [[package]] name = "version_check" @@ -2483,7 +2637,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.52", + "syn", "wasm-bindgen-shared", ] @@ -2517,7 +2671,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2543,12 +2697,16 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.118.2" +version = "0.209.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"77f1154f1ab868e2a01d9834a805faca7bf8b50d041b4ca714d005d0dab1c50c" +checksum = "07035cc9a9b41e62d3bb3a3815a66ab87c993c06fe1cf6b2a3f2a18499d937db" dependencies = [ + "ahash", + "bitflags 2.6.0", + "hashbrown", "indexmap", "semver", + "serde", ] [[package]] @@ -2573,15 +2731,18 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.25.4" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" +dependencies = [ + "rustls-pki-types", +] [[package]] name = "wholesym" -version = "0.4.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9050809aead9c09ea73d2719d7639b55ea2803287b008dc1bb24b647661afa7c" +checksum = "0c9d822684c5f5eb54218ff1d70fa8d436f194b3459fb619af23ddaf40aa7edc" dependencies = [ "bytes", "core-foundation", @@ -2599,28 +2760,6 @@ dependencies = [ "yoke-derive", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.48.0" @@ -2636,7 +2775,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ 
- "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -2656,17 +2795,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -2677,9 +2817,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -2689,9 +2829,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -2701,9 +2841,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.6" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -2713,9 +2859,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -2725,9 +2871,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -2737,9 +2883,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -2749,15 +2895,15 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winreg" -version = "0.50.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" dependencies = [ "cfg-if", "windows-sys 0.48.0", @@ -2765,9 +2911,9 @@ dependencies = [ [[package]] name = "yoke" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e71b2e4f287f467794c671e2b8f8a5f3716b3c829079a1c44740148eff07e4" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" dependencies = [ "serde", "stable_deref_trait", @@ -2776,21 +2922,21 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", "synstructure", ] [[package]] name = "zerocopy" -version = "0.7.32" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", "zerocopy-derive", @@ -2798,29 +2944,82 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.7.32" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +checksum = 
"fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn", ] [[package]] name = "zerofrom" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" [[package]] name = "zip" -version = "0.6.6" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +checksum = "1dd56a4d5921bc2f99947ac5b3abe5f510b1be7376fdc5e9fce4a23c6a93e87c" dependencies = [ - "byteorder", + "arbitrary", "crc32fast", "crossbeam-utils", + "displaydoc", "flate2", + "indexmap", + "memchr", + "thiserror", + "zopfli", +] + +[[package]] +name = "zopfli" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5019f391bac5cf252e93bbcc53d039ffd62c7bfb7c150414d61369afe57e946" +dependencies = [ + "bumpalo", + "crc32fast", + "lockfree-object-pool", + "log", + "once_cell", + "simd-adler32", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.12+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +dependencies = [ + "cc", + "pkg-config", ] diff --git a/third_party/rust/minidump-writer/Cargo.toml b/third_party/rust/minidump-writer/Cargo.toml index d137677165dc..d64defd80b9a 100644 --- a/third_party/rust/minidump-writer/Cargo.toml +++ b/third_party/rust/minidump-writer/Cargo.toml @@ -12,7 +12,7 @@ [package] edition = "2021" name = "minidump-writer" -version = "0.8.9" +version = "0.9.0" authors = ["Martin Sirringhaus"] description = "Rust rewrite of Breakpad's minidump_writer" homepage = "https://github.com/rust-minidump/minidump-writer" @@ -39,7 +39,7 @@ version = "0.4" version = "0.9" [dependencies.minidump-common] -version = "0.21" +version = "0.22" [dependencies.scroll] version = "0.12" @@ -61,7 +61,7 @@ features = ["executor"] version = "0.9" [dev-dependencies.minidump] -version = "0.21" +version = "0.22" [target."cfg(any(target_os = \"linux\", target_os = \"android\"))".dependencies.nix] version = "0.28" @@ -86,11 +86,11 @@ version = "2.2" default-features = false [target."cfg(target_os = \"macos\")".dev-dependencies.minidump-processor] -version = "0.21" +version = "0.22" default-features = false [target."cfg(target_os = \"macos\")".dev-dependencies.minidump-unwind] -version = "0.21" +version = "0.22" features = ["debuginfo"] [target."cfg(target_os = \"macos\")".dev-dependencies.similar-asserts] diff --git a/third_party/rust/minidump-writer/deny.toml b/third_party/rust/minidump-writer/deny.toml index ed2811343207..bb8910de3ba4 100644 --- a/third_party/rust/minidump-writer/deny.toml +++ b/third_party/rust/minidump-writer/deny.toml @@ -1,3 +1,4 @@ +[graph] targets = [ { triple = "x86_64-unknown-linux-gnu" }, { triple = "x86_64-unknown-linux-musl" }, diff --git a/third_party/rust/minidump-writer/src/bin/test.rs b/third_party/rust/minidump-writer/src/bin/test.rs index df39b28655eb..d8ac8b6097c0 100644 --- a/third_party/rust/minidump-writer/src/bin/test.rs +++ 
b/third_party/rust/minidump-writer/src/bin/test.rs @@ -9,6 +9,7 @@ mod linux { use super::*; use minidump_writer::{ minidump_writer::STOP_TIMEOUT, + module_reader, ptrace_dumper::{PtraceDumper, AT_SYSINFO_EHDR}, LINUX_GATE_LIBRARY_NAME, }; @@ -90,7 +91,8 @@ mod linux { let ppid = getppid().as_raw(); let exe_link = format!("/proc/{}/exe", ppid); let exe_name = std::fs::read_link(exe_link)?.into_os_string(); - let mut dumper = PtraceDumper::new(getppid().as_raw(), STOP_TIMEOUT)?; + let mut dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?; + dumper.suspend_threads()?; let mut found_exe = None; for (idx, mapping) in dumper.mappings.iter().enumerate() { if mapping.name.as_ref().map(|x| x.into()).as_ref() == Some(&exe_name) { @@ -99,7 +101,8 @@ mod linux { } } let idx = found_exe.unwrap(); - let id = dumper.elf_identifier_for_mapping_index(idx)?; + let module_reader::BuildId(id) = dumper.from_process_memory_for_index(idx)?; + dumper.resume_threads()?; assert!(!id.is_empty()); assert!(id.iter().any(|&x| x > 0)); Ok(()) @@ -131,11 +134,12 @@ mod linux { let ppid = getppid().as_raw(); let mut dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?; let mut found_linux_gate = false; - for mut mapping in dumper.mappings.clone() { + for mapping in dumper.mappings.clone() { if mapping.name == Some(LINUX_GATE_LIBRARY_NAME.into()) { found_linux_gate = true; dumper.suspend_threads()?; - let id = PtraceDumper::elf_identifier_for_mapping(&mut mapping, ppid)?; + let module_reader::BuildId(id) = + dumper.from_process_memory_for_mapping(&mapping)?; test!(!id.is_empty(), "id-vec is empty")?; test!(id.iter().any(|&x| x > 0), "all id elements are 0")?; dumper.resume_threads()?; @@ -268,6 +272,7 @@ mod linux { pub(super) fn real_main(args: Vec) -> Result<()> { match args.len() { 1 => match args[0].as_ref() { + "nop" => Ok(()), "file_id" => test_file_id(), "setup" => test_setup(), "thread_list" => test_thread_list(), diff --git a/third_party/rust/minidump-writer/src/linux.rs 
b/third_party/rust/minidump-writer/src/linux.rs index b4c5b21131b1..0b68c125f7d7 100644 --- a/third_party/rust/minidump-writer/src/linux.rs +++ b/third_party/rust/minidump-writer/src/linux.rs @@ -11,6 +11,7 @@ mod dumper_cpu_info; pub mod errors; pub mod maps_reader; pub mod minidump_writer; +pub mod module_reader; pub mod ptrace_dumper; pub(crate) mod sections; pub mod thread_info; diff --git a/third_party/rust/minidump-writer/src/linux/dumper_cpu_info/x86_mips.rs b/third_party/rust/minidump-writer/src/linux/dumper_cpu_info/x86_mips.rs index cefba4fd2577..0c03d8e5f1b2 100644 --- a/third_party/rust/minidump-writer/src/linux/dumper_cpu_info/x86_mips.rs +++ b/third_party/rust/minidump-writer/src/linux/dumper_cpu_info/x86_mips.rs @@ -84,7 +84,7 @@ pub fn write_cpu_information(sys_info: &mut MDRawSystemInfo) -> Result<()> { // special case for vendor_id if field == vendor_id_name && !value.is_empty() { - vendor_id = value.to_owned(); + vendor_id = value.into(); } } } diff --git a/third_party/rust/minidump-writer/src/linux/errors.rs b/third_party/rust/minidump-writer/src/linux/errors.rs index b666fefa2b9c..29486e4738b2 100644 --- a/third_party/rust/minidump-writer/src/linux/errors.rs +++ b/third_party/rust/minidump-writer/src/linux/errors.rs @@ -23,6 +23,11 @@ pub enum InitError { #[derive(Error, Debug)] pub enum MapsReaderError { + #[error("Couldn't parse as ELF file")] + ELFParsingFailed(#[from] goblin::error::Error), + #[error("No soname found (filename: {})", .0.to_string_lossy())] + NoSoName(OsString, #[source] ModuleReaderError), + // parse_from_line() #[error("Map entry malformed: No {0} found")] MapEntryMalformed(&'static str), @@ -40,14 +45,6 @@ pub enum MapsReaderError { MmapSanityCheckFailed, #[error("Symlink does not match ({0} vs. 
{1})")] SymlinkError(std::path::PathBuf, std::path::PathBuf), - - // fixup_deleted_file() - #[error("Couldn't parse as ELF file")] - ELFParsingFailed(#[from] goblin::error::Error), - #[error("An anonymous mapping has no associated file")] - AnonymousMapping, - #[error("No soname found (filename: {})", .0.to_string_lossy())] - NoSoName(OsString), } #[derive(Debug, Error)] @@ -118,8 +115,8 @@ pub enum DumperError { TryFromSliceError(#[from] std::array::TryFromSliceError), #[error("Couldn't parse as ELF file")] ELFParsingFailed(#[from] goblin::error::Error), - #[error("No build-id found")] - NoBuildIDFound, + #[error("Could not read value from module")] + ModuleReaderError(#[from] ModuleReaderError), #[error("Not safe to open mapping: {}", .0.to_string_lossy())] NotSafeToOpenMapping(OsString), #[error("Failed integer conversion")] @@ -251,3 +248,56 @@ pub enum WriterError { #[error("Failed to get current timestamp when writing header of minidump")] SystemTimeError(#[from] std::time::SystemTimeError), } + +#[derive(Debug, Error)] +pub enum ModuleReaderError { + #[error("failed to read module memory: {length} bytes at {offset}: {error}")] + ReadModuleMemory { + offset: u64, + length: u64, + #[source] + error: std::io::Error, + }, + #[error("failed to parse ELF memory: {0}")] + Parsing(#[from] goblin::error::Error), + #[error("no build id notes in program headers")] + NoProgramHeaderNote, + #[error("no string table available to locate note sections")] + NoStrTab, + #[error("no build id note sections")] + NoSectionNote, + #[error("the ELF data contains no program headers")] + NoProgramHeaders, + #[error("the ELF data contains no sections")] + NoSections, + #[error("the ELF data does not have a .text section from which to generate a build id")] + NoTextSection, + #[error( + "failed to calculate build id\n\ + ... from program headers: {program_headers}\n\ + ... from sections: {section}\n\ + ... 
from the text section: {section}" + )] + NoBuildId { + program_headers: Box, + section: Box, + generated: Box, + }, + #[error("no dynamic string table section")] + NoDynStrSection, + #[error("a string in the strtab did not have a terminating nul byte")] + StrTabNoNulByte, + #[error("no SONAME found in dynamic linking information")] + NoSoNameEntry, + #[error("no dynamic linking information section")] + NoDynamicSection, + #[error( + "failed to retrieve soname\n\ + ... from program headers: {program_headers}\n\ + ... from sections: {section}" + )] + NoSoName { + program_headers: Box, + section: Box, + }, +} diff --git a/third_party/rust/minidump-writer/src/linux/maps_reader.rs b/third_party/rust/minidump-writer/src/linux/maps_reader.rs index b5b7fb23e6db..66fa2f461e07 100644 --- a/third_party/rust/minidump-writer/src/linux/maps_reader.rs +++ b/third_party/rust/minidump-writer/src/linux/maps_reader.rs @@ -1,12 +1,11 @@ use crate::auxv_reader::AuxvType; use crate::errors::MapsReaderError; -use crate::thread_info::Pid; use byteorder::{NativeEndian, ReadBytesExt}; use goblin::elf; use memmap2::{Mmap, MmapOptions}; use procfs_core::process::{MMPermissions, MMapPath, MemoryMaps}; use std::ffi::{OsStr, OsString}; -use std::os::unix::ffi::OsStrExt; +use std::os::unix::ffi::{OsStrExt, OsStringExt}; use std::{fs::File, mem::size_of, path::PathBuf}; pub const LINUX_GATE_LIBRARY_NAME: &str = "linux-gate.so"; @@ -64,6 +63,17 @@ fn is_mapping_a_path(pathname: Option<&OsStr>) -> bool { } } +/// Sanitize mapped paths. +/// +/// This removes a ` (deleted)` suffix, if present. +fn sanitize_path(pathname: OsString) -> OsString { + if let Some(bytes) = pathname.as_bytes().strip_suffix(DELETED_SUFFIX) { + OsString::from_vec(bytes.to_owned()) + } else { + pathname + } +} + impl MappingInfo { /// Return whether the `name` field is a path (contains a `/`). 
pub fn name_is_path(&self) -> bool { @@ -87,7 +97,7 @@ impl MappingInfo { let mut offset: usize = mm.offset.try_into()?; let mut pathname: Option = match mm.pathname { - MMapPath::Path(p) => Some(p.into()), + MMapPath::Path(p) => Some(sanitize_path(p.into())), MMapPath::Heap => Some("[heap]".into()), MMapPath::Stack => Some("[stack]".into()), MMapPath::TStack(i) => Some(format!("[stack:{i}]").into()), @@ -197,52 +207,6 @@ impl MappingInfo { Ok(mapped_file) } - /// Check whether the mapping refers to a deleted file, and if so try to find the file - /// elsewhere and return that path. - /// - /// Currently this only supports fixing a deleted file that was the main exe of the given - /// `pid`. - /// - /// Returns a tuple, where the first element is the file path (which is possibly different than - /// `self.name`), and the second element is the original file path if a different path was - /// used. If no mapping name exists, returns an error. - pub fn fixup_deleted_file(&self, pid: Pid) -> Result<(OsString, Option<&OsStr>)> { - // Check for ' (deleted)' in |path|. - // |path| has to be at least as long as "/x (deleted)". - let Some(path) = &self.name else { - return Err(MapsReaderError::AnonymousMapping); - }; - - let Some(old_path) = path.as_bytes().strip_suffix(DELETED_SUFFIX) else { - return Ok((path.clone(), None)); - }; - - // Check |path| against the /proc/pid/exe 'symlink'. - let exe_link = format!("/proc/{}/exe", pid); - let link_path = std::fs::read_link(&exe_link)?; - - // This is a no-op for now (until we want to support root_prefix for chroot-envs) - // if (!GetMappingAbsolutePath(new_mapping, new_path)) - // return false; - - if &link_path != path { - return Err(MapsReaderError::SymlinkError( - PathBuf::from(path), - link_path, - )); - } - - // Check to see if someone actually named their executable 'foo (deleted)'. 
- - // This makes currently no sense, as exe_link == new_path - // if let (Some(exe_stat), Some(new_path_stat)) = (nix::stat::stat(exe_link), nix::stat::stat(new_path)) { - // if exe_stat.st_dev == new_path_stat.st_dev && exe_stat.st_ino == new_path_stat.st_ino { - // return Err("".into()); - // } - // } - Ok((exe_link.into(), Some(OsStr::from_bytes(old_path)))) - } - pub fn stack_has_pointer_to_mapping(&self, stack_copy: &[u8], sp_offset: usize) -> bool { // Loop over all stack words that would have been on the stack in // the target process (i.e. are word aligned, and at addresses >= @@ -292,14 +256,13 @@ impl MappingInfo { /// Find the shared object name (SONAME) by examining the ELF information /// for the mapping. fn so_name(&self) -> Result { + use super::module_reader::{ReadFromModule, SoName}; + let mapped_file = MappingInfo::get_mmap(&self.name, self.offset)?; - - let elf_obj = elf::Elf::parse(&mapped_file)?; - - let soname = elf_obj.soname.ok_or_else(|| { - MapsReaderError::NoSoName(self.name.clone().unwrap_or_else(|| "None".into())) - })?; - Ok(soname.to_string()) + Ok(SoName::read_from_module(&*mapped_file) + .map_err(|e| MapsReaderError::NoSoName(self.name.clone().unwrap_or_default(), e))? + .0 + .to_string()) } #[inline] @@ -309,6 +272,7 @@ impl MappingInfo { pub fn get_mapping_effective_path_name_and_version( &self, + soname: Option, ) -> Result<(PathBuf, String, Option)> { let mut file_path = PathBuf::from(self.name.clone().unwrap_or_default()); @@ -318,7 +282,7 @@ impl MappingInfo { // filesystem name of the module. // Just use the filesystem name if no SONAME is present. 
- let Ok(file_name) = self.so_name() else { + let Some(file_name) = soname.or_else(|| self.so_name().ok()) else { // file_path := /path/to/libname.so // file_name := libname.so let file_name = file_path @@ -725,7 +689,7 @@ a4840000-a4873000 rw-p 09021000 08:12 393449 /data/app/org.mozilla.firefox-1 assert_eq!(mappings.len(), 1); let (file_path, file_name, _version) = mappings[0] - .get_mapping_effective_path_name_and_version() + .get_mapping_effective_path_name_and_version(None) .expect("Couldn't get effective name for mapping"); assert_eq!(file_name, "libmozgtk.so"); assert_eq!(file_path, PathBuf::from("/home/martin/Documents/mozilla/devel/mozilla-central/obj/widget/gtk/mozgtk/gtk3/libmozgtk.so")); @@ -763,19 +727,17 @@ a4840000-a4873000 rw-p 09021000 08:12 393449 /data/app/org.mozilla.firefox-1 let mappings = get_mappings_for( "\ 10000000-20000000 r--p 00000000 00:3e 27136458 libmoz gtk.so -20000000-30000000 r--p 00000000 00:3e 27136458 libmozgtk.so (deleted) 30000000-40000000 r--p 00000000 00:3e 27136458 \"libmoz gtk.so (deleted)\" 30000000-40000000 r--p 00000000 00:3e 27136458 ", 0x7ffe091bf000, ); - assert_eq!(mappings.len(), 4); + assert_eq!(mappings.len(), 3); assert_eq!(mappings[0].name, Some("libmoz gtk.so".into())); - assert_eq!(mappings[1].name, Some("libmozgtk.so (deleted)".into())); assert_eq!( - mappings[2].name, + mappings[1].name, Some("\"libmoz gtk.so (deleted)\"".into()) ); - assert_eq!(mappings[3].name, None); + assert_eq!(mappings[2].name, None); } } diff --git a/third_party/rust/minidump-writer/src/linux/module_reader.rs b/third_party/rust/minidump-writer/src/linux/module_reader.rs new file mode 100644 index 000000000000..a3c9a9aca002 --- /dev/null +++ b/third_party/rust/minidump-writer/src/linux/module_reader.rs @@ -0,0 +1,587 @@ +use crate::errors::ModuleReaderError as Error; +use crate::minidump_format::GUID; +use goblin::{ + container::{Container, Ctx, Endian}, + elf, +}; +use std::ffi::CStr; + +const NOTE_SECTION_NAME: &[u8] = 
b".note.gnu.build-id\0"; + +pub trait ModuleMemory { + type Memory: std::ops::Deref; + + /// Read memory from the module. + fn read_module_memory(&self, offset: u64, length: u64) -> std::io::Result; + + /// The base address of the module in memory, if loaded in the address space of a program. + /// The default implementation returns None. + fn base_address(&self) -> Option { + None + } + + /// Whether the module memory is from a module loaded in the address space of a program. + /// The default implementation assumes this to be true if a base address is provided. + fn is_loaded_in_program(&self) -> bool { + self.base_address().is_some() + } +} + +impl<'a> ModuleMemory for &'a [u8] { + type Memory = Self; + + fn read_module_memory(&self, offset: u64, length: u64) -> std::io::Result { + self.get(offset as usize..(offset + length) as usize) + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + format!("{} out of bounds", offset + length), + ) + }) + } +} + +/// Indicate that a ModuleMemory implementation is read from the address space of a program with +/// the given base address. +pub struct ModuleMemoryAtAddress(pub T, pub u64); + +impl ModuleMemory for ModuleMemoryAtAddress { + type Memory = T::Memory; + + fn read_module_memory(&self, offset: u64, length: u64) -> std::io::Result { + self.0.read_module_memory(offset, length) + } + + fn base_address(&self) -> Option { + Some(self.1) + } +} + +fn read(mem: &T, offset: u64, length: u64) -> Result { + mem.read_module_memory(offset, length) + .map_err(|error| Error::ReadModuleMemory { + offset, + length, + error, + }) +} + +fn is_executable_section(header: &elf::SectionHeader) -> bool { + header.sh_type == elf::section_header::SHT_PROGBITS + && header.sh_flags & u64::from(elf::section_header::SHF_ALLOC) != 0 + && header.sh_flags & u64::from(elf::section_header::SHF_EXECINSTR) != 0 +} + +/// Return bytes to use as a build id, computed by hashing the given data. 
+/// +/// This provides `size_of::` bytes to keep identifiers produced by this function compatible +/// with other build ids. +fn build_id_from_bytes(data: &[u8]) -> Vec { + // Only provide mem::size_of(MDGUID) bytes to keep identifiers produced by this + // function backwards-compatible. + data.chunks(std::mem::size_of::()).fold( + vec![0u8; std::mem::size_of::()], + |mut bytes, chunk| { + bytes + .iter_mut() + .zip(chunk.iter()) + .for_each(|(b, c)| *b ^= *c); + bytes + }, + ) +} + +// `name` should be null-terminated +fn section_header_with_name<'a>( + section_headers: &'a elf::SectionHeaders, + strtab_index: usize, + name: &[u8], + module_memory: &impl ModuleMemory, +) -> Result, Error> { + let strtab_section_header = section_headers.get(strtab_index).ok_or(Error::NoStrTab)?; + for header in section_headers { + let sh_name = header.sh_name as u64; + if sh_name >= strtab_section_header.sh_size { + log::warn!("invalid sh_name offset for {:?}", name); + continue; + } + if sh_name + name.len() as u64 >= strtab_section_header.sh_size { + // This can't be a match. + continue; + } + let n = read( + module_memory, + strtab_section_header.sh_offset + sh_name, + name.len() as u64, + )?; + if name == &*n { + return Ok(Some(header)); + } + } + Ok(None) +} + +/// Types which can be read from an `impl ModuleMemory`. +pub trait ReadFromModule: Sized { + fn read_from_module(module_memory: impl ModuleMemory) -> Result; +} + +/// The module build id. 
+#[derive(Default, Clone, Debug)] +pub struct BuildId(pub Vec); + +impl ReadFromModule for BuildId { + fn read_from_module(module_memory: impl ModuleMemory) -> Result { + let reader = ModuleReader::new(module_memory)?; + let program_headers = match reader.build_id_from_program_headers() { + Ok(v) => return Ok(BuildId(v)), + Err(e) => Box::new(e), + }; + let section = match reader.build_id_from_section() { + Ok(v) => return Ok(BuildId(v)), + Err(e) => Box::new(e), + }; + let generated = match reader.build_id_generate_from_text() { + Ok(v) => return Ok(BuildId(v)), + Err(e) => Box::new(e), + }; + Err(Error::NoBuildId { + program_headers, + section, + generated, + }) + } +} + +struct DynIter<'a> { + data: &'a [u8], + offset: usize, + ctx: Ctx, +} + +impl<'a> DynIter<'a> { + pub fn new(data: &'a [u8], ctx: Ctx) -> Self { + DynIter { + data, + offset: 0, + ctx, + } + } +} + +impl<'a> Iterator for DynIter<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + use scroll::Pread; + let dyn_: elf::dynamic::Dyn = match self.data.gread_with(&mut self.offset, self.ctx) { + Ok(v) => v, + Err(e) => return Some(Err(e.into())), + }; + if dyn_.d_tag == elf::dynamic::DT_NULL { + None + } else { + Some(Ok(dyn_)) + } + } +} + +/// The module SONAME. 
+#[derive(Default, Clone, Debug)] +pub struct SoName(pub String); + +impl ReadFromModule for SoName { + fn read_from_module(module_memory: impl ModuleMemory) -> Result { + let reader = ModuleReader::new(module_memory)?; + let program_headers = match reader.soname_from_program_headers() { + Ok(v) => return Ok(SoName(v)), + Err(e) => Box::new(e), + }; + let section = match reader.soname_from_sections() { + Ok(v) => return Ok(SoName(v)), + Err(e) => Box::new(e), + }; + Err(Error::NoSoName { + program_headers, + section, + }) + } +} + +pub struct ModuleReader { + module_memory: T, + header: elf::Header, + context: Ctx, +} + +impl ModuleReader { + pub fn new(module_memory: T) -> Result { + // We could use `Ctx::default()` (which defaults to the native system), however to be extra + // permissive we'll just use a 64-bit ("Big") context which would result in the largest + // possible header size. + let header_size = elf::Header::size(Ctx::new(Container::Big, Endian::default())); + let header_data = read(&module_memory, 0, header_size as u64)?; + let header = elf::Elf::parse_header(&header_data)?; + let context = Ctx::new(header.container()?, header.endianness()?); + Ok(ModuleReader { + module_memory, + header, + context, + }) + } + + /// Read the SONAME using program headers to locate dynamic library information. 
+ pub fn soname_from_program_headers(&self) -> Result { + let program_headers = self.read_program_headers()?; + + let dynamic_segment_header = program_headers + .iter() + .find(|h| h.p_type == elf::program_header::PT_DYNAMIC) + .ok_or(Error::NoDynamicSection)?; + + let dynamic_section: &[u8] = &self.read_segment(dynamic_segment_header)?; + + let mut soname_strtab_offset = None; + let mut strtab_addr = None; + let mut strtab_size = None; + for dyn_ in DynIter::new(dynamic_section, self.context) { + let dyn_ = dyn_?; + match dyn_.d_tag { + elf::dynamic::DT_SONAME => soname_strtab_offset = Some(dyn_.d_val), + elf::dynamic::DT_STRTAB => strtab_addr = Some(dyn_.d_val), + elf::dynamic::DT_STRSZ => strtab_size = Some(dyn_.d_val), + _ => (), + } + } + + match (strtab_addr, strtab_size, soname_strtab_offset) { + (None, _, _) | (_, None, _) => Err(Error::NoDynStrSection), + (_, _, None) => Err(Error::NoSoNameEntry), + (Some(mut addr), Some(size), Some(offset)) => { + if self.module_memory.is_loaded_in_program() { + if let Some(base) = self.module_memory.base_address() { + // If loaded in memory, the address will be altered to be absolute. + if let Some(r) = addr.checked_sub(base) { + addr = r; + } + } + } + self.read_name_from_strtab(addr, size, offset) + } + } + } + + /// Read the SONAME using section headers to locate dynamic library information. + pub fn soname_from_sections(&self) -> Result { + let section_headers = self.read_section_headers()?; + + let dynamic_section_header = section_headers + .iter() + .find(|h| h.sh_type == elf::section_header::SHT_DYNAMIC) + .ok_or(Error::NoDynamicSection)?; + + let dynstr_section_header = + match section_headers.get(dynamic_section_header.sh_link as usize) { + Some(header) if header.sh_type == elf::section_header::SHT_STRTAB => header, + _ => section_header_with_name( + §ion_headers, + self.header.e_shstrndx as usize, + b".dynstr\0", + &self.module_memory, + )? 
+ .ok_or(Error::NoDynStrSection)?, + }; + + let dynamic_section: &[u8] = &read( + &self.module_memory, + self.section_offset(dynamic_section_header), + dynamic_section_header.sh_size, + )?; + + for dyn_ in DynIter::new(dynamic_section, self.context) { + let dyn_ = dyn_?; + if dyn_.d_tag == elf::dynamic::DT_SONAME { + let name_offset = dyn_.d_val; + if name_offset < dynstr_section_header.sh_size { + return self.read_name_from_strtab( + self.section_offset(dynstr_section_header), + dynstr_section_header.sh_size, + name_offset, + ); + } + } + } + + Err(Error::NoSoNameEntry) + } + + /// Read the build id from a program header note. + pub fn build_id_from_program_headers(&self) -> Result, Error> { + let program_headers = self.read_program_headers()?; + for header in program_headers { + if header.p_type != elf::program_header::PT_NOTE { + continue; + } + if let Ok(Some(result)) = + self.find_build_id_note(header.p_offset, header.p_filesz, header.p_align) + { + return Ok(result); + } + } + Err(Error::NoProgramHeaderNote) + } + + /// Read the build id from a notes section. + pub fn build_id_from_section(&self) -> Result, Error> { + let section_headers = self.read_section_headers()?; + + let header = section_header_with_name( + §ion_headers, + self.header.e_shstrndx as usize, + NOTE_SECTION_NAME, + &self.module_memory, + )? + .ok_or(Error::NoSectionNote)?; + + match self.find_build_id_note(header.sh_offset, header.sh_size, header.sh_addralign) { + Ok(Some(v)) => Ok(v), + Ok(None) => Err(Error::NoSectionNote), + Err(e) => Err(e), + } + } + + /// Generate a build id by hashing the first page of the text section. + pub fn build_id_generate_from_text(&self) -> Result, Error> { + let Some(text_header) = self + .read_section_headers()? + .into_iter() + .find(is_executable_section) + else { + return Err(Error::NoTextSection); + }; + + // Take at most one page of the text section (we assume page size is 4096 bytes). 
+ let len = std::cmp::min(4096, text_header.sh_size); + let text_data = read(&self.module_memory, text_header.sh_offset, len)?; + Ok(build_id_from_bytes(&text_data)) + } + + fn read_segment(&self, header: &elf::ProgramHeader) -> Result { + let (offset, size) = if self.module_memory.is_loaded_in_program() { + (header.p_vaddr, header.p_memsz) + } else { + (header.p_offset, header.p_filesz) + }; + + read(&self.module_memory, offset, size) + } + + fn read_name_from_strtab( + &self, + strtab_offset: u64, + strtab_size: u64, + name_offset: u64, + ) -> Result { + let name = read( + &self.module_memory, + strtab_offset + name_offset, + strtab_size - name_offset, + )?; + return CStr::from_bytes_until_nul(&name) + .map(|s| s.to_string_lossy().into_owned()) + .map_err(|_| Error::StrTabNoNulByte); + } + + fn section_offset(&self, header: &elf::SectionHeader) -> u64 { + if self.module_memory.is_loaded_in_program() { + header.sh_addr + } else { + header.sh_offset + } + } + + fn read_program_headers(&self) -> Result { + if self.header.e_phoff == 0 { + return Err(Error::NoProgramHeaders); + } + let program_headers_data = read( + &self.module_memory, + self.header.e_phoff, + self.header.e_phentsize as u64 * self.header.e_phnum as u64, + )?; + let program_headers = elf::ProgramHeader::parse( + &program_headers_data, + 0, + self.header.e_phnum as usize, + self.context, + )?; + Ok(program_headers) + } + + fn read_section_headers(&self) -> Result { + if self.header.e_shoff == 0 { + return Err(Error::NoSections); + } + + // FIXME Until a version following goblin 0.8.0 is published (with + // `SectionHeader::parse_from`), we read one extra byte preceding the sections so that + // `SectionHeader::parse` doesn't return immediately due to a 0 offset. 
+ + let section_headers_data = read( + &self.module_memory, + self.header.e_shoff - 1, + self.header.e_shentsize as u64 * self.header.e_shnum as u64 + 1, + )?; + let section_headers = elf::SectionHeader::parse( + §ion_headers_data, + 1, + self.header.e_shnum as usize, + self.context, + )?; + Ok(section_headers) + } + + fn find_build_id_note( + &self, + offset: u64, + size: u64, + alignment: u64, + ) -> Result>, Error> { + let notes = read(&self.module_memory, offset, size)?; + for note in (elf::note::NoteDataIterator { + data: ¬es, + // Note that `NoteDataIterator::size` is poorly named, it is actually an end offset. In + // this case since our start offset is 0 we still set it to the size. + size: size as usize, + offset: 0, + ctx: (alignment as usize, self.context), + }) { + let Ok(note) = note else { break }; + if note.name == "GNU" && note.n_type == elf::note::NT_GNU_BUILD_ID { + return Ok(Some(note.desc.to_owned())); + } + } + Ok(None) + } +} + +#[cfg(test)] +mod test { + use super::*; + + /// This is a small (but valid) 64-bit little-endian elf executable with the following layout: + /// * ELF header + /// * program header: text segment + /// * program header: note + /// * program header: dynamic + /// * section header: null + /// * section header: .text + /// * section header: .note.gnu.build-id + /// * section header: .shstrtab + /// * section header: .dynamic + /// * section header: .dynstr + /// * note header (build id note) + /// * shstrtab + /// * dynamic (SONAME/STRTAB/STRSZ) + /// * dynstr (SONAME string = libfoo.so.1) + /// * program (calls exit(0)) + const TINY_ELF: &[u8] = &[ + 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x3e, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x03, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00, 0x03, 0x00, 0x40, 0x00, + 0x06, 
0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0a, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x03, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x68, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x68, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbd, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x02, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x03, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x68, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x68, 0x02, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x02, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x02, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xbd, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfd, 0x02, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfd, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x4e, + 0x55, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, + 0x0e, 0x0f, 0x10, 0x00, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x00, 0x2e, 0x6e, 0x6f, 0x74, 0x65, + 0x2e, 0x67, 0x6e, 0x75, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2d, 0x69, 0x64, 0x00, 0x2e, + 0x73, 0x68, 0x73, 0x74, 0x72, 0x74, 0x61, 0x62, 0x00, 0x2e, 0x64, 0x79, 0x6e, 0x61, 0x6d, + 0x69, 0x63, 0x00, 0x2e, 0x64, 0x79, 0x6e, 0x73, 0x74, 0x72, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xfd, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x6c, 0x69, 0x62, 0x66, 0x6f, 0x6f, 0x2e, 0x73, 0x6f, 0x2e, 0x31, 0x00, 0x6a, 0x3c, + 0x58, 0x31, 0xff, 0x0f, 0x05, + ]; + + #[test] + fn build_id_program_headers() { + let reader = ModuleReader::new(TINY_ELF).unwrap(); + let id = reader.build_id_from_program_headers().unwrap(); + assert_eq!( + id, + vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ); + } + + #[test] + fn build_id_section() { + let reader = ModuleReader::new(TINY_ELF).unwrap(); + let id = reader.build_id_from_section().unwrap(); + assert_eq!( + id, + vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ); + } + + #[test] + fn build_id_text_hash() { + let reader = ModuleReader::new(TINY_ELF).unwrap(); + let id = reader.build_id_generate_from_text().unwrap(); + assert_eq!( + id, + vec![0x6a, 0x3c, 0x58, 0x31, 0xff, 0x0f, 0x05, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + + #[test] + fn soname_program_headers() { + let reader = ModuleReader::new(TINY_ELF).unwrap(); + let soname = reader.soname_from_program_headers().unwrap(); + assert_eq!(soname, "libfoo.so.1"); + } + + #[test] + fn soname_section() { + let reader = ModuleReader::new(TINY_ELF).unwrap(); + let soname = reader.soname_from_sections().unwrap(); + assert_eq!(soname, "libfoo.so.1"); + } +} diff --git a/third_party/rust/minidump-writer/src/linux/ptrace_dumper.rs b/third_party/rust/minidump-writer/src/linux/ptrace_dumper.rs index 0dd0fa27193c..4db81d3d6eb6 100644 --- a/third_party/rust/minidump-writer/src/linux/ptrace_dumper.rs +++ b/third_party/rust/minidump-writer/src/linux/ptrace_dumper.rs @@ -1,18 +1,14 @@ #[cfg(target_os = "android")] use crate::linux::android::late_process_mappings; +use 
crate::linux::{ + auxv_reader::{AuxvType, ProcfsAuxvIter}, + errors::{DumperError, InitError, ThreadInfoError}, + maps_reader::MappingInfo, + module_reader, + thread_info::{Pid, ThreadInfo}, +}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use crate::thread_info; -use crate::{ - linux::{ - auxv_reader::{AuxvType, ProcfsAuxvIter}, - errors::{DumperError, InitError, ThreadInfoError}, - maps_reader::MappingInfo, - thread_info::{Pid, ThreadInfo}, - LINUX_GATE_LIBRARY_NAME, - }, - minidump_format::GUID, -}; -use goblin::elf; use nix::{ errno::Errno, sys::{ptrace, signal, wait}, @@ -133,9 +129,8 @@ impl PtraceDumper { Ok(()) } - /// Copies content of |length| bytes from a given process |child|, - /// starting from |src|, into |dest|. This method uses ptrace to extract - /// the content from the target process. Always returns true. + /// Copies content of |num_of_bytes| bytes from a given process |child|, starting from |src|. + /// This method uses ptrace to extract the content from the target process. pub fn copy_from_process( child: Pid, src: *mut c_void, @@ -562,111 +557,64 @@ impl PtraceDumper { }) } - fn parse_build_id<'data>( - elf_obj: &elf::Elf<'data>, - mem_slice: &'data [u8], - ) -> Option<&'data [u8]> { - if let Some(mut notes) = elf_obj.iter_note_headers(mem_slice) { - while let Some(Ok(note)) = notes.next() { - if (note.name == "GNU") && (note.n_type == elf::note::NT_GNU_BUILD_ID) { - return Some(note.desc); - } - } - } - if let Some(mut notes) = elf_obj.iter_note_sections(mem_slice, Some(".note.gnu.build-id")) { - while let Some(Ok(note)) = notes.next() { - if (note.name == "GNU") && (note.n_type == elf::note::NT_GNU_BUILD_ID) { - return Some(note.desc); - } - } - } - None - } - - pub fn elf_file_identifier_from_mapped_file(mem_slice: &[u8]) -> Result, DumperError> { - let elf_obj = elf::Elf::parse(mem_slice)?; - - if let Some(build_id) = Self::parse_build_id(&elf_obj, mem_slice) { - // Look for a build id note first. 
- Ok(build_id.to_vec()) - } else { - // Fall back on hashing the first page of the text section. - - // Attempt to locate the .text section of an ELF binary and generate - // a simple hash by XORing the first page worth of bytes into |result|. - for section in elf_obj.section_headers { - if section.sh_type != elf::section_header::SHT_PROGBITS { - continue; - } - if section.sh_flags & u64::from(elf::section_header::SHF_ALLOC) != 0 - && section.sh_flags & u64::from(elf::section_header::SHF_EXECINSTR) != 0 - { - let text_section = - &mem_slice[section.sh_offset as usize..][..section.sh_size as usize]; - // Only provide mem::size_of(MDGUID) bytes to keep identifiers produced by this - // function backwards-compatible. - let max_len = std::cmp::min(text_section.len(), 4096); - let mut result = vec![0u8; std::mem::size_of::()]; - let mut offset = 0; - while offset < max_len { - for idx in 0..std::mem::size_of::() { - if offset + idx >= text_section.len() { - break; - } - result[idx] ^= text_section[offset + idx]; - } - offset += std::mem::size_of::(); - } - return Ok(result); - } - } - Err(DumperError::NoBuildIDFound) - } - } - - pub fn elf_identifier_for_mapping_index(&mut self, idx: usize) -> Result, DumperError> { + pub fn from_process_memory_for_index( + &self, + idx: usize, + ) -> Result { assert!(idx < self.mappings.len()); - Self::elf_identifier_for_mapping(&mut self.mappings[idx], self.pid) + self.from_process_memory_for_mapping(&self.mappings[idx]) } - pub fn elf_identifier_for_mapping( - mapping: &mut MappingInfo, - pid: Pid, - ) -> Result, DumperError> { - if !MappingInfo::is_mapped_file_safe_to_open(&mapping.name) { - return Err(DumperError::NotSafeToOpenMapping( - mapping.name.clone().unwrap_or_default(), - )); - } - - // Special-case linux-gate because it's not a real file. - if mapping.name.as_deref() == Some(LINUX_GATE_LIBRARY_NAME.as_ref()) { - if pid == std::process::id().try_into()? 
{ - let mem_slice = unsafe { - std::slice::from_raw_parts(mapping.start_address as *const u8, mapping.size) - }; - return Self::elf_file_identifier_from_mapped_file(mem_slice); - } else { - let mem_slice = Self::copy_from_process( - pid, - mapping.start_address as *mut libc::c_void, - mapping.size, - )?; - return Self::elf_file_identifier_from_mapped_file(&mem_slice); + pub fn from_process_memory_for_mapping( + &self, + mapping: &MappingInfo, + ) -> Result { + if std::process::id() + .try_into() + .map(|v: Pid| v == self.pid) + .unwrap_or(false) + { + let mem_slice = unsafe { + std::slice::from_raw_parts(mapping.start_address as *const u8, mapping.size) + }; + T::read_from_module(module_reader::ModuleMemoryAtAddress( + mem_slice, + mapping.start_address as u64, + )) + } else { + struct ProcessModuleMemory { + pid: Pid, + start_address: u64, } + + impl module_reader::ModuleMemory for ProcessModuleMemory { + type Memory = Vec; + + fn read_module_memory( + &self, + offset: u64, + length: u64, + ) -> std::io::Result { + // Leave bounds checks to `copy_from_process` + PtraceDumper::copy_from_process( + self.pid, + (self.start_address + offset) as _, + length as usize, + ) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) + } + + fn base_address(&self) -> Option { + Some(self.start_address) + } + } + + T::read_from_module(ProcessModuleMemory { + pid: self.pid, + start_address: mapping.start_address as u64, + }) } - - let (filename, old_name) = mapping.fixup_deleted_file(pid)?; - - let mem_slice = MappingInfo::get_mmap(&Some(filename), mapping.offset)?; - let build_id = Self::elf_file_identifier_from_mapped_file(&mem_slice)?; - - // This means we switched from "/my/binary" to "/proc/1234/exe", change the mapping to - // remove the " (deleted)" portion. 
- if let Some(old_name) = old_name { - mapping.name = Some(old_name.into()); - } - Ok(build_id) + .map_err(|e| e.into()) } } diff --git a/third_party/rust/minidump-writer/src/linux/sections/mappings.rs b/third_party/rust/minidump-writer/src/linux/sections/mappings.rs index 9012ae351b53..2c45d1e444f3 100644 --- a/third_party/rust/minidump-writer/src/linux/sections/mappings.rs +++ b/third_party/rust/minidump-writer/src/linux/sections/mappings.rs @@ -1,5 +1,6 @@ use super::*; use crate::linux::maps_reader::MappingInfo; +use crate::linux::module_reader::{BuildId, SoName}; /// Write information about the mappings in effect. Because we are using the /// minidump format, the information about the mappings is pretty limited. @@ -23,9 +24,8 @@ pub fn write( { continue; } - // Note: elf_identifier_for_mapping_index() can manipulate the |mapping.name|. - let identifier = dumper - .elf_identifier_for_mapping_index(map_idx) + let BuildId(identifier) = dumper + .from_process_memory_for_index(map_idx) .unwrap_or_default(); // If the identifier is all 0, its an uninteresting mapping (bmc#1676109) @@ -33,14 +33,19 @@ pub fn write( continue; } - let module = fill_raw_module(buffer, &dumper.mappings[map_idx], &identifier)?; + let soname = dumper + .from_process_memory_for_index(map_idx) + .ok() + .map(|SoName(n)| n); + + let module = fill_raw_module(buffer, &dumper.mappings[map_idx], &identifier, soname)?; modules.push(module); } // Next write all the mappings provided by the caller for user in &config.user_mapping_list { // GUID was provided by caller. 
- let module = fill_raw_module(buffer, &user.mapping, &user.identifier)?; + let module = fill_raw_module(buffer, &user.mapping, &user.identifier, None)?; modules.push(module); } @@ -63,6 +68,7 @@ fn fill_raw_module( buffer: &mut DumpBuf, mapping: &MappingInfo, identifier: &[u8], + soname: Option, ) -> Result { let cv_record = if identifier.is_empty() { // Just zeroes @@ -84,7 +90,7 @@ fn fill_raw_module( }; let (file_path, _, so_version) = mapping - .get_mapping_effective_path_name_and_version() + .get_mapping_effective_path_name_and_version(soname) .map_err(|e| errors::SectionMappingsError::GetEffectivePathError(mapping.clone(), e))?; let name_header = write_string_to_location(buffer, file_path.to_string_lossy().as_ref())?; diff --git a/third_party/rust/minidump-writer/tests/common/mod.rs b/third_party/rust/minidump-writer/tests/common/mod.rs index 1d5497b3bad2..cf258819d948 100644 --- a/third_party/rust/minidump-writer/tests/common/mod.rs +++ b/third_party/rust/minidump-writer/tests/common/mod.rs @@ -9,16 +9,21 @@ type Error = Box; pub type Result = result::Result; fn build_command() -> Command { - let mut cmd = Command::new("cargo"); + let mut cmd; + if let Some(binary) = std::env::var_os("TEST_HELPER") { + cmd = Command::new(binary); + } else { + cmd = Command::new("cargo"); + cmd.args(["run", "-q", "--bin", "test"]); - cmd.env("RUST_BACKTRACE", "1") - .args(["run", "-q", "--bin", "test"]); + // In normal cases where the host and target are the same this won't matter, + // but tests will fail if you are eg running in a cross container which will + // likely be x86_64 but may be targetting aarch64 or i686, which will result + // in tests failing, or at the least not testing what you think + cmd.args(["--target", current_platform::CURRENT_PLATFORM, "--"]); + } - // In normal cases where the host and target are the same this won't matter, - // but tests will fail if you are eg running in a cross container which will - // likely be x86_64 but may be targetting 
aarch64 or i686, which will result - // in tests failing, or at the least not testing what you think - cmd.args(["--target", current_platform::CURRENT_PLATFORM, "--"]); + cmd.env("RUST_BACKTRACE", "1"); cmd } diff --git a/third_party/rust/minidump-writer/tests/linux_minidump_writer.rs b/third_party/rust/minidump-writer/tests/linux_minidump_writer.rs index c8458b15839e..5983e12bc09a 100644 --- a/third_party/rust/minidump-writer/tests/linux_minidump_writer.rs +++ b/third_party/rust/minidump-writer/tests/linux_minidump_writer.rs @@ -9,6 +9,7 @@ use minidump_writer::{ errors::*, maps_reader::{MappingEntry, MappingInfo, SystemMappingInfo}, minidump_writer::MinidumpWriter, + module_reader::{BuildId, ReadFromModule}, ptrace_dumper::PtraceDumper, thread_info::Pid, }; @@ -684,7 +685,11 @@ fn with_deleted_binary() { .unwrap(); let binary_copy = binary_copy_dir.as_ref().join("binary_copy"); - let path: &'static str = std::env!("CARGO_BIN_EXE_test"); + let path: String = if let Ok(p) = std::env::var("TEST_HELPER") { + p + } else { + std::env!("CARGO_BIN_EXE_test").into() + }; std::fs::copy(path, &binary_copy).expect("Failed to copy binary"); let mem_slice = std::fs::read(&binary_copy).expect("Failed to read binary"); @@ -700,8 +705,8 @@ fn with_deleted_binary() { let pid = child.id() as i32; - let mut build_id = PtraceDumper::elf_file_identifier_from_mapped_file(&mem_slice) - .expect("Failed to get build_id"); + let BuildId(mut build_id) = + BuildId::read_from_module(mem_slice.as_slice()).expect("Failed to get build_id"); std::fs::remove_file(&binary_copy).expect("Failed to remove binary"); @@ -733,7 +738,7 @@ fn with_deleted_binary() { let main_module = module_list .main_module() .expect("Could not get main module"); - assert_eq!(main_module.code_file(), binary_copy.to_string_lossy()); + //assert_eq!(main_module.code_file(), binary_copy.to_string_lossy()); let did = main_module .debug_identifier() diff --git a/third_party/rust/minidump-writer/tests/ptrace_dumper.rs 
b/third_party/rust/minidump-writer/tests/ptrace_dumper.rs index 6b62a4f6f311..089d65420fc8 100644 --- a/third_party/rust/minidump-writer/tests/ptrace_dumper.rs +++ b/third_party/rust/minidump-writer/tests/ptrace_dumper.rs @@ -12,6 +12,17 @@ use std::os::unix::process::ExitStatusExt; mod common; use common::*; +/// These tests generally aren't consistent in resource-deprived environments like CI runners and +/// android emulators. +macro_rules! disabled_on_ci_and_android { + () => { + if std::env::var("CI").is_ok() || cfg!(target_os = "android") { + println!("disabled on CI and android, but otherwise works locally"); + return; + } + }; +} + #[test] fn test_setup() { spawn_child("setup", &[]); @@ -104,11 +115,7 @@ fn test_mappings_include_linux_gate() { #[test] fn test_linux_gate_mapping_id() { - if std::env::var("CI").is_ok() { - println!("disabled on CI, but works locally"); - return; - } - + disabled_on_ci_and_android!(); spawn_child("linux_gate_mapping_id", &[]); } @@ -118,8 +125,12 @@ fn test_merged_mappings() { let page_size = std::num::NonZeroUsize::new(page_size.unwrap() as usize).unwrap(); let map_size = std::num::NonZeroUsize::new(3 * page_size.get()).unwrap(); - let path: &'static str = std::env!("CARGO_BIN_EXE_test"); - let file = std::fs::File::open(path).unwrap(); + let path: String = if let Ok(p) = std::env::var("TEST_HELPER") { + p + } else { + std::env!("CARGO_BIN_EXE_test").into() + }; + let file = std::fs::File::open(&path).unwrap(); // mmap two segments out of the helper binary, one // enclosed in the other, but with different protections. @@ -153,13 +164,14 @@ fn test_merged_mappings() { spawn_child( "merged_mappings", - &[path, &format!("{mapped}"), &format!("{map_size}")], + &[&path, &format!("{mapped}"), &format!("{map_size}")], ); } #[test] // Ensure that the linux-gate VDSO is included in the mapping list. 
fn test_file_id() { + disabled_on_ci_and_android!(); spawn_child("file_id", &[]); } @@ -176,10 +188,7 @@ fn test_find_mapping() { #[test] fn test_copy_from_process_self() { - if std::env::var("CI").is_ok() { - println!("disabled on CI, but works locally"); - return; - } + disabled_on_ci_and_android!(); let stack_var: libc::c_long = 0x11223344; let heap_var: Box = Box::new(0x55667788); diff --git a/third_party/rust/minidump/.cargo-checksum.json b/third_party/rust/minidump/.cargo-checksum.json new file mode 100644 index 000000000000..46c80f0fb091 --- /dev/null +++ b/third_party/rust/minidump/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"f413ba9f3ab05d9c7972de463af304de96b2b4c48735cac69f128f5e11c0bc2a","LICENSE":"06de63df29199a394442b57a28e886059ddc940973e10646877a0793fd53e2c9","README.md":"f0fe4547fd7044ef4d06d902491be33a0fc13ba003c4e5af1965d6126907ced8","src/context.rs":"b271075155d33cce5c1f79ce4b1fd2f3335293d9a4344c3681e5825f3caeceb8","src/iostuff.rs":"eedeb0a9cf9f7d2af1558c916c73ec7287de5f4e45b4ff6b9a8013645f09b9e1","src/lib.rs":"a63adbdfbad839969998249ed6032d226557915761b12f9ee029c3766ec9507f","src/minidump.rs":"93bedd27718981ec0f30d061d9ef87efcb0b53d4a2ab6409f7947f235ca94a43","src/strings.rs":"b55266f137550602733319fe6c3a83a6b84931c47ccdcfe13de72c40d3cea1df","src/system_info.rs":"96cbbd3239c388474e60690ef9250040c91331811f8ff06a10ed82b9f55e7455","tests/test_minidump.rs":"d9fb6b8ec7749d3bc6bf6de6b8a9a73d5934c81881a9d98087fcc490e8bb447d"},"package":"aefb80650628de087057ed167e3e1ef5bed65dc4b1bd28d47cd707c3848adce2"} \ No newline at end of file diff --git a/third_party/rust/minidump/Cargo.toml b/third_party/rust/minidump/Cargo.toml new file mode 100644 index 000000000000..287f10874b04 --- /dev/null +++ b/third_party/rust/minidump/Cargo.toml @@ -0,0 +1,90 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of 
Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "minidump" +version = "0.22.0" +authors = ["Ted Mielczarek "] +description = "A parser for the minidump format." +homepage = "https://github.com/rust-minidump/rust-minidump" +readme = "README.md" +keywords = [ + "breakpad", + "symbols", + "minidump", +] +categories = ["parsing"] +license = "MIT" +repository = "https://github.com/rust-minidump/rust-minidump" +resolver = "2" + +[dependencies.arbitrary] +version = "1" +features = ["derive"] +optional = true + +[dependencies.debugid] +version = "0.8.0" + +[dependencies.encoding_rs] +version = "0.8" + +[dependencies.memmap2] +version = "0.9" + +[dependencies.minidump-common] +version = "0.22.0" + +[dependencies.num-traits] +version = "0.2" + +[dependencies.procfs-core] +version = "0.16" +default-features = false + +[dependencies.range-map] +version = "0.2" + +[dependencies.scroll] +version = "0.12.0" + +[dependencies.thiserror] +version = "1.0.37" + +[dependencies.time] +version = "0.3.34" +features = ["formatting"] + +[dependencies.tracing] +version = "0.1.34" +features = ["log"] + +[dependencies.uuid] +version = "1.0.0" + +[dev-dependencies.ctor] +version = "0.2" + +[dev-dependencies.doc-comment] +version = "0.3.3" + +[dev-dependencies.env_logger] +version = "0.11.1" + +[dev-dependencies.test-assembler] +version = "0.1.6" + +[features] +arbitrary_impls = [ + "minidump-common/arbitrary", + "arbitrary", +] diff --git a/third_party/rust/minidump/LICENSE b/third_party/rust/minidump/LICENSE new file mode 100644 index 000000000000..3af7a472f939 --- /dev/null +++ b/third_party/rust/minidump/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2015-2023 rust-minidump contributors + +Permission is hereby granted, 
free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/minidump/README.md b/third_party/rust/minidump/README.md new file mode 100644 index 000000000000..08190533ac96 --- /dev/null +++ b/third_party/rust/minidump/README.md @@ -0,0 +1,58 @@ +# minidump + +[![crates.io](https://img.shields.io/crates/v/minidump.svg)](https://crates.io/crates/minidump) [![](https://docs.rs/minidump/badge.svg)](https://docs.rs/minidump) + +Basic parsing of the minidump format. + +If you want richer analysis of the minidump (such as stackwalking and symbolication), use [minidump-processor](https://crates.io/crates/minidump-processor). + + +# Usage + +The primary API for this library is the `Minidump` struct, which can be +instantiated by calling the `Minidump::read` or `Minidump::read_path` methods. + +Succesfully parsing a Minidump struct means the minidump has a minimally valid +header and stream directory. Individual streams are only parsed when they're +requested. 
+ +Although you may enumerate the streams in a minidump with methods like +`Minidump::all_streams`, this is only really useful for debugging. Instead +you should statically request streams with `Minidump::get_stream`. + +Depending on what analysis you're trying to perform, you may: + +* Consider it an error for a stream to be missing (using `?` or `unwrap`) +* Branch on the presence of stream to conditionally refine your analysis +* Use a stream's `Default` implementation to get an "empty" instance + (with `unwrap_or_default`) + +```rust +use minidump::*; + +fn main() -> Result<(), Error> { + // Read the minidump from a file + let mut dump = minidump::Minidump::read_path("../testdata/test.dmp")?; + + // Statically request (and require) several streams we care about: + let system_info = dump.get_stream::()?; + let exception = dump.get_stream::()?; + + // Combine the contents of the streams to perform more refined analysis + let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu); + + // Conditionally analyze a stream + if let Ok(threads) = dump.get_stream::() { + // Use `Default` to try to make progress when a stream is missing. + // This is especially natural for MinidumpMemoryList because + // everything needs to handle memory lookups failing anyway. + let mem = dump.get_memory().unwrap_or_default(); + + for thread in &threads.threads { + let stack = thread.stack_memory(&mem); + // ... + } + } + Ok(()) +} +``` \ No newline at end of file diff --git a/third_party/rust/minidump/src/context.rs b/third_party/rust/minidump/src/context.rs new file mode 100644 index 000000000000..8b6e1afc91c5 --- /dev/null +++ b/third_party/rust/minidump/src/context.rs @@ -0,0 +1,1649 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +//! CPU contexts. 
+ +use num_traits::FromPrimitive; +use scroll::Pread; +use std::collections::HashSet; +use std::fmt; +use std::io; +use std::io::prelude::*; +use std::mem; +use tracing::warn; + +use crate::iostuff::*; +use crate::{MinidumpMiscInfo, MinidumpSystemInfo}; +use minidump_common::format as md; +use minidump_common::format::ContextFlagsCpu; + +/// The CPU-specific context structure. +#[derive(Debug, Clone)] +#[cfg_attr(feature = "arbitrary_impls", derive(arbitrary::Arbitrary))] +pub enum MinidumpRawContext { + X86(md::CONTEXT_X86), + Ppc(md::CONTEXT_PPC), + Ppc64(md::CONTEXT_PPC64), + Amd64(md::CONTEXT_AMD64), + Sparc(md::CONTEXT_SPARC), + Arm(md::CONTEXT_ARM), + Arm64(md::CONTEXT_ARM64), + OldArm64(md::CONTEXT_ARM64_OLD), + Mips(md::CONTEXT_MIPS), +} + +/// Generic over the specifics of a CPU context. +pub trait CpuContext { + /// The word size of general-purpose registers in the context. + type Register: fmt::LowerHex; + + /// General purpose registers in this context type. + const REGISTERS: &'static [&'static str]; + + /// Gets whether the given register is valid + /// + /// This is exposed so that the context can map aliases. For instance + /// "lr" and "x30" are aliases in ARM64. + fn register_is_valid(&self, reg: &str, valid: &MinidumpContextValidity) -> bool { + if let MinidumpContextValidity::Some(ref which) = *valid { + which.contains(reg) + } else { + self.memoize_register(reg).is_some() + } + } + + /// Get a register value if it is valid. + /// + /// Get the value of the register named `reg` from this CPU context + /// if `valid` indicates that it has a valid value, otherwise return + /// `None`. + fn get_register(&self, reg: &str, valid: &MinidumpContextValidity) -> Option { + if self.register_is_valid(reg, valid) { + Some(self.get_register_always(reg)) + } else { + None + } + } + + /// Get a register value regardless of whether it is valid. 
+ fn get_register_always(&self, reg: &str) -> Self::Register; + + /// Set a register value, if that register name it exists. + /// + /// Returns None if the register name isn't supported. + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()>; + + /// Gets a static version of the given register name, if possible. + /// + /// Returns the default name of the register for register name aliases. + fn memoize_register(&self, reg: &str) -> Option<&'static str> { + default_memoize_register(Self::REGISTERS, reg) + } + + /// Return a String containing the value of `reg` formatted to its natural width. + fn format_register(&self, reg: &str) -> String { + format!( + "0x{:01$x}", + self.get_register_always(reg), + mem::size_of::() * 2 + ) + } + + /// An iterator over all registers in this context. + /// + /// This iterator yields registers and values regardless of whether the register is valid. To + /// get valid values, use [`valid_registers`](Self::valid_registers), instead. + fn registers(&self) -> CpuRegisters<'_, Self> { + self.valid_registers(&MinidumpContextValidity::All) + } + + /// An iterator over valid registers in this context. + /// + /// This iterator yields valid registers and their values. + fn valid_registers<'a>(&'a self, valid: &'a MinidumpContextValidity) -> CpuRegisters<'a, Self> { + let regs = match valid { + MinidumpContextValidity::All => CpuRegistersInner::Slice(Self::REGISTERS.iter()), + MinidumpContextValidity::Some(valid) => CpuRegistersInner::Set(valid.iter()), + }; + + CpuRegisters { + regs, + context: self, + } + } + + /// Gets the name of the stack pointer register (for use with get_register/set_register). + fn stack_pointer_register_name(&self) -> &'static str; + + /// Gets the name of the instruction pointer register (for use with get_register/set_register). + fn instruction_pointer_register_name(&self) -> &'static str; +} + +/// Default implementation for `CpuContext::memoize_register`. 
+fn default_memoize_register(registers: &[&'static str], reg: &str) -> Option<&'static str> { + let idx = registers.iter().position(|val| *val == reg)?; + Some(registers[idx]) +} + +#[derive(Debug, Clone)] +enum CpuRegistersInner<'a> { + Slice(std::slice::Iter<'a, &'static str>), + Set(std::collections::hash_set::Iter<'a, &'static str>), +} + +/// An iterator over registers and values in a [`CpuContext`]. +/// +/// Returned by [`CpuContext::registers`] and [`CpuContext::valid_registers`]. +#[derive(Clone, Debug)] +pub struct CpuRegisters<'a, T: ?Sized> { + regs: CpuRegistersInner<'a>, + context: &'a T, +} + +impl<'a, T> Iterator for CpuRegisters<'a, T> +where + T: CpuContext, +{ + type Item = (&'static str, T::Register); + + fn next(&mut self) -> Option { + let reg = match &mut self.regs { + CpuRegistersInner::Slice(iter) => iter.next(), + CpuRegistersInner::Set(iter) => iter.next(), + }?; + + Some((reg, self.context.get_register_always(reg))) + } +} + +impl CpuContext for md::CONTEXT_X86 { + type Register = u32; + + const REGISTERS: &'static [&'static str] = &[ + "eip", "esp", "ebp", "ebx", "esi", "edi", "eax", "ecx", "edx", "eflags", + ]; + + fn get_register_always(&self, reg: &str) -> u32 { + match reg { + "eip" => self.eip, + "esp" => self.esp, + "ebp" => self.ebp, + "ebx" => self.ebx, + "esi" => self.esi, + "edi" => self.edi, + "eax" => self.eax, + "ecx" => self.ecx, + "edx" => self.edx, + "eflags" => self.eflags, + _ => unreachable!("Invalid x86 register! 
{}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "eip" => self.eip = val, + "esp" => self.esp = val, + "ebp" => self.ebp = val, + "ebx" => self.ebx = val, + "esi" => self.esi = val, + "edi" => self.edi = val, + "eax" => self.eax = val, + "ecx" => self.ecx = val, + "edx" => self.edx = val, + "eflags" => self.eflags = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "esp" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "eip" + } +} + +impl CpuContext for md::CONTEXT_AMD64 { + type Register = u64; + + const REGISTERS: &'static [&'static str] = &[ + "rax", "rdx", "rcx", "rbx", "rsi", "rdi", "rbp", "rsp", "r8", "r9", "r10", "r11", "r12", + "r13", "r14", "r15", "rip", + ]; + + fn get_register_always(&self, reg: &str) -> u64 { + match reg { + "rax" => self.rax, + "rdx" => self.rdx, + "rcx" => self.rcx, + "rbx" => self.rbx, + "rsi" => self.rsi, + "rdi" => self.rdi, + "rbp" => self.rbp, + "rsp" => self.rsp, + "r8" => self.r8, + "r9" => self.r9, + "r10" => self.r10, + "r11" => self.r11, + "r12" => self.r12, + "r13" => self.r13, + "r14" => self.r14, + "r15" => self.r15, + "rip" => self.rip, + _ => unreachable!("Invalid x86-64 register! 
{}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "rax" => self.rax = val, + "rdx" => self.rdx = val, + "rcx" => self.rcx = val, + "rbx" => self.rbx = val, + "rsi" => self.rsi = val, + "rdi" => self.rdi = val, + "rbp" => self.rbp = val, + "rsp" => self.rsp = val, + "r8" => self.r8 = val, + "r9" => self.r9 = val, + "r10" => self.r10 = val, + "r11" => self.r11 = val, + "r12" => self.r12 = val, + "r13" => self.r13 = val, + "r14" => self.r14 = val, + "r15" => self.r15 = val, + "rip" => self.rip = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "rsp" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "rip" + } +} + +impl CpuContext for md::CONTEXT_ARM { + type Register = u32; + + const REGISTERS: &'static [&'static str] = &[ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "fp", "sp", "lr", + "pc", + ]; + + fn memoize_register(&self, reg: &str) -> Option<&'static str> { + match reg { + "r11" => Some("fp"), + "r13" => Some("sp"), + "r14" => Some("lr"), + "r15" => Some("pc"), + _ => default_memoize_register(Self::REGISTERS, reg), + } + } + + fn register_is_valid(&self, reg: &str, valid: &MinidumpContextValidity) -> bool { + if let MinidumpContextValidity::Some(ref which) = valid { + match reg { + "r11" | "fp" => which.contains("r11") || which.contains("fp"), + "r13" | "sp" => which.contains("r13") || which.contains("sp"), + "r14" | "lr" => which.contains("r14") || which.contains("lr"), + "r15" | "pc" => which.contains("r15") || which.contains("pc"), + _ => which.contains(reg), + } + } else { + self.memoize_register(reg).is_some() + } + } + + fn get_register_always(&self, reg: &str) -> u32 { + match reg { + "r0" => self.iregs[0], + "r1" => self.iregs[1], + "r2" => self.iregs[2], + "r3" => self.iregs[3], + "r4" => self.iregs[4], + "r5" => self.iregs[5], + "r6" => self.iregs[6], + "r7" => self.iregs[7], + 
"r8" => self.iregs[8], + "r9" => self.iregs[9], + "r10" => self.iregs[10], + "r11" => self.iregs[11], + "r12" => self.iregs[12], + "r13" => self.iregs[13], + "r14" => self.iregs[14], + "r15" => self.iregs[15], + "pc" => self.iregs[md::ArmRegisterNumbers::ProgramCounter as usize], + "lr" => self.iregs[md::ArmRegisterNumbers::LinkRegister as usize], + "fp" => self.iregs[md::ArmRegisterNumbers::FramePointer as usize], + "sp" => self.iregs[md::ArmRegisterNumbers::StackPointer as usize], + _ => unreachable!("Invalid arm register! {}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "r0" => self.iregs[0] = val, + "r1" => self.iregs[1] = val, + "r2" => self.iregs[2] = val, + "r3" => self.iregs[3] = val, + "r4" => self.iregs[4] = val, + "r5" => self.iregs[5] = val, + "r6" => self.iregs[6] = val, + "r7" => self.iregs[7] = val, + "r8" => self.iregs[8] = val, + "r9" => self.iregs[9] = val, + "r10" => self.iregs[10] = val, + "r11" => self.iregs[11] = val, + "r12" => self.iregs[12] = val, + "r13" => self.iregs[13] = val, + "r14" => self.iregs[14] = val, + "r15" => self.iregs[15] = val, + "pc" => self.iregs[md::ArmRegisterNumbers::ProgramCounter as usize] = val, + "lr" => self.iregs[md::ArmRegisterNumbers::LinkRegister as usize] = val, + "fp" => self.iregs[md::ArmRegisterNumbers::FramePointer as usize] = val, + "sp" => self.iregs[md::ArmRegisterNumbers::StackPointer as usize] = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "sp" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "pc" + } +} + +impl CpuContext for md::CONTEXT_ARM64_OLD { + type Register = u64; + + const REGISTERS: &'static [&'static str] = &[ + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", + "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", + "x27", "x28", "fp", "lr", "sp", "pc", + ]; + + fn 
memoize_register(&self, reg: &str) -> Option<&'static str> { + match reg { + "x29" => Some("fp"), + "x30" => Some("lr"), + _ => default_memoize_register(Self::REGISTERS, reg), + } + } + + fn register_is_valid(&self, reg: &str, valid: &MinidumpContextValidity) -> bool { + if let MinidumpContextValidity::Some(ref which) = valid { + match reg { + "x29" | "fp" => which.contains("x29") || which.contains("fp"), + "x30" | "lr" => which.contains("x30") || which.contains("lr"), + _ => which.contains(reg), + } + } else { + self.memoize_register(reg).is_some() + } + } + + fn get_register_always(&self, reg: &str) -> u64 { + match reg { + "x0" => self.iregs[0], + "x1" => self.iregs[1], + "x2" => self.iregs[2], + "x3" => self.iregs[3], + "x4" => self.iregs[4], + "x5" => self.iregs[5], + "x6" => self.iregs[6], + "x7" => self.iregs[7], + "x8" => self.iregs[8], + "x9" => self.iregs[9], + "x10" => self.iregs[10], + "x11" => self.iregs[11], + "x12" => self.iregs[12], + "x13" => self.iregs[13], + "x14" => self.iregs[14], + "x15" => self.iregs[15], + "x16" => self.iregs[16], + "x17" => self.iregs[17], + "x18" => self.iregs[18], + "x19" => self.iregs[19], + "x20" => self.iregs[20], + "x21" => self.iregs[21], + "x22" => self.iregs[22], + "x23" => self.iregs[23], + "x24" => self.iregs[24], + "x25" => self.iregs[25], + "x26" => self.iregs[26], + "x27" => self.iregs[27], + "x28" => self.iregs[28], + "x29" => self.iregs[29], + "x30" => self.iregs[30], + "pc" => self.pc, + "sp" => self.sp, + "lr" => self.iregs[md::Arm64RegisterNumbers::LinkRegister as usize], + "fp" => self.iregs[md::Arm64RegisterNumbers::FramePointer as usize], + _ => unreachable!("Invalid aarch64 register! 
{}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "x0" => self.iregs[0] = val, + "x1" => self.iregs[1] = val, + "x2" => self.iregs[2] = val, + "x3" => self.iregs[3] = val, + "x4" => self.iregs[4] = val, + "x5" => self.iregs[5] = val, + "x6" => self.iregs[6] = val, + "x7" => self.iregs[7] = val, + "x8" => self.iregs[8] = val, + "x9" => self.iregs[9] = val, + "x10" => self.iregs[10] = val, + "x11" => self.iregs[11] = val, + "x12" => self.iregs[12] = val, + "x13" => self.iregs[13] = val, + "x14" => self.iregs[14] = val, + "x15" => self.iregs[15] = val, + "x16" => self.iregs[16] = val, + "x17" => self.iregs[17] = val, + "x18" => self.iregs[18] = val, + "x19" => self.iregs[19] = val, + "x20" => self.iregs[20] = val, + "x21" => self.iregs[21] = val, + "x22" => self.iregs[22] = val, + "x23" => self.iregs[23] = val, + "x24" => self.iregs[24] = val, + "x25" => self.iregs[25] = val, + "x26" => self.iregs[26] = val, + "x27" => self.iregs[27] = val, + "x28" => self.iregs[28] = val, + "x29" => self.iregs[29] = val, + "x30" => self.iregs[30] = val, + "pc" => self.pc = val, + "sp" => self.sp = val, + "lr" => self.iregs[md::Arm64RegisterNumbers::LinkRegister as usize] = val, + "fp" => self.iregs[md::Arm64RegisterNumbers::FramePointer as usize] = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "sp" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "pc" + } +} + +impl CpuContext for md::CONTEXT_ARM64 { + type Register = u64; + + const REGISTERS: &'static [&'static str] = &[ + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", + "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", + "x27", "x28", "fp", "lr", "sp", "pc", + ]; + + fn memoize_register(&self, reg: &str) -> Option<&'static str> { + match reg { + "x29" => Some("fp"), + "x30" => Some("lr"), + _ => 
default_memoize_register(Self::REGISTERS, reg), + } + } + + fn register_is_valid(&self, reg: &str, valid: &MinidumpContextValidity) -> bool { + if let MinidumpContextValidity::Some(ref which) = valid { + match reg { + "x29" | "fp" => which.contains("x29") || which.contains("fp"), + "x30" | "lr" => which.contains("x30") || which.contains("lr"), + _ => which.contains(reg), + } + } else { + self.memoize_register(reg).is_some() + } + } + + fn get_register_always(&self, reg: &str) -> u64 { + match reg { + "x0" => self.iregs[0], + "x1" => self.iregs[1], + "x2" => self.iregs[2], + "x3" => self.iregs[3], + "x4" => self.iregs[4], + "x5" => self.iregs[5], + "x6" => self.iregs[6], + "x7" => self.iregs[7], + "x8" => self.iregs[8], + "x9" => self.iregs[9], + "x10" => self.iregs[10], + "x11" => self.iregs[11], + "x12" => self.iregs[12], + "x13" => self.iregs[13], + "x14" => self.iregs[14], + "x15" => self.iregs[15], + "x16" => self.iregs[16], + "x17" => self.iregs[17], + "x18" => self.iregs[18], + "x19" => self.iregs[19], + "x20" => self.iregs[20], + "x21" => self.iregs[21], + "x22" => self.iregs[22], + "x23" => self.iregs[23], + "x24" => self.iregs[24], + "x25" => self.iregs[25], + "x26" => self.iregs[26], + "x27" => self.iregs[27], + "x28" => self.iregs[28], + "x29" => self.iregs[29], + "x30" => self.iregs[30], + "pc" => self.pc, + "sp" => self.sp, + "lr" => self.iregs[md::Arm64RegisterNumbers::LinkRegister as usize], + "fp" => self.iregs[md::Arm64RegisterNumbers::FramePointer as usize], + _ => unreachable!("Invalid aarch64 register! 
{}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "x0" => self.iregs[0] = val, + "x1" => self.iregs[1] = val, + "x2" => self.iregs[2] = val, + "x3" => self.iregs[3] = val, + "x4" => self.iregs[4] = val, + "x5" => self.iregs[5] = val, + "x6" => self.iregs[6] = val, + "x7" => self.iregs[7] = val, + "x8" => self.iregs[8] = val, + "x9" => self.iregs[9] = val, + "x10" => self.iregs[10] = val, + "x11" => self.iregs[11] = val, + "x12" => self.iregs[12] = val, + "x13" => self.iregs[13] = val, + "x14" => self.iregs[14] = val, + "x15" => self.iregs[15] = val, + "x16" => self.iregs[16] = val, + "x17" => self.iregs[17] = val, + "x18" => self.iregs[18] = val, + "x19" => self.iregs[19] = val, + "x20" => self.iregs[20] = val, + "x21" => self.iregs[21] = val, + "x22" => self.iregs[22] = val, + "x23" => self.iregs[23] = val, + "x24" => self.iregs[24] = val, + "x25" => self.iregs[25] = val, + "x26" => self.iregs[26] = val, + "x27" => self.iregs[27] = val, + "x28" => self.iregs[28] = val, + "x29" => self.iregs[29] = val, + "x30" => self.iregs[30] = val, + "pc" => self.pc = val, + "sp" => self.sp = val, + "lr" => self.iregs[md::Arm64RegisterNumbers::LinkRegister as usize] = val, + "fp" => self.iregs[md::Arm64RegisterNumbers::FramePointer as usize] = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "sp" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "pc" + } +} + +impl CpuContext for md::CONTEXT_PPC { + type Register = u32; + + const REGISTERS: &'static [&'static str] = &[ + "srr0", "srr1", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", + "r25", "r26", "r27", "r28", "r29", "r30", "r31", "cr", "xer", "lr", "ctr", "mq", "vrsave", + ]; + + fn get_register_always(&self, reg: &str) -> Self::Register { + match reg { + "srr0" => 
self.srr0, + "srr1" => self.srr1, + "r0" => self.gpr[0], + "r1" => self.gpr[1], + "r2" => self.gpr[2], + "r3" => self.gpr[3], + "r4" => self.gpr[4], + "r5" => self.gpr[5], + "r6" => self.gpr[6], + "r7" => self.gpr[7], + "r8" => self.gpr[8], + "r9" => self.gpr[9], + "r10" => self.gpr[10], + "r11" => self.gpr[11], + "r12" => self.gpr[12], + "r13" => self.gpr[13], + "r14" => self.gpr[14], + "r15" => self.gpr[15], + "r16" => self.gpr[16], + "r17" => self.gpr[17], + "r18" => self.gpr[18], + "r19" => self.gpr[19], + "r20" => self.gpr[20], + "r21" => self.gpr[21], + "r22" => self.gpr[22], + "r23" => self.gpr[23], + "r24" => self.gpr[24], + "r25" => self.gpr[25], + "r26" => self.gpr[26], + "r27" => self.gpr[27], + "r28" => self.gpr[28], + "r29" => self.gpr[29], + "r30" => self.gpr[30], + "r31" => self.gpr[31], + "cr" => self.cr, + "xer" => self.xer, + "lr" => self.lr, + "ctr" => self.ctr, + "mq" => self.mq, + "vrsave" => self.vrsave, + _ => unreachable!("Invalid ppc register! {}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "srr0" => self.srr0 = val, + "srr1" => self.srr1 = val, + "r0" => self.gpr[0] = val, + "r1" => self.gpr[1] = val, + "r2" => self.gpr[2] = val, + "r3" => self.gpr[3] = val, + "r4" => self.gpr[4] = val, + "r5" => self.gpr[5] = val, + "r6" => self.gpr[6] = val, + "r7" => self.gpr[7] = val, + "r8" => self.gpr[8] = val, + "r9" => self.gpr[9] = val, + "r10" => self.gpr[10] = val, + "r11" => self.gpr[11] = val, + "r12" => self.gpr[12] = val, + "r13" => self.gpr[13] = val, + "r14" => self.gpr[14] = val, + "r15" => self.gpr[15] = val, + "r16" => self.gpr[16] = val, + "r17" => self.gpr[17] = val, + "r18" => self.gpr[18] = val, + "r19" => self.gpr[19] = val, + "r20" => self.gpr[20] = val, + "r21" => self.gpr[21] = val, + "r22" => self.gpr[22] = val, + "r23" => self.gpr[23] = val, + "r24" => self.gpr[24] = val, + "r25" => self.gpr[25] = val, + "r26" => self.gpr[26] = val, + "r27" => self.gpr[27] = 
val, + "r28" => self.gpr[28] = val, + "r29" => self.gpr[29] = val, + "r30" => self.gpr[30] = val, + "r31" => self.gpr[31] = val, + "cr" => self.cr = val, + "xer" => self.xer = val, + "lr" => self.lr = val, + "ctr" => self.ctr = val, + "mq" => self.mq = val, + "vrsave" => self.vrsave = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "r1" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "srr0" + } +} + +impl CpuContext for md::CONTEXT_PPC64 { + type Register = u64; + + const REGISTERS: &'static [&'static str] = &[ + "srr0", "srr1", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", + "r25", "r26", "r27", "r28", "r29", "r30", "r31", "cr", "xer", "lr", "ctr", "vrsave", + ]; + + fn get_register_always(&self, reg: &str) -> Self::Register { + match reg { + "srr0" => self.srr0, + "srr1" => self.srr1, + "r0" => self.gpr[0], + "r1" => self.gpr[1], + "r2" => self.gpr[2], + "r3" => self.gpr[3], + "r4" => self.gpr[4], + "r5" => self.gpr[5], + "r6" => self.gpr[6], + "r7" => self.gpr[7], + "r8" => self.gpr[8], + "r9" => self.gpr[9], + "r10" => self.gpr[10], + "r11" => self.gpr[11], + "r12" => self.gpr[12], + "r13" => self.gpr[13], + "r14" => self.gpr[14], + "r15" => self.gpr[15], + "r16" => self.gpr[16], + "r17" => self.gpr[17], + "r18" => self.gpr[18], + "r19" => self.gpr[19], + "r20" => self.gpr[20], + "r21" => self.gpr[21], + "r22" => self.gpr[22], + "r23" => self.gpr[23], + "r24" => self.gpr[24], + "r25" => self.gpr[25], + "r26" => self.gpr[26], + "r27" => self.gpr[27], + "r28" => self.gpr[28], + "r29" => self.gpr[29], + "r30" => self.gpr[30], + "r31" => self.gpr[31], + "cr" => self.cr, + "xer" => self.xer, + "lr" => self.lr, + "ctr" => self.ctr, + "vrsave" => self.vrsave, + _ => unreachable!("Invalid ppc64 register! 
{}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "srr0" => self.srr0 = val, + "srr1" => self.srr1 = val, + "r0" => self.gpr[0] = val, + "r1" => self.gpr[1] = val, + "r2" => self.gpr[2] = val, + "r3" => self.gpr[3] = val, + "r4" => self.gpr[4] = val, + "r5" => self.gpr[5] = val, + "r6" => self.gpr[6] = val, + "r7" => self.gpr[7] = val, + "r8" => self.gpr[8] = val, + "r9" => self.gpr[9] = val, + "r10" => self.gpr[10] = val, + "r11" => self.gpr[11] = val, + "r12" => self.gpr[12] = val, + "r13" => self.gpr[13] = val, + "r14" => self.gpr[14] = val, + "r15" => self.gpr[15] = val, + "r16" => self.gpr[16] = val, + "r17" => self.gpr[17] = val, + "r18" => self.gpr[18] = val, + "r19" => self.gpr[19] = val, + "r20" => self.gpr[20] = val, + "r21" => self.gpr[21] = val, + "r22" => self.gpr[22] = val, + "r23" => self.gpr[23] = val, + "r24" => self.gpr[24] = val, + "r25" => self.gpr[25] = val, + "r26" => self.gpr[26] = val, + "r27" => self.gpr[27] = val, + "r28" => self.gpr[28] = val, + "r29" => self.gpr[29] = val, + "r30" => self.gpr[30] = val, + "r31" => self.gpr[31] = val, + "cr" => self.cr = val, + "xer" => self.xer = val, + "lr" => self.lr = val, + "ctr" => self.ctr = val, + "vrsave" => self.vrsave = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "r1" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "srr0" + } +} + +impl CpuContext for md::CONTEXT_MIPS { + type Register = u64; + + const REGISTERS: &'static [&'static str] = &[ + "gp", "sp", "fp", "ra", "pc", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + ]; + + fn get_register_always(&self, reg: &str) -> Self::Register { + match reg { + "gp" => self.iregs[md::MipsRegisterNumbers::GlobalPointer as usize], + "sp" => self.iregs[md::MipsRegisterNumbers::StackPointer as usize], + "fp" => self.iregs[md::MipsRegisterNumbers::FramePointer as usize], + "ra" => 
self.iregs[md::MipsRegisterNumbers::ReturnAddress as usize], + "pc" => self.epc, + "s0" => self.iregs[md::MipsRegisterNumbers::S0 as usize], + "s1" => self.iregs[md::MipsRegisterNumbers::S1 as usize], + "s2" => self.iregs[md::MipsRegisterNumbers::S2 as usize], + "s3" => self.iregs[md::MipsRegisterNumbers::S3 as usize], + "s4" => self.iregs[md::MipsRegisterNumbers::S4 as usize], + "s5" => self.iregs[md::MipsRegisterNumbers::S5 as usize], + "s6" => self.iregs[md::MipsRegisterNumbers::S6 as usize], + "s7" => self.iregs[md::MipsRegisterNumbers::S7 as usize], + _ => unreachable!("Invalid mips register! {}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "gp" => self.iregs[md::MipsRegisterNumbers::GlobalPointer as usize] = val, + "sp" => self.iregs[md::MipsRegisterNumbers::StackPointer as usize] = val, + "fp" => self.iregs[md::MipsRegisterNumbers::FramePointer as usize] = val, + "ra" => self.iregs[md::MipsRegisterNumbers::ReturnAddress as usize] = val, + "pc" => self.epc = val, + "s0" => self.iregs[md::MipsRegisterNumbers::S0 as usize] = val, + "s1" => self.iregs[md::MipsRegisterNumbers::S1 as usize] = val, + "s2" => self.iregs[md::MipsRegisterNumbers::S2 as usize] = val, + "s3" => self.iregs[md::MipsRegisterNumbers::S3 as usize] = val, + "s4" => self.iregs[md::MipsRegisterNumbers::S4 as usize] = val, + "s5" => self.iregs[md::MipsRegisterNumbers::S5 as usize] = val, + "s6" => self.iregs[md::MipsRegisterNumbers::S6 as usize] = val, + "s7" => self.iregs[md::MipsRegisterNumbers::S7 as usize] = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "sp" + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "pc" + } +} + +impl CpuContext for md::CONTEXT_SPARC { + type Register = u64; + + const REGISTERS: &'static [&'static str] = &[ + "g_r0", "g_r1", "g_r2", "g_r3", "g_r4", "g_r5", "g_r6", "g_r7", "g_r8", "g_r9", "g_r10", + "g_r11", "g_r12", 
"g_r13", "g_r14", "g_r15", "g_r16", "g_r17", "g_r18", "g_r19", "g_r20", + "g_r21", "g_r22", "g_r23", "g_r24", "g_r25", "g_r26", "g_r27", "g_r28", "g_r29", "g_r30", + "g_r31", "ccr", "pc", "npc", "y", "asi", "fprs", + ]; + + fn get_register_always(&self, reg: &str) -> Self::Register { + match reg { + "g_r0" | "g0" => self.g_r[0], + "g_r1" | "g1" => self.g_r[1], + "g_r2" | "g2" => self.g_r[2], + "g_r3" | "g3" => self.g_r[3], + "g_r4" | "g4" => self.g_r[4], + "g_r5" | "g5" => self.g_r[5], + "g_r6" | "g6" => self.g_r[6], + "g_r7" | "g7" => self.g_r[7], + "g_r8" | "o0" => self.g_r[8], + "g_r9" | "o1" => self.g_r[9], + "g_r10" | "o2" => self.g_r[10], + "g_r11" | "o3" => self.g_r[11], + "g_r12" | "o4" => self.g_r[12], + "g_r13" | "o5" => self.g_r[13], + "g_r14" | "o6" => self.g_r[14], + "g_r15" | "o7" => self.g_r[15], + "g_r16" | "l0" => self.g_r[16], + "g_r17" | "l1" => self.g_r[17], + "g_r18" | "l2" => self.g_r[18], + "g_r19" | "l3" => self.g_r[19], + "g_r20" | "l4" => self.g_r[20], + "g_r21" | "l5" => self.g_r[21], + "g_r22" | "l6" => self.g_r[22], + "g_r23" | "l7" => self.g_r[23], + "g_r24" | "i0" => self.g_r[24], + "g_r25" | "i1" => self.g_r[25], + "g_r26" | "i2" => self.g_r[26], + "g_r27" | "i3" => self.g_r[27], + "g_r28" | "i4" => self.g_r[28], + "g_r29" | "i5" => self.g_r[29], + "g_r30" | "i6" => self.g_r[30], + "g_r31" | "i7" => self.g_r[31], + "ccr" => self.ccr, + "pc" => self.pc, + "npc" => self.npc, + "y" => self.y, + "asi" => self.asi, + "fprs" => self.fprs, + _ => unreachable!("Invalid sparc register! 
{}", reg), + } + } + + fn set_register(&mut self, reg: &str, val: Self::Register) -> Option<()> { + match reg { + "g_r0" | "g0" => self.g_r[0] = val, + "g_r1" | "g1" => self.g_r[1] = val, + "g_r2" | "g2" => self.g_r[2] = val, + "g_r3" | "g3" => self.g_r[3] = val, + "g_r4" | "g4" => self.g_r[4] = val, + "g_r5" | "g5" => self.g_r[5] = val, + "g_r6" | "g6" => self.g_r[6] = val, + "g_r7" | "g7" => self.g_r[7] = val, + "g_r8" | "o0" => self.g_r[8] = val, + "g_r9" | "o1" => self.g_r[9] = val, + "g_r10" | "o2" => self.g_r[10] = val, + "g_r11" | "o3" => self.g_r[11] = val, + "g_r12" | "o4" => self.g_r[12] = val, + "g_r13" | "o5" => self.g_r[13] = val, + "g_r14" | "o6" => self.g_r[14] = val, + "g_r15" | "o7" => self.g_r[15] = val, + "g_r16" | "l0" => self.g_r[16] = val, + "g_r17" | "l1" => self.g_r[17] = val, + "g_r18" | "l2" => self.g_r[18] = val, + "g_r19" | "l3" => self.g_r[19] = val, + "g_r20" | "l4" => self.g_r[20] = val, + "g_r21" | "l5" => self.g_r[21] = val, + "g_r22" | "l6" => self.g_r[22] = val, + "g_r23" | "l7" => self.g_r[23] = val, + "g_r24" | "i0" => self.g_r[24] = val, + "g_r25" | "i1" => self.g_r[25] = val, + "g_r26" | "i2" => self.g_r[26] = val, + "g_r27" | "i3" => self.g_r[27] = val, + "g_r28" | "i4" => self.g_r[28] = val, + "g_r29" | "i5" => self.g_r[29] = val, + "g_r30" | "i6" => self.g_r[30] = val, + "g_r31" | "i7" => self.g_r[31] = val, + "ccr" => self.ccr = val, + "pc" => self.pc = val, + "npc" => self.npc = val, + "y" => self.y = val, + "asi" => self.asi = val, + "fprs" => self.fprs = val, + _ => return None, + } + Some(()) + } + + fn stack_pointer_register_name(&self) -> &'static str { + "g_r14" // alias out register o6 + } + + fn instruction_pointer_register_name(&self) -> &'static str { + "pc" + } +} + +/// Information about which registers are valid in a `MinidumpContext`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum MinidumpContextValidity { + // All registers are valid. + All, + // The registers in this set are valid. 
+ Some(HashSet<&'static str>), +} + +/// CPU context such as register states. +/// +/// MinidumpContext carries a CPU-specific MDRawContext structure, which +/// contains CPU context such as register states. Each thread has its +/// own context, and the exception record, if present, also has its own +/// context. Note that if the exception record is present, the context it +/// refers to is probably what the user wants to use for the exception +/// thread, instead of that thread's own context. The exception thread's +/// context (as opposed to the exception record's context) will contain +/// context for the exception handler (which performs minidump generation), +/// and not the context that caused the exception (which is probably what the +/// user wants). +#[derive(Debug, Clone)] +pub struct MinidumpContext { + /// The raw CPU register state. + pub raw: MinidumpRawContext, + /// Which registers are valid in `raw`. + pub valid: MinidumpContextValidity, +} + +/// Errors encountered while reading a `MinidumpContext`. +#[derive(Debug)] +pub enum ContextError { + /// Failed to read data. + ReadFailure, + /// Encountered an unknown CPU context. + UnknownCpuContext, +} + +//====================================================== +// Implementations + +impl MinidumpContext { + /// Return a MinidumpContext given a `MinidumpRawContext`. + pub fn from_raw(raw: MinidumpRawContext) -> MinidumpContext { + MinidumpContext { + raw, + valid: MinidumpContextValidity::All, + } + } + + /// Read a `MinidumpContext` from `bytes`. + pub fn read( + bytes: &[u8], + endian: scroll::Endian, + system_info: &MinidumpSystemInfo, + _misc: Option<&MinidumpMiscInfo>, + ) -> Result { + use md::ProcessorArchitecture::*; + + let mut offset = 0; + + // Although every context contains `context_flags` which tell us what kind + // ok context we're handling, they aren't all in the same location, so we + // need to use SystemInfo to choose what kind of context to parse this as. 
+ // We can then use the `context_flags` to validate our parse. + // We need to use the raw processor_architecture because system_info.cpu + // flattens away some key distinctions for this code. + match md::ProcessorArchitecture::from_u16(system_info.raw.processor_architecture) { + Some(PROCESSOR_ARCHITECTURE_INTEL) | Some(PROCESSOR_ARCHITECTURE_IA32_ON_WIN64) => { + // Not 100% sure IA32_ON_WIN64 is this format, but let's assume so? + let ctx: md::CONTEXT_X86 = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags); + if flags == ContextFlagsCpu::CONTEXT_X86 { + if ctx.context_flags & md::CONTEXT_HAS_XSTATE != 0 { + // FIXME: uses MISC_INFO_5 to parse out extra sections here + warn!("Cpu context has extra XSTATE that is being ignored"); + } + Ok(MinidumpContext::from_raw(MinidumpRawContext::X86(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_AMD64) => { + let ctx: md::CONTEXT_AMD64 = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags); + if flags == ContextFlagsCpu::CONTEXT_AMD64 { + if ctx.context_flags & md::CONTEXT_HAS_XSTATE != 0 { + // FIXME: uses MISC_INFO_5 to parse out extra sections here + warn!("Cpu context has extra XSTATE that is being ignored"); + } + Ok(MinidumpContext::from_raw(MinidumpRawContext::Amd64(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_PPC) => { + let ctx: md::CONTEXT_PPC = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags); + if flags == ContextFlagsCpu::CONTEXT_PPC { + Ok(MinidumpContext::from_raw(MinidumpRawContext::Ppc(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_PPC64) => { + let ctx: md::CONTEXT_PPC64 = bytes + .gread_with(&mut offset, 
endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags as u32); + if flags == ContextFlagsCpu::CONTEXT_PPC64 { + Ok(MinidumpContext::from_raw(MinidumpRawContext::Ppc64(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_SPARC) => { + let ctx: md::CONTEXT_SPARC = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags); + if flags == ContextFlagsCpu::CONTEXT_SPARC { + Ok(MinidumpContext::from_raw(MinidumpRawContext::Sparc(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_ARM) => { + let ctx: md::CONTEXT_ARM = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags); + if flags == ContextFlagsCpu::CONTEXT_ARM { + Ok(MinidumpContext::from_raw(MinidumpRawContext::Arm(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_ARM64) => { + let ctx: md::CONTEXT_ARM64 = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags); + if flags == ContextFlagsCpu::CONTEXT_ARM64 { + Ok(MinidumpContext::from_raw(MinidumpRawContext::Arm64(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_ARM64_OLD) => { + let ctx: md::CONTEXT_ARM64_OLD = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = ContextFlagsCpu::from_flags(ctx.context_flags as u32); + if flags == ContextFlagsCpu::CONTEXT_ARM64_OLD { + Ok(MinidumpContext::from_raw(MinidumpRawContext::OldArm64(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + Some(PROCESSOR_ARCHITECTURE_MIPS) => { + let ctx: md::CONTEXT_MIPS = bytes + .gread_with(&mut offset, endian) + .or(Err(ContextError::ReadFailure))?; + + let flags = 
ContextFlagsCpu::from_flags(ctx.context_flags); + if flags == ContextFlagsCpu::CONTEXT_MIPS { + Ok(MinidumpContext::from_raw(MinidumpRawContext::Mips(ctx))) + } else { + Err(ContextError::ReadFailure) + } + } + _ => Err(ContextError::UnknownCpuContext), + } + } + + pub fn get_instruction_pointer(&self) -> u64 { + match self.raw { + MinidumpRawContext::Amd64(ref ctx) => ctx.rip, + MinidumpRawContext::Arm(ref ctx) => { + ctx.iregs[md::ArmRegisterNumbers::ProgramCounter as usize] as u64 + } + MinidumpRawContext::Arm64(ref ctx) => ctx.pc, + MinidumpRawContext::OldArm64(ref ctx) => ctx.pc, + MinidumpRawContext::Ppc(ref ctx) => ctx.srr0 as u64, + MinidumpRawContext::Ppc64(ref ctx) => ctx.srr0, + MinidumpRawContext::Sparc(ref ctx) => ctx.pc, + MinidumpRawContext::X86(ref ctx) => ctx.eip as u64, + MinidumpRawContext::Mips(ref ctx) => ctx.epc, + } + } + + pub fn get_stack_pointer(&self) -> u64 { + match self.raw { + MinidumpRawContext::Amd64(ref ctx) => ctx.rsp, + MinidumpRawContext::Arm(ref ctx) => { + ctx.iregs[md::ArmRegisterNumbers::StackPointer as usize] as u64 + } + MinidumpRawContext::Arm64(ref ctx) => ctx.sp, + MinidumpRawContext::OldArm64(ref ctx) => ctx.sp, + MinidumpRawContext::Ppc(ref ctx) => { + ctx.gpr[md::PpcRegisterNumbers::StackPointer as usize] as u64 + } + MinidumpRawContext::Ppc64(ref ctx) => { + ctx.gpr[md::Ppc64RegisterNumbers::StackPointer as usize] + } + MinidumpRawContext::Sparc(ref ctx) => { + ctx.g_r[md::SparcRegisterNumbers::StackPointer as usize] + } + MinidumpRawContext::X86(ref ctx) => ctx.esp as u64, + MinidumpRawContext::Mips(ref ctx) => { + ctx.iregs[md::MipsRegisterNumbers::StackPointer as usize] + } + } + } + + pub fn get_register_always(&self, reg: &str) -> u64 { + match self.raw { + MinidumpRawContext::Amd64(ref ctx) => ctx.get_register_always(reg), + MinidumpRawContext::Arm(ref ctx) => ctx.get_register_always(reg).into(), + MinidumpRawContext::Arm64(ref ctx) => ctx.get_register_always(reg), + MinidumpRawContext::OldArm64(ref ctx) => 
ctx.get_register_always(reg), + MinidumpRawContext::Ppc(ref ctx) => ctx.get_register_always(reg).into(), + MinidumpRawContext::Ppc64(ref ctx) => ctx.get_register_always(reg), + MinidumpRawContext::Sparc(ref ctx) => ctx.get_register_always(reg), + MinidumpRawContext::X86(ref ctx) => ctx.get_register_always(reg).into(), + MinidumpRawContext::Mips(ref ctx) => ctx.get_register_always(reg), + } + } + + pub fn get_register(&self, reg: &str) -> Option { + let valid = match &self.raw { + MinidumpRawContext::X86(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Ppc(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Ppc64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Amd64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Sparc(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Arm(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Arm64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::OldArm64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Mips(ctx) => ctx.register_is_valid(reg, &self.valid), + }; + + if valid { + Some(self.get_register_always(reg)) + } else { + None + } + } + + pub fn format_register(&self, reg: &str) -> String { + match self.raw { + MinidumpRawContext::Amd64(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::Arm(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::Arm64(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::OldArm64(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::Ppc(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::Ppc64(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::Sparc(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::X86(ref ctx) => ctx.format_register(reg), + MinidumpRawContext::Mips(ref ctx) => ctx.format_register(reg), + } + } + + pub fn general_purpose_registers(&self) -> &'static [&'static 
str] { + match self.raw { + MinidumpRawContext::Amd64(_) => md::CONTEXT_AMD64::REGISTERS, + MinidumpRawContext::Arm(_) => md::CONTEXT_ARM::REGISTERS, + MinidumpRawContext::Arm64(_) => md::CONTEXT_ARM64::REGISTERS, + MinidumpRawContext::OldArm64(_) => md::CONTEXT_ARM64::REGISTERS, + MinidumpRawContext::Ppc(_) => md::CONTEXT_PPC::REGISTERS, + MinidumpRawContext::Ppc64(_) => md::CONTEXT_PPC64::REGISTERS, + MinidumpRawContext::Sparc(_) => md::CONTEXT_SPARC::REGISTERS, + MinidumpRawContext::X86(_) => md::CONTEXT_X86::REGISTERS, + MinidumpRawContext::Mips(_) => md::CONTEXT_MIPS::REGISTERS, + } + } + + pub fn registers(&self) -> impl Iterator + '_ { + self.general_purpose_registers() + .iter() + .map(move |®| (reg, self.get_register_always(reg))) + } + + pub fn valid_registers(&self) -> impl Iterator + '_ { + // This is suboptimal in theory, as we could iterate over self.valid just like the original + // and faster `CpuRegisters` iterator does. However, this complicates code here, and the + // minimal gain in performance hasn't been worth the added complexity. + self.registers().filter(move |(reg, _)| match &self.raw { + MinidumpRawContext::X86(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Ppc(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Ppc64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Amd64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Sparc(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Arm(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Arm64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::OldArm64(ctx) => ctx.register_is_valid(reg, &self.valid), + MinidumpRawContext::Mips(ctx) => ctx.register_is_valid(reg, &self.valid), + }) + } + + /// Get the size (in bytes) of general-purpose registers. 
+ pub fn register_size(&self) -> usize { + fn get(_: &T) -> usize { + std::mem::size_of::() + } + + match &self.raw { + MinidumpRawContext::X86(ctx) => get(ctx), + MinidumpRawContext::Ppc(ctx) => get(ctx), + MinidumpRawContext::Ppc64(ctx) => get(ctx), + MinidumpRawContext::Amd64(ctx) => get(ctx), + MinidumpRawContext::Sparc(ctx) => get(ctx), + MinidumpRawContext::Arm(ctx) => get(ctx), + MinidumpRawContext::Arm64(ctx) => get(ctx), + MinidumpRawContext::OldArm64(ctx) => get(ctx), + MinidumpRawContext::Mips(ctx) => get(ctx), + } + } + + /// Write a human-readable description of this `MinidumpContext` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T) -> io::Result<()> { + match self.raw { + MinidumpRawContext::X86(ref raw) => { + write!( + f, + r#"CONTEXT_X86 + context_flags = {:#x} + dr0 = {:#x} + dr1 = {:#x} + dr2 = {:#x} + dr3 = {:#x} + dr6 = {:#x} + dr7 = {:#x} + float_save.control_word = {:#x} + float_save.status_word = {:#x} + float_save.tag_word = {:#x} + float_save.error_offset = {:#x} + float_save.error_selector = {:#x} + float_save.data_offset = {:#x} + float_save.data_selector = {:#x} + float_save.register_area[{:2}] = 0x"#, + raw.context_flags, + raw.dr0, + raw.dr1, + raw.dr2, + raw.dr3, + raw.dr6, + raw.dr7, + raw.float_save.control_word, + raw.float_save.status_word, + raw.float_save.tag_word, + raw.float_save.error_offset, + raw.float_save.error_selector, + raw.float_save.data_offset, + raw.float_save.data_selector, + raw.float_save.register_area.len(), + )?; + write_bytes(f, &raw.float_save.register_area)?; + writeln!(f)?; + write!( + f, + r#" float_save.cr0_npx_state = {:#x} + gs = {:#x} + fs = {:#x} + es = {:#x} + ds = {:#x} + edi = {:#x} + esi = {:#x} + ebx = {:#x} + edx = {:#x} + ecx = {:#x} + eax = {:#x} + ebp = {:#x} + eip = {:#x} + cs = {:#x} + eflags = {:#x} + esp = {:#x} + ss = {:#x} + extended_registers[{:3}] = 0x"#, + raw.float_save.cr0_npx_state, + raw.gs, + raw.fs, + raw.es, 
+ raw.ds, + raw.edi, + raw.esi, + raw.ebx, + raw.edx, + raw.ecx, + raw.eax, + raw.ebp, + raw.eip, + raw.cs, + raw.eflags, + raw.esp, + raw.ss, + raw.extended_registers.len(), + )?; + write_bytes(f, &raw.extended_registers)?; + write!(f, "\n\n")?; + } + MinidumpRawContext::Ppc(_) => { + unimplemented!(); + } + MinidumpRawContext::Ppc64(_) => { + unimplemented!(); + } + MinidumpRawContext::Amd64(ref raw) => { + write!( + f, + r#"CONTEXT_AMD64 + p1_home = {:#x} + p2_home = {:#x} + p3_home = {:#x} + p4_home = {:#x} + p5_home = {:#x} + p6_home = {:#x} + context_flags = {:#x} + mx_csr = {:#x} + cs = {:#x} + ds = {:#x} + es = {:#x} + fs = {:#x} + gs = {:#x} + ss = {:#x} + eflags = {:#x} + dr0 = {:#x} + dr1 = {:#x} + dr2 = {:#x} + dr3 = {:#x} + dr6 = {:#x} + dr7 = {:#x} + rax = {:#x} + rcx = {:#x} + rdx = {:#x} + rbx = {:#x} + rsp = {:#x} + rbp = {:#x} + rsi = {:#x} + rdi = {:#x} + r8 = {:#x} + r9 = {:#x} + r10 = {:#x} + r11 = {:#x} + r12 = {:#x} + r13 = {:#x} + r14 = {:#x} + r15 = {:#x} + rip = {:#x} + +"#, + raw.p1_home, + raw.p2_home, + raw.p3_home, + raw.p4_home, + raw.p5_home, + raw.p6_home, + raw.context_flags, + raw.mx_csr, + raw.cs, + raw.ds, + raw.es, + raw.fs, + raw.gs, + raw.ss, + raw.eflags, + raw.dr0, + raw.dr1, + raw.dr2, + raw.dr3, + raw.dr6, + raw.dr7, + raw.rax, + raw.rcx, + raw.rdx, + raw.rbx, + raw.rsp, + raw.rbp, + raw.rsi, + raw.rdi, + raw.r8, + raw.r9, + raw.r10, + raw.r11, + raw.r12, + raw.r13, + raw.r14, + raw.r15, + raw.rip, + )?; + } + MinidumpRawContext::Sparc(_) => { + unimplemented!(); + } + MinidumpRawContext::Arm(ref raw) => { + write!( + f, + r#"CONTEXT_ARM + context_flags = {:#x} +"#, + raw.context_flags + )?; + for (i, reg) in raw.iregs.iter().enumerate() { + writeln!(f, " iregs[{i:2}] = {reg:#x}")?; + } + write!( + f, + r#" cpsr = {:#x} + float_save.fpscr = {:#x} +"#, + raw.cpsr, raw.float_save.fpscr + )?; + for (i, reg) in raw.float_save.regs.iter().enumerate() { + writeln!(f, " float_save.regs[{i:2}] = {reg:#x}")?; + } + for (i, reg) in 
raw.float_save.extra.iter().enumerate() { + writeln!(f, " float_save.extra[{i:2}] = {reg:#x}")?; + } + } + MinidumpRawContext::Arm64(ref raw) => { + write!( + f, + r#"CONTEXT_ARM64 + context_flags = {:#x} +"#, + raw.context_flags + )?; + for (i, reg) in raw.iregs[..29].iter().enumerate() { + writeln!(f, " x{i:<2} = {reg:#x}")?; + } + writeln!(f, " x29 (fp) = {:#x}", raw.iregs[29])?; + writeln!(f, " x30 (lr) = {:#x}", raw.iregs[30])?; + writeln!(f, " sp = {:#x}", raw.sp)?; + writeln!(f, " pc = {:#x}", raw.pc)?; + writeln!(f, " cpsr = {:#x}", raw.cpsr)?; + writeln!(f, " fpsr = {:#x}", raw.fpsr)?; + writeln!(f, " fpcr = {:#x}", raw.fpcr)?; + for (i, reg) in raw.float_regs.iter().enumerate() { + writeln!(f, " d{i:<2} = {reg:#x}")?; + } + for (i, reg) in raw.bcr.iter().enumerate() { + writeln!(f, " bcr[{i:2}] = {reg:#x}")?; + } + for (i, reg) in raw.bvr.iter().enumerate() { + writeln!(f, " bvr[{i:2}] = {reg:#x}")?; + } + for (i, reg) in raw.wcr.iter().enumerate() { + writeln!(f, " wcr[{i:2}] = {reg:#x}")?; + } + for (i, reg) in raw.wvr.iter().enumerate() { + writeln!(f, " wvr[{i:2}] = {reg:#x}")?; + } + } + MinidumpRawContext::OldArm64(ref raw) => { + write!( + f, + r#"CONTEXT_ARM64_OLD + context_flags = {:#x} +"#, + raw.context_flags + )?; + for (i, reg) in raw.iregs[..29].iter().enumerate() { + writeln!(f, " x{i:<2} = {reg:#x}")?; + } + writeln!(f, " x29 (fp) = {:#x}", raw.iregs[29])?; + writeln!(f, " x30 (lr) = {:#x}", raw.iregs[30])?; + writeln!(f, " sp = {:#x}", raw.sp)?; + writeln!(f, " pc = {:#x}", raw.pc)?; + writeln!(f, " cpsr = {:#x}", raw.cpsr)?; + writeln!(f, " fpsr = {:#x}", raw.fpsr)?; + writeln!(f, " fpcr = {:#x}", raw.fpcr)?; + for (i, reg) in raw.float_regs.iter().enumerate() { + writeln!(f, " d{i:<2} = {reg:#x}")?; + } + } + MinidumpRawContext::Mips(ref raw) => { + write!( + f, + r#"CONTEXT_MIPS + context_flags = {:#x} +"#, + raw.context_flags + )?; + + use md::MipsRegisterNumbers; + const MIPS_REGS: &[MipsRegisterNumbers] = &[ + 
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    /// Smoke test for the default implementation of `memoize_register`:
    /// a known name is returned as-is and an unknown name yields `None`.
    fn test_memoize_amd64() {
        let context = md::CONTEXT_AMD64::default();
        assert_eq!(context.memoize_register("rip"), Some("rip"));
        assert_eq!(context.memoize_register("foo"), None);
    }

    #[test]
    /// Test ARM register aliases by example of `fp`: both `r11` and `fp`
    /// memoize to `fp`.
    fn test_memoize_arm_alias() {
        let context = md::CONTEXT_ARM::default();
        assert_eq!(context.memoize_register("r11"), Some("fp"));
        assert_eq!(context.memoize_register("fp"), Some("fp"));
        assert_eq!(context.memoize_register("foo"), None);
    }

    #[test]
    /// Test ARM64 register aliases by example of `fp`: both `x29` and `fp`
    /// memoize to `fp`.
    fn test_memoize_arm64_alias() {
        let context = md::CONTEXT_ARM64::default();
        assert_eq!(context.memoize_register("x29"), Some("fp"));
        assert_eq!(context.memoize_register("fp"), Some("fp"));
        assert_eq!(context.memoize_register("foo"), None);
    }
}
/// Shorthand for Read + Seek
pub trait Readable: Read + Seek {}

// Blanket impl: anything that is both `Read` and `Seek` is `Readable`.
impl<T: Read + Seek> Readable for T {}

/// Format `bytes` to `f` as a hex string.
///
/// Each byte is written as exactly two lowercase hexadecimal digits, with no
/// separator and no trailing newline. An empty slice writes nothing.
///
/// # Errors
///
/// Returns the first error reported by the underlying writer.
pub fn write_bytes<T: Write>(f: &mut T, bytes: &[u8]) -> io::Result<()> {
    for b in bytes {
        write!(f, "{b:02x}")?;
    }
    Ok(())
}
// Read the minidump from a file +//! let mut dump = minidump::Minidump::read_path("../testdata/test.dmp")?; +//! +//! // Statically request (and require) several streams we care about: +//! let system_info = dump.get_stream::()?; +//! let exception = dump.get_stream::()?; +//! +//! // Combine the contents of the streams to perform more refined analysis +//! let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu); +//! +//! // Conditionally analyze a stream +//! if let Ok(threads) = dump.get_stream::() { +//! // Use `Default` to try to make progress when a stream is missing. +//! // This is especially natural for MinidumpMemoryList because +//! // everything needs to handle memory lookups failing anyway. +//! let mem = dump.get_memory().unwrap_or_default(); +//! +//! for thread in &threads.threads { +//! let stack = thread.stack_memory(&mem); +//! // ... +//! } +//! } +//! +//! Ok(()) +//! } +//! ``` +//! +//! Generally speaking, there isn't any reason to distinguish between a stream being +//! absent and it being corrupt. Just ask for what you want and we'll do our best +//! to give it to you. +//! +//! Everything else you would want to do with a Minidump is specific to the +//! individual streams: +//! +//! * [`MinidumpAssertion`][] +//! * [`MinidumpBreakpadInfo`][] +//! * [`MinidumpCrashpadInfo`][] +//! * [`MinidumpException`][] +//! * [`MinidumpLinuxCpuInfo`][] +//! * [`MinidumpLinuxEnviron`][] +//! * [`MinidumpLinuxLsbRelease`][] +//! * [`MinidumpLinuxMaps`][] +//! * [`MinidumpLinuxProcStatus`][] +//! * [`MinidumpMacCrashInfo`][] +//! * [`MinidumpMacBootargs`][] +//! * [`MinidumpMemoryList`][] +//! * [`MinidumpMemoryInfoList`][] +//! * [`MinidumpMiscInfo`][] +//! * [`MinidumpModuleList`][] +//! * [`MinidumpSystemInfo`][] +//! * [`MinidumpThreadList`][] +//! * [`MinidumpThreadNames`][] +//! * [`MinidumpUnloadedModuleList`][] +//! * [`MinidumpLinuxProcLimits`][] +//! +//! +//! +//! +//! # Notable Streams +//! +//! 
There's a lot of different Minidump Streams, but some are especially +//! notable/fundamental: +//! +//! [`MinidumpSystemInfo`][] includes details about the hardware and operating +//! system that the crash occurred on. This information is often required to +//! properly interpret the other streams of the minidump, as they contain +//! platform-specific values. +//! +//! [`MinidumpException`][] includes actual details about where and why the crash +//! occurred. +//! +//! [`MinidumpThreadList`][] includes the registers and stack memory of every thread +//! in the program at the time of the crash. This enables generating backtraces for +//! every thread. +//! +//! [`MinidumpMemoryList`][] maps the crashing program's runtime addresses (such as +//! `$rsp`) to ranges of memory in the Minidump. +//! +//! [`MinidumpModuleList`][] includes info on all the modules (libraries) that were +//! linked into the crashing program. This enables symbolication, as you can map +//! instruction addresses back to offsets in a specific library's binary. +//! +//! +//! +//! +//! # What is a Minidump? +//! +//! Minidumps capture the state of a crashing process (threads, stack memory, +//! registers, dlls), why it crashed (crashing thread, error codes, error +//! messages), and details about the system the program was running on (os, cpu). +//! +//! The information in a minidump is divided up into a series of +//! independent "streams". If you want a specific piece of information, you must +//! know the stream that contains it, and then look up that stream in the +//! minidump's directory. Most streams are pretty straight-forward -- you can guess +//! what you might find in [`MinidumpThreadList`][] or [`MinidumpSystemInfo`][] +//! -- but others -- like [`MinidumpMiscInfo`][] -- are a bit more random. +//! +//! This [format][minidump] was initially defined by Microsoft, as Windows has long +//! included [system apis to generate minidumps][minidumpwritedump]. But lots of +//! 
software gets made for operating systems other than Windows, where no such +//! native support for minidumps is present. [google-breakpad][breakpad] was +//! created to extend Microsoft's minidump format to other platforms, and defines +//! minidump generators for things like Linux and MacOS. +//! +//! I do not believe that Microsoft and Breakpad officially collaborate on the +//! format, it's just designed to be very extensible, so it's easy to add random +//! stuff to a minidump in ways that don't break old tools and likely won't +//! interfere with future versions. That said, Microsoft does now develop +//! cross-platform products that make use of Breakpad, such as VSCode, so at very +//! least their crash reporting infra deals with Breakpad minidumps. +//! +//! The rust-minidump crates are specifically designed to support Breakpad's +//! extended minidump format (and native Windows minidumps, which should in theory +//! just be a subset). That said, rust-minidump doesn't yet (and probably won't +//! ever) support *everything*. There's a lot of random stuff that either Microsoft +//! or Breakpad have defined over the years that we just, do not have any use for +//! at the moment. Not a lot of demand for handling minidumps for PlayStation 3, +//! SPARC, or Windows CE these days. +//! +//! +//! +//! +//! +//! # The Minidump Format +//! +//! This section is dedicated to describing how to parse minidumps, for anyone +//! wanting to maintain this code or write their own parser. +//! +//! Minidumps are a binary format. This format is simultaneously very simple and +//! very complicated. +//! +//! The simple part of a minidump is that it's basically just an array of pointers +//! to different typed "Streams" (system info, exception info, threads, memory +//! mappings, etc.). So if you want to lookup the system info, you just search the +//! array for a system info stream and interpret that range of memory as that +//! stream. +//! +//! 
The complicated part of a minidump is the fact that every stream contains +//! totally different information in totally different formats. Sure, there are +//! families of streams that have the same general structure, but you've still got +//! to write custom code to interpret the values meaningfully and figure out what +//! on earth that information is useful for. +//! +//! Sometimes the answer to "what is it useful for?" is "I don't know but maybe +//! we'll find a use for it later". This is genuinely useful because it allows us +//! to add new analyses long after a crash occurs and gain new insights that the +//! minidump format wasn't explicitly designed to provide. +//! +//! This is all to say that, beyond the basic layout of the minidump header and +//! directory, it's basically just a big ball of random formats with independent +//! formats and layout -- and everyone is technically free to come up with their +//! own custom Streams that they can just toss in there, so trying to cover +//! everything is kind of impossible? Let's see how far we get! +//! +//! +//! +//! ## The Minidump Header and Directory +//! +//! The first thing in a Minidump is the [`MINIDUMP_HEADER`][format::MINIDUMP_HEADER], which has the +//! following layout: +//! +//! ``` +//! pub struct MINIDUMP_HEADER { +//! pub signature: u32, +//! pub version: u32, +//! pub stream_count: u32, +//! pub stream_directory_rva: RVA, +//! pub checksum: u32, +//! pub time_date_stamp: u32, +//! pub flags: u64, +//! } +//! +//! /// Offset into the minidump +//! pub type RVA = u32; +//! ``` +//! +//! The `signature` is always [`MINIDUMP_SIGNATURE`][format::MINIDUMP_SIGNATURE] = `0x504d444d` +//! ("MDMP" in ascii). You can use this to detect whether the minidump is little-endian or +//! big-endian (minidumps always have the endianness of the platform they were generated +//! on, since they contain lots of raw memory from the process, but at this point +//! we don't know what that platform is). +//! 
The lower 16 bits of `version` are always +//! [`MINIDUMP_VERSION`][format::MINIDUMP_VERSION] = 42899. +//! (The high bits contain implementation-specific values that you should just +//! ignore). +//! +//! `stream_directory_rva` and `stream_count` are the location (offset from the +//! start of the file, in bytes) and size of the stream directory, respectively. +//! +//! `checksum` is some kind of checksum of the minidump itself (which may be null), +//! but the algorithm isn't specified, and rust-minidump doesn't check it. +//! +//! `time_date_stamp` is a Windows `time_t` of when the minidump was generated. +//! +//! `flags` are a [`MINIDUMP_TYPE`][MINIDUMP_TYPE] which largely just specify what you can expect +//! to find in the minidump. This is unused by rust-minidump since this information +//! is generally redundant with the stream directory and flags within the streams +//! that we need to check anyway. (e.g. instead of checking that this is a +//! `MiniDumpWithUnloadedModules`, you can just check the directory for the +//! [`MinidumpUnloadedModuleList`][] stream.) +//! +//! At `stream_directory_rva` (typically immediately after the header) you will find +//! an array of `stream_count` [`MINIDUMP_DIRECTORY`][format::MINIDUMP_DIRECTORY] entries, +//! with the following layout: +//! +//! ``` +//! pub struct MINIDUMP_DIRECTORY { +//! /// The type of the stream +//! pub stream_type: u32, +//! /// The location of the stream contents within the dump. +//! pub location: MINIDUMP_LOCATION_DESCRIPTOR, +//! } +//! +//! /// A "slice" of the minidump +//! pub struct MINIDUMP_LOCATION_DESCRIPTOR { +//! /// The size of this data (in bytes) +//! pub data_size: u32, +//! /// The offset to this data within the minidump file. +//! pub rva: RVA, +//! } +//! +//! /// Offset into the minidump +//! pub type RVA = u32; +//! ``` +//! +//! Known `stream_type` values are defined in +//! [`MINIDUMP_STREAM_TYPE`][format::MINIDUMP_STREAM_TYPE], but users +//! 
are allowed to define their own stream types, so it's normal to see unknown +//! types (this is the primary mechanism breakpad uses to extend the format without +//! causing upstream problems). +//! +//! And that's it! Everything else in a minidump is just all the different types of +//! stream. As of this writing, rust-minidump is aware of 51 different types of +//! stream, and implements 18 of them (there's a long tail of platform-specific and +//! domain-specific streams, so that isn't as bad as it sounds). +//! +//! +//! +//! +//! ## Stream Format Families +//! +//! Although every stream can do whatever it wants, there's a lot of streams that +//! are basically "a struct" or "a list of structs", so the same header formats and +//! layouts are used in several places. (This is descriptive, so these aren't +//! necessarily official terms/concepts.) +//! +//! +//! +//! ### Plain Old Struct Streams +//! +//! A stream that's just a struct. +//! +//! That's it. Just read the struct out of the stream. Although it might contain +//! RVAs to other data, which may or may not be relative to the start of the stream +//! or the start of the file (annoyingly inconsistent between streams). +//! +//! Known members of this family: +//! +//! * [`MinidumpAssertion`][] (contains [`MINIDUMP_ASSERTION_INFO`][format::MINIDUMP_ASSERTION_INFO]) +//! * [`MinidumpBreakpadInfo`][] (contains [`MINIDUMP_BREAKPAD_INFO`][format::MINIDUMP_BREAKPAD_INFO]) +//! * [`MinidumpCrashpadInfo`][] (contains [`MINIDUMP_CRASHPAD_INFO`][format::MINIDUMP_CRASHPAD_INFO]) +//! * [`MinidumpException`][] (contains [`MINIDUMP_EXCEPTION_STREAM`][format::MINIDUMP_EXCEPTION_STREAM]) +//! * [`MinidumpSystemInfo`][] (contains [`MINIDUMP_SYSTEM_INFO`][format::MINIDUMP_SYSTEM_INFO]) +//! +//! +//! +//! ### List Streams +//! +//! A list of some entry type. +//! +//! A `u32` count of entries followed by an array of entries. There may be padding +//! between the count and the entries. 
The array should be "right-justified" in the +//! stream (the stream ends exactly where the array does), so you can use the +//! difference between the array's expected size and the rest of the stream's size +//! to determine the padding. +//! +//! This format is used by a lot of the oldest (and therefore most important) +//! minidump streams. +//! +//! Known members of this family: +//! +//! * [`MinidumpMemoryList`] (entries are [`MINIDUMP_MEMORY_DESCRIPTOR`][format::MINIDUMP_MEMORY_DESCRIPTOR]) +//! * [`MinidumpModuleList`] (entries are [`MINIDUMP_MODULE`][format::MINIDUMP_MODULE]) +//! * [`MinidumpThreadList`] (entries are [`MINIDUMP_THREAD`][format::MINIDUMP_THREAD]) +//! * [`MinidumpThreadNames`] (entries are [`MINIDUMP_THREAD_NAME`][format::MINIDUMP_THREAD_NAME]) +//! * `MINIDUMP_THREAD_EX_LIST` (yes, the stream with "EX_LIST" in the name isn't an +//! EX list, names are hard.) +//! +//! The stream [`MinidumpMemory64List`] is a variant of list stream. It starts with +//! a `u64` count of entries, a 64-bit shared RVA for all entries, then followed by +//! an array of entries [`MINIDUMP_MEMORY_DESCRIPTOR64`][format::MINIDUMP_MEMORY_DESCRIPTOR64]. +//! +//! +//! ### EX List Streams +//! +//! A newer and more flexible version of list streams. (so EXtreme!!!) +//! +//! EX list streams start with this header: +//! +//! ``` +//! struct EX_LIST_HEADER { +//! /// Size (in bytes) of this header (array starts immediately after) +//! pub size_of_header: u32, +//! /// Size (in bytes) of an entry in the array +//! pub size_of_entry: u32, +//! /// The number of entries in the array +//! pub number_of_entries: u32, +//! } +//! ``` +//! +//! This design allows newer versions of the stream to be introduced, and for fields +//! to be added to the end of an entry type. I am not aware of an instance where +//! this flexibility has been used yet, but in theory you could identify "versions" +//! of the stream format by size, and older versions don't need to worry about +//! 
unknown future revisions, because they can just ignore the trailing bytes of +//! each entry. +//! +//! Known members of this family: +//! +//! * [`MinidumpMemoryInfoList`][] (entries are [`MINIDUMP_MEMORY_INFO`][format::MINIDUMP_MEMORY_INFO]) +//! * [`MinidumpUnloadedModuleList`][] (entries are [`MINIDUMP_UNLOADED_MODULE`][format::MINIDUMP_UNLOADED_MODULE]) +//! * [`MinidumpHandleDataStream`][] is a slight variation of this format with different +//! field names and a trailing `u32` member reserved for future use (entries +//! are [`MINIDUMP_HANDLE_DESCRIPTOR`][format::MINIDUMP_HANDLE_DESCRIPTOR] and +//! [`MINIDUMP_HANDLE_DESCRIPTOR_2`][format::MINIDUMP_HANDLE_DESCRIPTOR_2]) +//! * [`MinidumpThreadInfoList`][] (entries are [`MINIDUMP_THREAD_INFO`][format::MINIDUMP_THREAD_INFO]) +//! +//! +//! +//! ### Linux List Streams +//! +//! A dump of a special linux file like `/proc/cpuinfo`. +//! +//! These streams are plain text ([`strings::LinuxOsString`][]) files containing +//! line-delimited key-value pairs, like: +//! +//! ```text +//! processor : 0 +//! vendor_id : GenuineIntel +//! cpu family : 6 +//! model : 45 +//! model name : Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz +//! ``` +//! +//! Whitespace and separators vary from stream to stream. +//! +//! Known members of this family: +//! +//! * [`MinidumpLinuxCpuInfo`][] (separator is `:`) +//! * [`MinidumpLinuxEnviron`][] (separator is `=`) +//! * [`MinidumpLinuxLsbRelease`][] (separator is `=`) +//! * [`MinidumpLinuxProcStatus`][] (separator is `:`) +//! * [`MinidumpLinuxProcLimits`][] (separator is ` `) +//! +//! +//! +//! [MINIDUMP_TYPE]: https://docs.microsoft.com/en-us/windows/win32/api/minidumpapiset/ne-minidumpapiset-minidump_type +//! [minidump]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms680369%28v=vs.85%29.aspx +//! [minidumpwritedump]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms680360%28v=vs.85%29.aspx +//! 
[breakpad]: https://chromium.googlesource.com/breakpad/breakpad/+/master/ + +#![warn(missing_debug_implementations)] + +#[cfg(doctest)] +doc_comment::doctest!("../README.md"); + +pub use scroll::Endian; + +mod context; +mod iostuff; +mod minidump; + +pub use minidump_common::format; +pub use minidump_common::traits::Module; + +pub use crate::iostuff::Readable; +pub use crate::minidump::*; + +pub mod strings; +pub mod system_info; diff --git a/third_party/rust/minidump/src/minidump.rs b/third_party/rust/minidump/src/minidump.rs new file mode 100644 index 000000000000..ffc2ed111cfb --- /dev/null +++ b/third_party/rust/minidump/src/minidump.rs @@ -0,0 +1,7412 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +use debugid::{CodeId, DebugId}; +use memmap2::Mmap; +use num_traits::FromPrimitive; +use procfs_core::prelude::*; +use procfs_core::process::{MMPermissions, MemoryMap, MemoryMaps}; +use scroll::ctx::{SizeWith, TryFromCtx}; +use scroll::{Pread, BE, LE}; +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::convert::TryInto; +use std::fmt; +use std::fs::File; +use std::io; +use std::io::prelude::*; +use std::iter; +use std::marker::PhantomData; +use std::mem; +use std::ops::Deref; +use std::path::Path; +use std::str; +use std::time::{Duration, SystemTime}; +use tracing::warn; +use uuid::Uuid; + +pub use crate::context::*; +use crate::strings::*; +use crate::system_info::{Cpu, Os, PointerWidth}; +use minidump_common::errors::{self as err}; +use minidump_common::format::{self as md}; +use minidump_common::format::{CvSignature, MINIDUMP_STREAM_TYPE}; +use minidump_common::traits::{IntoRangeMapSafe, Module}; +use range_map::{Range, RangeMap}; +use time::format_description::well_known::Rfc3339; + +/// An index into the contents of a minidump. +/// +/// The `Minidump` struct represents the parsed header and +/// indices contained at the start of a minidump file. 
It can be instantiated +/// by calling the [`Minidump::read`][read] or +/// [`Minidump::read_path`][read_path] methods. +/// +/// # Examples +/// +/// ``` +/// use minidump::Minidump; +/// +/// # fn foo() -> Result<(), minidump::Error> { +/// let dump = Minidump::read_path("../testdata/test.dmp")?; +/// # Ok(()) +/// # } +/// ``` +/// +/// [read]: struct.Minidump.html#method.read +/// [read_path]: struct.Minidump.html#method.read_path +#[derive(Debug)] +pub struct Minidump<'a, T> +where + T: Deref + 'a, +{ + data: T, + /// The raw minidump header from the file. + pub header: md::MINIDUMP_HEADER, + streams: BTreeMap, + system_info: Option, + /// The endianness of this minidump file. + pub endian: scroll::Endian, + _phantom: PhantomData<&'a [u8]>, +} + +/// Errors encountered while reading a `Minidump`. +#[derive(Clone, Debug, thiserror::Error, PartialEq, Eq)] +pub enum Error { + #[error("File not found")] + FileNotFound, + #[error("I/O error")] + IoError, + #[error("Missing minidump header (empty minidump?)")] + MissingHeader, + #[error("Header mismatch")] + HeaderMismatch, + #[error("Minidump version mismatch")] + VersionMismatch, + #[error("Missing stream directory (heavily truncated minidump?)")] + MissingDirectory, + #[error("Error reading stream")] + StreamReadFailure, + #[error("Stream size mismatch: expected {expected} bytes, found {actual} bytes")] + StreamSizeMismatch { expected: usize, actual: usize }, + #[error("Stream not found")] + StreamNotFound, + #[error("Module read failure")] + ModuleReadFailure, + #[error("Memory read failure")] + MemoryReadFailure, + #[error("Data error")] + DataError, + #[error("Error reading CodeView data")] + CodeViewReadFailure, + #[error("Uknown element type")] + UknownElementType, +} + +impl Error { + /// Returns just the name of the error, as a more human-friendly version of + /// an error-code for error logging. 
+ pub fn name(&self) -> &'static str { + match self { + Error::FileNotFound => "FileNotFound", + Error::IoError => "IoError", + Error::MissingHeader => "MissingHeader", + Error::HeaderMismatch => "HeaderMismatch", + Error::VersionMismatch => "VersionMismatch", + Error::MissingDirectory => "MissingDirectory", + Error::StreamReadFailure => "StreamReadFailure", + Error::StreamSizeMismatch { .. } => "StreamSizeMismatch", + Error::StreamNotFound => "StreamNotFound", + Error::ModuleReadFailure => "ModuleReadFailure", + Error::MemoryReadFailure => "MemoryReadFailure", + Error::DataError => "DataError", + Error::CodeViewReadFailure => "CodeViewReadFailure", + Error::UknownElementType => "UnknownElementType", + } + } +} + +/// The fundamental unit of data in a `Minidump`. +pub trait MinidumpStream<'a>: Sized { + /// The stream type constant used in the `md::MDRawDirectory` entry. + /// This is usually a [MINIDUMP_STREAM_TYPE][] but it's left as a u32 + /// to allow external projects to add support for their own custom streams. + const STREAM_TYPE: u32; + + /// Read this `MinidumpStream` type from `bytes`. + /// + /// * `bytes` is the contents of this specific stream. + /// * `all` refers to the full contents of the minidump, for reading auxiliary data + /// referred to with `MINIDUMP_LOCATION_DESCRIPTOR`s. + /// * `system_info` is the preparsed SystemInfo stream, if it exists in the minidump. + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + system_info: Option<&MinidumpSystemInfo>, + ) -> Result; +} + +/// Provides a unified interface for getting metadata about the process's mapped memory regions +/// at the time of the crash. +/// +/// Currently this is one of [`MinidumpMemoryInfoList`], available in Windows minidumps, +/// or [`MinidumpLinuxMaps`], available in Linux minidumps. +/// +/// This allows you to e.g. check whether an address was executable or not without +/// worrying about which platform the crash occurred on. 
If you need to do more +/// specific analysis, you can get the native formats with [`UnifiedMemoryInfoList::info`] +/// and [`UnifiedMemoryInfoList::maps`]. +/// +/// Currently an enum because there is no situation where you can have both, +/// but this may change if the format evolves. Prefer using this type's methods +/// over pattern matching. +#[derive(Debug, Clone)] +pub enum UnifiedMemoryInfoList<'a> { + Maps(MinidumpLinuxMaps<'a>), + Info(MinidumpMemoryInfoList<'a>), +} + +#[derive(Debug, Copy, Clone)] +/// A [`UnifiedMemoryInfoList`] entry, providing metadata on a region of +/// memory in the crashed process. +pub enum UnifiedMemoryInfo<'a> { + Map(&'a MinidumpLinuxMapInfo<'a>), + Info(&'a MinidumpMemoryInfo<'a>), +} + +/// The contents of `/proc/self/maps` for the crashing process. +/// +/// This is roughly equivalent in functionality to [`MinidumpMemoryInfoList`]. +/// Use [`UnifiedMemoryInfoList`] to handle the two uniformly. +#[derive(Debug, Clone)] +pub struct MinidumpLinuxMaps<'a> { + /// The memory regions, in the order they were stored in the minidump. + regions: Vec>, + /// Map from address range to index in regions. Use + /// [`MinidumpLinuxMaps::memory_info_at_address`]. + regions_by_addr: RangeMap, +} + +/// A memory mapping entry for the process we are analyzing. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct MinidumpLinuxMapInfo<'a> { + pub map: MemoryMap, + _phantom: PhantomData<&'a u8>, +} + +#[derive(Debug, Clone)] +pub struct MinidumpMemoryInfoList<'a> { + /// The memory regions, in the order they were stored in the minidump. + regions: Vec>, + /// Map from address range to index in regions. Use + /// [`MinidumpMemoryInfoList::memory_info_at_address`]. + regions_by_addr: RangeMap, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +/// Metadata about a region of memory (whether it is executable, freed, private, and so on). +pub struct MinidumpMemoryInfo<'a> { + /// The raw value from the minidump. 
+ pub raw: md::MINIDUMP_MEMORY_INFO, + /// The memory protection when the region was initially allocated. + pub allocation_protection: md::MemoryProtection, + /// The state of the pages in the region (whether it is freed or not). + pub state: md::MemoryState, + /// The access protection of the pages in the region. + pub protection: md::MemoryProtection, + /// What kind of memory mapping the pages in this region are. + pub ty: md::MemoryType, + _phantom: PhantomData<&'a u8>, +} + +/// CodeView data describes how to locate debug symbols +#[derive(Debug, Clone)] +pub enum CodeView { + /// PDB 2.0 format data in a separate file + Pdb20(md::CV_INFO_PDB20), + /// PDB 7.0 format data in a separate file (most common) + Pdb70(md::CV_INFO_PDB70), + /// Indicates data is in an ELF binary with build ID `build_id` + Elf(md::CV_INFO_ELF), + /// An unknown format containing the raw bytes of data + Unknown(Vec), +} + +/// An executable or shared library loaded in the process at the time the `Minidump` was written. +#[derive(Debug, Clone)] +pub struct MinidumpModule { + /// The `MINIDUMP_MODULE` direct from the minidump file. + pub raw: md::MINIDUMP_MODULE, + /// The module name. This is stored separately in the minidump. + pub name: String, + /// A `CodeView` record, if one is present. + pub codeview_info: Option, + /// A misc debug record, if one is present. + pub misc_info: Option, + os: Os, + /// The parsed DebugId of the module, if one is present. + debug_id: Option, +} + +/// A list of `MinidumpModule`s contained in a `Minidump`. +#[derive(Debug, Clone)] +pub struct MinidumpModuleList { + /// The modules, in the order they were stored in the minidump. + modules: Vec, + /// Map from address range to index in modules. Use `MinidumpModuleList::module_at_address`. + modules_by_addr: RangeMap, +} + +/// A mapping of thread ids to their names. 
+#[derive(Debug, Clone, Default)] +pub struct MinidumpThreadNames { + names: BTreeMap, +} + +/// An executable or shared library that was once loaded into the process, but was unloaded +/// by the time the `Minidump` was written. +#[derive(Debug, Clone)] +pub struct MinidumpUnloadedModule { + /// The `MINIDUMP_UNLOADED_MODULE` direct from the minidump file. + pub raw: md::MINIDUMP_UNLOADED_MODULE, + /// The module name. This is stored separately in the minidump. + pub name: String, +} + +/// A list of `MinidumpUnloadedModule`s contained in a `Minidump`. +#[derive(Debug, Clone)] +pub struct MinidumpUnloadedModuleList { + /// The modules, in the order they were stored in the minidump. + modules: Vec, + /// Map from address range to index in modules. + /// Use `MinidumpUnloadedModuleList::modules_at_address`. + modules_by_addr: Vec<(Range, usize)>, +} + +/// Contains object-specific information for a handle. Microsoft documentation +/// doesn't describe the contents of this type. +#[derive(Debug, Clone)] +pub struct MinidumpHandleObjectInformation { + pub raw: md::MINIDUMP_HANDLE_OBJECT_INFORMATION, + pub info_type: md::MINIDUMP_HANDLE_OBJECT_INFORMATION_TYPE, +} + +impl fmt::Display for MinidumpHandleObjectInformation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{{raw: {:?}, type: {:?}}}", self.raw, self.info_type) + } +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug, Clone)] +pub enum RawHandleDescriptor { + HandleDescriptor(md::MINIDUMP_HANDLE_DESCRIPTOR), + HandleDescriptor2(md::MINIDUMP_HANDLE_DESCRIPTOR_2), +} + +/// Describes the state of an individual system handle at the time the minidump was written. +#[derive(Debug, Clone)] +pub struct MinidumpHandleDescriptor { + /// The `MINIDUMP_HANDLE_DESCRIPTOR` data direct from the minidump file. + pub raw: RawHandleDescriptor, + /// The name of the type of this handle, if present. + pub type_name: Option, + /// The object name of this handle, if present. 
+ /// On Linux this is the file path. + pub object_name: Option, + /// Object information for this handle, can be empty, platform-specific. + pub object_infos: Vec, +} + +/// A stream holding all the system handles at the time the minidump was written. +/// On Linux this is the list of open file descriptors. +#[derive(Debug, Clone)] +pub struct MinidumpHandleDataStream { + pub handles: Vec, +} + +/// The state of a thread from the process when the minidump was written. +#[derive(Debug)] +pub struct MinidumpThread<'a> { + /// The `MINIDUMP_THREAD` direct from the minidump file. + pub raw: md::MINIDUMP_THREAD, + /// The CPU context for the thread, if present. + context: Option<&'a [u8]>, + /// The stack memory for the thread, if present. + stack: Option>, + /// Saved endianness for lazy parsing. + endian: scroll::Endian, +} + +/// A list of `MinidumpThread`s contained in a `Minidump`. +#[derive(Debug)] +pub struct MinidumpThreadList<'a> { + /// The threads, in the order they were present in the `Minidump`. + pub threads: Vec>, + /// A map of thread id to index in `threads`. + thread_ids: HashMap, +} + +/// The state of a thread from the process when the minidump was written. +#[derive(Debug)] +pub struct MinidumpThreadInfo { + /// The `MINIDUMP_THREAD_INFO` direct from the minidump file. + pub raw: md::MINIDUMP_THREAD_INFO, +} + +/// A list of `MinidumpThread`s contained in a `Minidump`. +#[derive(Debug)] +pub struct MinidumpThreadInfoList { + /// The thread info entries, in the order they were present in the `Minidump`. + pub thread_infos: Vec, + /// A map of thread id to index in `entries`. + thread_ids: HashMap, +} + +/// Information about the system that generated the minidump. 
+#[derive(Debug, Clone)] +pub struct MinidumpSystemInfo { + /// The `MINIDUMP_SYSTEM_INFO` direct from the minidump + pub raw: md::MINIDUMP_SYSTEM_INFO, + /// The operating system that generated the minidump + pub os: Os, + /// The CPU on which the minidump was generated + pub cpu: Cpu, + /// A string that describes the latest Service Pack installed on the system. + /// If no Service Pack has been installed, the string is empty. + /// This is stored separately in the minidump. + csd_version: Option, + /// An x86 (not x64!) CPU vendor name that is stored in `raw` but in a way + /// that's + cpu_info: Option, +} + +/// A region of memory from the process that wrote the minidump. +/// This is the underlying generic type for [MinidumpMemory] and [MinidumpMemory64]. +#[derive(Clone, Debug)] +pub struct MinidumpMemoryBase<'a, Descriptor> { + /// The raw `MINIDUMP_MEMORY_DESCRIPTOR` from the minidump. + pub desc: Descriptor, + /// The starting address of this range of memory. + pub base_address: u64, + /// The length of this range of memory. + pub size: u64, + /// The contents of the memory. + pub bytes: &'a [u8], + /// The endianness of the minidump which is used for memory accesses. + pub endian: scroll::Endian, +} + +/// A region of memory from the process that wrote the minidump. +pub type MinidumpMemory<'a> = MinidumpMemoryBase<'a, md::MINIDUMP_MEMORY_DESCRIPTOR>; + +/// A large region of memory from the process that wrote the minidump (usually a full dump). 
+pub type MinidumpMemory64<'a> = MinidumpMemoryBase<'a, md::MINIDUMP_MEMORY_DESCRIPTOR64>; + +/// Provides a unified interface for MinidumpMemory and MinidumpMemory64 +#[derive(Debug, Clone, Copy)] +pub enum UnifiedMemory<'a, 'mdmp> { + Memory(&'a MinidumpMemory<'mdmp>), + Memory64(&'a MinidumpMemory64<'mdmp>), +} + +#[derive(Debug, Clone)] +pub enum RawMacCrashInfo { + V1( + md::MINIDUMP_MAC_CRASH_INFO_RECORD, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_STRINGS, + ), + V4( + md::MINIDUMP_MAC_CRASH_INFO_RECORD_4, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_STRINGS_4, + ), + V5( + md::MINIDUMP_MAC_CRASH_INFO_RECORD_5, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_STRINGS_5, + ), +} + +#[derive(Debug, Clone)] +pub struct MinidumpMacCrashInfo { + /// The `MINIDUMP_MAC_CRASH_INFO_RECORD` and `MINIDUMP_MAC_CRASH_INFO_RECORD_STRINGS`. + pub raw: Vec, +} + +#[derive(Debug, Clone)] +pub struct MinidumpMacBootargs { + pub raw: md::MINIDUMP_MAC_BOOTARGS, + pub bootargs: Option, +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug, Clone)] +pub enum RawMiscInfo { + MiscInfo(md::MINIDUMP_MISC_INFO), + MiscInfo2(md::MINIDUMP_MISC_INFO_2), + MiscInfo3(md::MINIDUMP_MISC_INFO_3), + MiscInfo4(md::MINIDUMP_MISC_INFO_4), + MiscInfo5(md::MINIDUMP_MISC_INFO_5), +} + +/// Miscellaneous information about the process that wrote the minidump. +#[derive(Debug, Clone)] +pub struct MinidumpMiscInfo { + /// The `MINIDUMP_MISC_INFO` struct direct from the minidump. + pub raw: RawMiscInfo, +} + +/// Additional information about process state. +/// +/// MinidumpBreakpadInfo wraps MINIDUMP_BREAKPAD_INFO, which is an optional stream +/// in a minidump that provides additional information about the process state +/// at the time the minidump was generated. +#[derive(Debug, Clone)] +pub struct MinidumpBreakpadInfo { + raw: md::MINIDUMP_BREAKPAD_INFO, + /// The thread that wrote the minidump. + pub dump_thread_id: Option, + /// The thread that requested that a minidump be written. 
+ pub requesting_thread_id: Option, +} + +#[derive(Default, Debug)] +/// Interesting values extracted from /etc/lsb-release +pub struct MinidumpLinuxLsbRelease<'a> { + data: &'a [u8], +} + +/// Interesting values extracted from /proc/self/environ +#[derive(Default, Debug)] +pub struct MinidumpLinuxEnviron<'a> { + data: &'a [u8], +} + +/// Interesting values extracted from /proc/cpuinfo +#[derive(Default, Debug)] +pub struct MinidumpLinuxCpuInfo<'a> { + data: &'a [u8], +} + +/// Interesting values extracted from /proc/self/status +#[derive(Default, Debug)] +pub struct MinidumpLinuxProcStatus<'a> { + data: &'a [u8], +} + +/// Interesting values extracted from /proc/self/limits +#[derive(Default, Debug)] +pub struct MinidumpLinuxProcLimits<'a> { + data: &'a [u8], +} + +/// The reason for a process crash. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum CrashReason { + /// A Mac/iOS error code with no other interesting details. + MacGeneral(err::ExceptionCodeMac, u32), + MacBadAccessKern(err::ExceptionCodeMacBadAccessKernType), + MacBadAccessArm(err::ExceptionCodeMacBadAccessArmType), + MacBadAccessPpc(err::ExceptionCodeMacBadAccessPpcType), + MacBadAccessX86(err::ExceptionCodeMacBadAccessX86Type), + MacBadInstructionArm(err::ExceptionCodeMacBadInstructionArmType), + MacBadInstructionPpc(err::ExceptionCodeMacBadInstructionPpcType), + MacBadInstructionX86(err::ExceptionCodeMacBadInstructionX86Type), + MacArithmeticArm(err::ExceptionCodeMacArithmeticArmType), + MacArithmeticPpc(err::ExceptionCodeMacArithmeticPpcType), + MacArithmeticX86(err::ExceptionCodeMacArithmeticX86Type), + MacSoftware(err::ExceptionCodeMacSoftwareType), + MacBreakpointArm(err::ExceptionCodeMacBreakpointArmType), + MacBreakpointPpc(err::ExceptionCodeMacBreakpointPpcType), + MacBreakpointX86(err::ExceptionCodeMacBreakpointX86Type), + MacResource(err::ExceptionCodeMacResourceType, u64, u64), + MacGuard(err::ExceptionCodeMacGuardType, u64, u64), + + /// A Linux/Android error code with no other 
interesting metadata. + LinuxGeneral(err::ExceptionCodeLinux, u32), + LinuxSigill(err::ExceptionCodeLinuxSigillKind), + LinuxSigtrap(err::ExceptionCodeLinuxSigtrapKind), + LinuxSigbus(err::ExceptionCodeLinuxSigbusKind), + LinuxSigfpe(err::ExceptionCodeLinuxSigfpeKind), + LinuxSigsegv(err::ExceptionCodeLinuxSigsegvKind), + LinuxSigsys(err::ExceptionCodeLinuxSigsysKind), + + /// A Windows error code with no other interesting metadata. + WindowsGeneral(err::ExceptionCodeWindows), + /// A Windows error from winerror.h. + WindowsWinError(err::WinErrorWindows), + /// A Windows error for a specific facility from winerror.h. + WindowsWinErrorWithFacility(err::WinErrorFacilityWindows, err::WinErrorWindows), + /// A Windows error from ntstatus.h + WindowsNtStatus(err::NtStatusWindows), + /// ExceptionCodeWindows::EXCEPTION_ACCESS_VIOLATION but with details on the kind of access. + WindowsAccessViolation(err::ExceptionCodeWindowsAccessType), + /// ExceptionCodeWindows::EXCEPTION_IN_PAGE_ERROR but with details on the kind of access. + /// Second argument is a windows NTSTATUS value. + WindowsInPageError(err::ExceptionCodeWindowsInPageErrorType, u64), + /// ExceptionCodeWindows::EXCEPTION_STACK_BUFFER_OVERRUN with an accompanying + /// windows FAST_FAIL value. + WindowsStackBufferOverrun(u64), + /// A Windows error with no known mapping. + WindowsUnknown(u32), + + Unknown(u32, u32), +} + +/// Information about the exception that caused the minidump to be generated. +/// +/// `MinidumpException` wraps `MINIDUMP_EXCEPTION_STREAM`, which contains information +/// about the exception that caused the minidump to be generated, if the +/// minidump was generated in an exception handler called as a result of an +/// exception. It also provides access to a `MinidumpContext` object, which +/// contains the CPU context for the exception thread at the time the exception +/// occurred. 
+#[derive(Debug)] +pub struct MinidumpException<'a> { + /// The raw exception information from the minidump stream. + pub raw: md::MINIDUMP_EXCEPTION_STREAM, + /// The thread that encountered this exception. + pub thread_id: u32, + /// If present, the CPU context from the time the thread encountered the exception. + /// + /// This should be used in place of the context contained within the thread with id + /// `thread_id`, since it points to the code location where the exception happened, + /// without any exception handling routines that are likely to be on the stack after + /// that point. + context: Option<&'a [u8]>, + /// Saved endianess for lazy parsing. + endian: scroll::Endian, +} + +/// A list of memory regions included in a minidump. +/// This is the underlying generic type for [MinidumpMemoryList] and [MinidumpMemory64List]. +#[derive(Debug)] +pub struct MinidumpMemoryListBase<'a, Descriptor> { + /// The memory regions, in the order they were stored in the minidump. + regions: Vec>, + /// Map from address range to index in regions. Use `MinidumpMemoryList::memory_at_address`. + regions_by_addr: RangeMap, +} + +/// A list of memory regions included in a minidump. +pub type MinidumpMemoryList<'a> = MinidumpMemoryListBase<'a, md::MINIDUMP_MEMORY_DESCRIPTOR>; + +/// A list of large memory regions included in a minidump (usually a full dump). +pub type MinidumpMemory64List<'a> = MinidumpMemoryListBase<'a, md::MINIDUMP_MEMORY_DESCRIPTOR64>; + +/// Provides a unified interface for MinidumpMemoryList and MinidumpMemory64List +#[derive(Debug)] +pub enum UnifiedMemoryList<'a> { + Memory(MinidumpMemoryList<'a>), + Memory64(MinidumpMemory64List<'a>), +} +impl<'a> Default for UnifiedMemoryList<'a> { + fn default() -> Self { + Self::Memory(Default::default()) + } +} + +/// Information about an assertion that caused a crash. +#[derive(Debug)] +pub struct MinidumpAssertion { + pub raw: md::MINIDUMP_ASSERTION_INFO, +} + +/// A typed annotation object. 
+#[derive(Clone, Debug)] +#[non_exhaustive] +pub enum MinidumpAnnotation { + /// An invalid annotation. Reserved for internal use. + Invalid, + /// A `NUL`-terminated C-string. + String(String), + /// Clients may declare their own custom types. + UserDefined(md::MINIDUMP_ANNOTATION), + /// An unsupported annotation from a future crashpad version. + Unsupported(md::MINIDUMP_ANNOTATION), +} + +impl PartialEq for MinidumpAnnotation { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Invalid, Self::Invalid) => true, + (Self::String(a), Self::String(b)) => a == b, + _ => false, + } + } +} + +/// Additional Crashpad-specific information about a module carried within a minidump file. +#[derive(Debug)] +pub struct MinidumpModuleCrashpadInfo { + /// The raw crashpad module extension information. + pub raw: md::MINIDUMP_MODULE_CRASHPAD_INFO, + /// Index of the corresponding module in the `MinidumpModuleList`. + pub module_index: usize, + pub list_annotations: Vec, + pub simple_annotations: BTreeMap, + pub annotation_objects: BTreeMap, +} + +/// Additional Crashpad-specific information carried within a minidump file. +#[derive(Debug)] +pub struct MinidumpCrashpadInfo { + pub raw: md::MINIDUMP_CRASHPAD_INFO, + pub simple_annotations: BTreeMap, + pub module_list: Vec, +} + +//====================================================== +// Implementations + +fn format_time_t(t: u32) -> String { + time::OffsetDateTime::from_unix_timestamp(t as i64) + .ok() + .and_then(|datetime| datetime.format(&Rfc3339).ok()) + .unwrap_or_default() +} + +fn format_system_time(time: &md::SYSTEMTIME) -> String { + // Note this drops the day_of_week field on the ground -- is that fine? 
+ let format_date = || { + use std::convert::TryFrom; + let month = time::Month::try_from(time.month as u8).ok()?; + let date = time::Date::from_calendar_date(time.year as i32, month, time.day as u8).ok()?; + let datetime = date + .with_hms_milli( + time.hour as u8, + time.minute as u8, + time.second as u8, + time.milliseconds, + ) + .ok()? + .assume_utc(); + datetime.format(&Rfc3339).ok() + }; + format_date().unwrap_or_else(|| "".to_owned()) +} + +/// Produce a slice of `bytes` corresponding to the offset and size in `loc`, or an +/// `Error` if the data is not fully contained within `bytes`. +fn location_slice<'a>( + bytes: &'a [u8], + loc: &md::MINIDUMP_LOCATION_DESCRIPTOR, +) -> Result<&'a [u8], Error> { + let start = loc.rva as usize; + start + .checked_add(loc.data_size as usize) + .and_then(|end| bytes.get(start..end)) + .ok_or(Error::StreamReadFailure) +} + +/// Read a u32 length-prefixed UTF-16 string from `bytes` at `offset`. +fn read_string_utf16(offset: &mut usize, bytes: &[u8], endian: scroll::Endian) -> Option { + let u: u32 = bytes.gread_with(offset, endian).ok()?; + let size = u as usize; + if size % 2 != 0 || (*offset + size) > bytes.len() { + return None; + } + let encoding = match endian { + scroll::Endian::Little => encoding_rs::UTF_16LE, + scroll::Endian::Big => encoding_rs::UTF_16BE, + }; + + let s = encoding + .decode_without_bom_handling_and_without_replacement(&bytes[*offset..*offset + size])?; + *offset += size; + Some(s.into()) +} + +#[inline] +fn read_string_utf8_unterminated<'a>( + offset: &mut usize, + bytes: &'a [u8], + endian: scroll::Endian, +) -> Option<&'a str> { + let length: u32 = bytes.gread_with(offset, endian).ok()?; + let slice = bytes.gread_with(offset, length as usize).ok()?; + std::str::from_utf8(slice).ok() +} + +fn read_string_utf8<'a>( + offset: &mut usize, + bytes: &'a [u8], + endian: scroll::Endian, +) -> Option<&'a str> { + let string = read_string_utf8_unterminated(offset, bytes, endian)?; + match 
bytes.gread(offset) { + Ok(0u8) => Some(string), + _ => None, + } +} + +fn read_cstring_utf8(offset: &mut usize, bytes: &[u8]) -> Option { + let initial_offset = *offset; + loop { + let byte: u8 = bytes.gread(offset).ok()?; + if byte == 0 { + break; + } + } + std::str::from_utf8(&bytes[initial_offset..*offset - 1]) + .map(String::from) + .ok() +} + +/// Convert `bytes` with trailing NUL characters to a string +fn string_from_bytes_nul(bytes: &[u8]) -> Option> { + bytes.split(|&b| b == 0).next().map(String::from_utf8_lossy) +} + +/// Format `bytes` as a String of hex digits +fn bytes_to_hex(bytes: &[u8]) -> String { + let hex_bytes: Vec = bytes.iter().map(|b| format!("{b:02x}")).collect(); + hex_bytes.join("") +} + +/// Attempt to read a CodeView record from `data` at `location` +fn read_codeview( + location: &md::MINIDUMP_LOCATION_DESCRIPTOR, + data: &[u8], + endian: scroll::Endian, +) -> Option { + let bytes = location_slice(data, location).ok()?; + // The CodeView data can be one of a few different formats. Try to read the + // signature first to figure out what format the data is. + let signature: u32 = bytes.pread_with(0, endian).ok()?; + Some(match CvSignature::from_u32(signature) { + // PDB data has two known versions: the current 7.0 and the older 2.0 version. + Some(CvSignature::Pdb70) => CodeView::Pdb70(bytes.pread_with(0, endian).ok()?), + Some(CvSignature::Pdb20) => CodeView::Pdb20(bytes.pread_with(0, endian).ok()?), + // Breakpad's ELF build ID format. + Some(CvSignature::Elf) => CodeView::Elf(bytes.pread_with(0, endian).ok()?), + // Other formats aren't handled, but save the raw bytes. + _ => CodeView::Unknown(bytes.to_owned()), + }) +} + +fn read_debug_id(codeview_info: &CodeView, endian: scroll::Endian) -> Option { + match codeview_info { + CodeView::Pdb70(ref raw) => { + // For macOS, this should be its code ID with the age (0) + // appended to the end of it. 
This makes it identical to debug + // IDs for Windows, and is why it doesn't have a special case + // here. + let uuid = Uuid::from_fields( + raw.signature.data1, + raw.signature.data2, + raw.signature.data3, + &raw.signature.data4, + ); + (!uuid.is_nil()).then(|| DebugId::from_parts(uuid, raw.age)) + } + CodeView::Pdb20(ref raw) => Some(DebugId::from_pdb20(raw.signature, raw.age)), + CodeView::Elf(ref raw) => { + // For empty or trivial `build_id`s, we don't want to return a `DebugId`. + // This can happen for mapped files that aren't executable, like fonts or .jar files. + if raw.build_id.iter().all(|byte| *byte == 0) { + return None; + } + + // For backwards-compat (Linux minidumps have historically + // been written using PDB70 CodeView info), treat build_id + // as if the first 16 bytes were a GUID. + let guid_size = ::size_with(&endian); + let guid = if raw.build_id.len() < guid_size { + // Pad with zeros. + let v: Vec = raw + .build_id + .iter() + .cloned() + .chain(iter::repeat(0)) + .take(guid_size) + .collect(); + v.pread_with::(0, endian).ok() + } else { + raw.build_id.pread_with::(0, endian).ok() + }; + guid.map(|g| Uuid::from_fields(g.data1, g.data2, g.data3, &g.data4)) + .map(DebugId::from_uuid) + } + _ => None, + } +} + +/// Checks that the buffer is large enough for the given number of items. +/// +/// Essentially ensures that `buf.len() >= offset + (number_of_entries * size_of_entry)`. +/// Returns `(number_of_entries, expected_size)` on success. 
+fn ensure_count_in_bound( + buf: &[u8], + number_of_entries: usize, + size_of_entry: usize, + offset: usize, +) -> Result<(usize, usize), Error> { + let expected_size = number_of_entries + .checked_mul(size_of_entry) + .and_then(|v| v.checked_add(offset)) + .ok_or(Error::StreamReadFailure)?; + if buf.len() < expected_size { + return Err(Error::StreamSizeMismatch { + expected: expected_size, + actual: buf.len(), + }); + } + Ok((number_of_entries, expected_size)) +} + +impl MinidumpModule { + /// Create a `MinidumpModule` with some basic info. + /// + /// Useful for testing. + pub fn new(base: u64, size: u32, name: &str) -> MinidumpModule { + MinidumpModule { + raw: md::MINIDUMP_MODULE { + base_of_image: base, + size_of_image: size, + ..md::MINIDUMP_MODULE::default() + }, + name: String::from(name), + codeview_info: None, + misc_info: None, + os: Os::Unknown(0), + debug_id: None, + } + } + + /// Read additional data to construct a `MinidumpModule` from `bytes` using the information + /// from the module list in `raw`. + pub fn read( + raw: md::MINIDUMP_MODULE, + bytes: &[u8], + endian: scroll::Endian, + system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let mut offset = raw.module_name_rva as usize; + let name = + read_string_utf16(&mut offset, bytes, endian).ok_or(Error::CodeViewReadFailure)?; + let codeview_info = if raw.cv_record.data_size == 0 { + None + } else { + Some(read_codeview(&raw.cv_record, bytes, endian).ok_or(Error::CodeViewReadFailure)?) + }; + + let os = system_info.map(|info| info.os).unwrap_or(Os::Unknown(0)); + + let debug_id = codeview_info + .as_ref() + .and_then(|cv| read_debug_id(cv, endian)); + + Ok(MinidumpModule { + raw, + name, + codeview_info, + misc_info: None, + os, + debug_id, + }) + } + + /// Write a human-readable description of this `MinidumpModule` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MINIDUMP_MODULE + base_of_image = {:#x} + size_of_image = {:#x} + checksum = {:#x} + time_date_stamp = {:#x} {} + module_name_rva = {:#x} + version_info.signature = {:#x} + version_info.struct_version = {:#x} + version_info.file_version = {:#x}:{:#x} + version_info.product_version = {:#x}:{:#x} + version_info.file_flags_mask = {:#x} + version_info.file_flags = {:#x} + version_info.file_os = {:#x} + version_info.file_type = {:#x} + version_info.file_subtype = {:#x} + version_info.file_date = {:#x}:{:#x} + cv_record.data_size = {} + cv_record.rva = {:#x} + misc_record.data_size = {} + misc_record.rva = {:#x} + (code_file) = \"{}\" + (code_identifier) = \"{}\" +", + self.raw.base_of_image, + self.raw.size_of_image, + self.raw.checksum, + self.raw.time_date_stamp, + format_time_t(self.raw.time_date_stamp), + self.raw.module_name_rva, + self.raw.version_info.signature, + self.raw.version_info.struct_version, + self.raw.version_info.file_version_hi, + self.raw.version_info.file_version_lo, + self.raw.version_info.product_version_hi, + self.raw.version_info.product_version_lo, + self.raw.version_info.file_flags_mask, + self.raw.version_info.file_flags, + self.raw.version_info.file_os, + self.raw.version_info.file_type, + self.raw.version_info.file_subtype, + self.raw.version_info.file_date_hi, + self.raw.version_info.file_date_lo, + self.raw.cv_record.data_size, + self.raw.cv_record.rva, + self.raw.misc_record.data_size, + self.raw.misc_record.rva, + self.code_file(), + self.code_identifier().unwrap_or_default(), + )?; + // Print CodeView data. 
+ match self.codeview_info { + Some(CodeView::Pdb70(ref raw)) => { + let pdb_file_name = + string_from_bytes_nul(&raw.pdb_file_name).unwrap_or(Cow::Borrowed("(invalid)")); + write!(f, " (cv_record).cv_signature = {:#x} + (cv_record).signature = {:08x}-{:04x}-{:04x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x} + (cv_record).age = {} + (cv_record).pdb_file_name = \"{}\" +", + raw.cv_signature, + raw.signature.data1, + raw.signature.data2, + raw.signature.data3, + raw.signature.data4[0], + raw.signature.data4[1], + raw.signature.data4[2], + raw.signature.data4[3], + raw.signature.data4[4], + raw.signature.data4[5], + raw.signature.data4[6], + raw.signature.data4[7], + raw.age, + pdb_file_name, + )?; + } + Some(CodeView::Pdb20(ref raw)) => { + let pdb_file_name = + string_from_bytes_nul(&raw.pdb_file_name).unwrap_or(Cow::Borrowed("(invalid)")); + write!( + f, + " (cv_record).cv_header.signature = {:#x} + (cv_record).cv_header.offset = {:#x} + (cv_record).signature = {:#x} {} + (cv_record).age = {} + (cv_record).pdb_file_name = \"{}\" +", + raw.cv_signature, + raw.cv_offset, + raw.signature, + format_time_t(raw.signature), + raw.age, + pdb_file_name, + )?; + } + Some(CodeView::Elf(ref raw)) => { + // Fibbing about having cv_signature handy here. + write!( + f, + " (cv_record).cv_signature = {:#x} + (cv_record).build_id = {} +", + raw.cv_signature, + bytes_to_hex(&raw.build_id), + )?; + } + Some(CodeView::Unknown(ref bytes)) => { + writeln!( + f, + " (cv_record) = {}", + bytes_to_hex(bytes), + )?; + } + None => { + writeln!(f, " (cv_record) = (null)")?; + } + } + + // Print misc record data. + if let Some(ref _misc) = self.misc_info { + //TODO, not terribly important. + writeln!(f, " (misc_record) = (unimplemented)")?; + } else { + writeln!(f, " (misc_record) = (null)")?; + } + + // Print remaining data. 
+ write!( + f, + r#" (debug_file) = "{}" + (debug_identifier) = "{}" + (version) = "{}" + +"#, + self.debug_file().unwrap_or(Cow::Borrowed("")), + self.debug_identifier().unwrap_or_default(), + self.version().unwrap_or(Cow::Borrowed("")), + )?; + Ok(()) + } + + fn memory_range(&self) -> Option> { + if self.size() == 0 { + return None; + } + Some(Range::new( + self.base_address(), + self.base_address().checked_add(self.size())? - 1, + )) + } +} + +impl Module for MinidumpModule { + fn base_address(&self) -> u64 { + self.raw.base_of_image + } + fn size(&self) -> u64 { + self.raw.size_of_image as u64 + } + fn code_file(&self) -> Cow<'_, str> { + Cow::Borrowed(&self.name) + } + + fn code_identifier(&self) -> Option { + match self.codeview_info { + Some(CodeView::Pdb70(ref raw)) if matches!(self.os, Os::MacOs | Os::Ios) => { + // MacOs uses PDB70 instead of its own dedicated format. + // See the following issue for a potential MacOs-specific format: + // https://github.com/rust-minidump/rust-minidump/issues/455 + Some(CodeId::new(format!("{:#}", raw.signature))) + } + Some(CodeView::Pdb20(_)) | Some(CodeView::Pdb70(_)) => Some(CodeId::new(format!( + "{0:08X}{1:x}", + self.raw.time_date_stamp, self.raw.size_of_image + ))), + Some(CodeView::Elf(ref raw)) => { + // Return None instead of sentinel CodeIds for empty + // `build_id`s. Non-executable mapped files like fonts or .jar + // files will usually fall under this case. + if raw.build_id.iter().all(|byte| *byte == 0) { + None + } else { + Some(CodeId::from_binary(&raw.build_id)) + } + } + None if self.os == Os::Windows => { + // Fall back to the timestamp + size-based debug-id for Windows. + // Some Module records from Windows have no codeview record, but + // the CodeId generated here is valid and can be looked up on + // the Microsoft symbol server. + // One example might be `wow64cpu.dll` with code-id `378BC3CDa000`. 
+ // This can however lead to "false positive" code-ids for modules + // that have no timestamp, in which case the code-id looks extremely + // low-entropy. The same can happen though if they *do* have a + // codeview record. + Some(CodeId::new(format!( + "{0:08X}{1:x}", + self.raw.time_date_stamp, self.raw.size_of_image + ))) + } + // Occasionally things will make it into the module stream that + // shouldn't be there, and so no meaningful CodeId can be found from + // those. One of those things are SysV shared memory segments which + // have no CodeView record. + _ => None, + } + } + fn debug_file(&self) -> Option> { + match self.codeview_info { + Some(CodeView::Pdb70(ref raw)) => string_from_bytes_nul(&raw.pdb_file_name), + Some(CodeView::Pdb20(ref raw)) => string_from_bytes_nul(&raw.pdb_file_name), + Some(CodeView::Elf(_)) => Some(Cow::Borrowed(&self.name)), + // TODO: support misc record? not really important. + _ => None, + } + } + fn debug_identifier(&self) -> Option { + self.debug_id + } + fn version(&self) -> Option> { + if self.raw.version_info.signature == md::VS_FFI_SIGNATURE + && self.raw.version_info.struct_version == md::VS_FFI_STRUCVERSION + { + if matches!(self.os, Os::MacOs | Os::Ios | Os::Windows) { + let ver = format!( + "{}.{}.{}.{}", + self.raw.version_info.file_version_hi >> 16, + self.raw.version_info.file_version_hi & 0xffff, + self.raw.version_info.file_version_lo >> 16, + self.raw.version_info.file_version_lo & 0xffff + ); + Some(Cow::Owned(ver)) + } else { + // Assume Elf + let ver = format!( + "{}.{}.{}.{}", + self.raw.version_info.file_version_hi, + self.raw.version_info.file_version_lo, + self.raw.version_info.product_version_hi, + self.raw.version_info.product_version_lo + ); + Some(Cow::Owned(ver)) + } + } else { + None + } + } +} + +impl MinidumpUnloadedModule { + /// Create a `MinidumpUnloadedModule` with some basic info. + /// + /// Useful for testing. 
+ pub fn new(base: u64, size: u32, name: &str) -> MinidumpUnloadedModule { + MinidumpUnloadedModule { + raw: md::MINIDUMP_UNLOADED_MODULE { + base_of_image: base, + size_of_image: size, + ..md::MINIDUMP_UNLOADED_MODULE::default() + }, + name: String::from(name), + } + } + + /// Read additional data to construct a `MinidumpUnloadedModule` from `bytes` using the information + /// from the module list in `raw`. + pub fn read( + raw: md::MINIDUMP_UNLOADED_MODULE, + bytes: &[u8], + endian: scroll::Endian, + ) -> Result { + let mut offset = raw.module_name_rva as usize; + let name = read_string_utf16(&mut offset, bytes, endian).ok_or(Error::DataError)?; + Ok(MinidumpUnloadedModule { raw, name }) + } + + /// Write a human-readable description of this `MinidumpModule` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MINIDUMP_UNLOADED_MODULE + base_of_image = {:#x} + size_of_image = {:#x} + checksum = {:#x} + time_date_stamp = {:#x} {} + module_name_rva = {:#x} + (code_file) = \"{}\" + (code_identifier) = \"{}\" +", + self.raw.base_of_image, + self.raw.size_of_image, + self.raw.checksum, + self.raw.time_date_stamp, + format_time_t(self.raw.time_date_stamp), + self.raw.module_name_rva, + self.code_file(), + self.code_identifier().unwrap_or_default(), + )?; + + Ok(()) + } + + fn memory_range(&self) -> Option> { + if self.size() == 0 { + return None; + } + Some(Range::new( + self.base_address(), + self.base_address().checked_add(self.size())? - 1, + )) + } +} + +impl Module for MinidumpUnloadedModule { + fn base_address(&self) -> u64 { + self.raw.base_of_image + } + fn size(&self) -> u64 { + self.raw.size_of_image as u64 + } + fn code_file(&self) -> Cow<'_, str> { + Cow::Borrowed(&self.name) + } + fn code_identifier(&self) -> Option { + // TODO: This should be returning None if the unloaded module is coming + // from a non-Windows minidump. 
We'll need info about the operating + // system, ideally sourced from the SystemInfo to be able to do this. + Some(CodeId::new(format!( + "{0:08X}{1:x}", + self.raw.time_date_stamp, self.raw.size_of_image + ))) + } + fn debug_file(&self) -> Option> { + None + } + fn debug_identifier(&self) -> Option { + None + } + fn version(&self) -> Option> { + None + } +} + +/// Parses X:Y or X=Y lists, skipping any blank/unparseable lines +fn linux_list_iter( + bytes: &[u8], + separator: u8, +) -> impl Iterator { + fn strip_quotes(input: &LinuxOsStr) -> &LinuxOsStr { + // Remove any extra surrounding whitespace since formats are inconsistent on this. + let input = input.trim_ascii_whitespace(); + + // Convert `"MyValue"` into `MyValue`, or just return the trimmed input. + let output = input + .strip_prefix(b"\"") + .and_then(|input| input.strip_suffix(b"\"")) + .unwrap_or(input); + + LinuxOsStr::from_bytes(output) + } + + let input = LinuxOsStr::from_bytes(bytes); + input.lines().filter_map(move |line| { + line.split_once(separator) + .map(|(label, val)| (strip_quotes(label), (strip_quotes(val)))) + }) +} + +fn read_stream_list<'a, T>( + offset: &mut usize, + bytes: &'a [u8], + endian: scroll::Endian, +) -> Result, Error> +where + T: TryFromCtx<'a, scroll::Endian, [u8], Error = scroll::Error>, + T: SizeWith, +{ + let u: u32 = bytes + .gread_with(offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let (count, counted_size) = ensure_count_in_bound( + bytes, + u as usize, + ::size_with(&endian), + mem::size_of::(), + )?; + + match bytes.len() - counted_size { + 0 => {} + 4 => { + // 4 bytes of padding. 
+ *offset += 4; + } + _ => { + return Err(Error::StreamSizeMismatch { + expected: counted_size, + actual: bytes.len(), + }); + } + }; + // read count T raw stream entries + let mut raw_entries = Vec::with_capacity(count); + for _ in 0..count { + let raw: T = bytes + .gread_with(offset, endian) + .or(Err(Error::StreamReadFailure))?; + raw_entries.push(raw); + } + Ok(raw_entries) +} + +fn read_ex_stream_list<'a, T>( + offset: &mut usize, + bytes: &'a [u8], + endian: scroll::Endian, +) -> Result, Error> +where + T: TryFromCtx<'a, scroll::Endian, [u8], Error = scroll::Error>, + T: SizeWith, +{ + // Some newer list streams have an extended header: + // + // size_of_header: u32, + // size_of_entry: u32, + // number_of_entries: u32, + // ...entries + + // In theory this allows the format of the stream to be extended without + // us knowing how to handle the new parts. + + let size_of_header: u32 = bytes + .gread_with(offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let size_of_entry: u32 = bytes + .gread_with(offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let number_of_entries: u32 = bytes + .gread_with(offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let expected_size_of_entry = ::size_with(&endian); + + if size_of_entry as usize != expected_size_of_entry { + // For now, conservatively bail out if entries don't have + // the expected size. In theory we can assume entries are + // always extended with new trailing fields, and this information + // would let us walk over trailing fields we don't know about? + // But without an example let's be safe. 
+ return Err(Error::StreamReadFailure); + } + + let (number_of_entries, _) = ensure_count_in_bound( + bytes, + number_of_entries as usize, + size_of_entry as usize, + size_of_header as usize, + )?; + + let header_padding = match (size_of_header as usize).checked_sub(*offset) { + Some(s) => s, + None => return Err(Error::StreamReadFailure), + }; + *offset += header_padding; + + // read count T raw stream entries + let mut raw_entries = Vec::with_capacity(number_of_entries); + for _ in 0..number_of_entries { + let raw: T = bytes + .gread_with(offset, endian) + .or(Err(Error::StreamReadFailure))?; + raw_entries.push(raw); + } + Ok(raw_entries) +} + +impl<'a> MinidumpStream<'a> for MinidumpThreadNames { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::ThreadNamesStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let mut offset = 0; + let raw_names: Vec = + read_stream_list(&mut offset, bytes, endian)?; + // read out the actual names + let mut names = BTreeMap::new(); + for raw_name in raw_names { + let mut offset = raw_name.thread_name_rva as usize; + // Better to just drop unreadable names individually than the whole stream. + if let Some(name) = read_string_utf16(&mut offset, all, endian) { + names.insert(raw_name.thread_id, name); + } else { + warn!( + "Couldn't read thread name for thread id {}", + raw_name.thread_id + ); + } + } + Ok(MinidumpThreadNames { names }) + } +} + +impl MinidumpThreadNames { + pub fn get_name(&self, thread_id: u32) -> Option> { + self.names + .get(&thread_id) + .map(|name| Cow::Borrowed(&**name)) + } + + /// Write a human-readable description of this `MinidumpThreadNames` to `f`. 
+ pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MinidumpThreadNames + thread_count = {} + +", + self.names.len() + )?; + for (i, (thread_id, name)) in self.names.iter().enumerate() { + writeln!( + f, + "thread_name[{i}] +MINIDUMP_THREAD_NAME + thread_id = {thread_id:#x} + name = \"{name}\" +" + )?; + } + + Ok(()) + } +} + +impl MinidumpModuleList { + /// Return an empty `MinidumpModuleList`. + pub fn new() -> MinidumpModuleList { + MinidumpModuleList { + modules: vec![], + modules_by_addr: RangeMap::new(), + } + } + /// Create a `MinidumpModuleList` from a list of `MinidumpModule`s. + pub fn from_modules(modules: Vec) -> MinidumpModuleList { + let modules_by_addr = modules + .iter() + .enumerate() + .map(|(i, module)| (module.memory_range(), i)) + .into_rangemap_safe(); + MinidumpModuleList { + modules, + modules_by_addr, + } + } + + /// Returns the module corresponding to the main executable. + pub fn main_module(&self) -> Option<&MinidumpModule> { + // The main code module is the first one present in a minidump file's + // MINIDUMP_MODULEList. + if !self.modules.is_empty() { + Some(&self.modules[0]) + } else { + None + } + } + + /// Return a `MinidumpModule` whose address range covers `address`. + pub fn module_at_address(&self, address: u64) -> Option<&MinidumpModule> { + self.modules_by_addr + .get(address) + .map(|&index| &self.modules[index]) + } + + /// Iterate over the modules in arbitrary order. + pub fn iter(&self) -> impl Iterator { + self.modules.iter() + } + + /// Iterate over the modules in order by memory address. + pub fn by_addr(&self) -> impl DoubleEndedIterator { + self.modules_by_addr + .ranges_values() + .map(move |&(_, index)| &self.modules[index]) + } + + /// Write a human-readable description of this `MinidumpModuleList` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MinidumpModuleList + module_count = {} + +", + self.modules.len() + )?; + for (i, module) in self.modules.iter().enumerate() { + writeln!(f, "module[{i}]")?; + module.print(f)?; + } + Ok(()) + } +} + +impl Default for MinidumpModuleList { + fn default() -> Self { + Self::new() + } +} + +impl<'a> MinidumpStream<'a> for MinidumpModuleList { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::ModuleListStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let mut offset = 0; + let raw_modules: Vec = read_stream_list(&mut offset, bytes, endian)?; + // read auxiliary data for each module + let mut modules = Vec::with_capacity(raw_modules.len()); + for (module_index, raw) in raw_modules.into_iter().enumerate() { + if raw.size_of_image == 0 || raw.size_of_image as u64 > (u64::MAX - raw.base_of_image) { + // Bad image size. + tracing::warn!( + module_index, + base = raw.base_of_image, + size = raw.size_of_image, + "bad module image size" + ); + continue; + } + modules.push(MinidumpModule::read(raw, all, endian, system_info)?); + } + Ok(MinidumpModuleList::from_modules(modules)) + } +} + +impl MinidumpUnloadedModuleList { + /// Return an empty `MinidumpModuleList`. + pub fn new() -> MinidumpUnloadedModuleList { + MinidumpUnloadedModuleList { + modules: vec![], + modules_by_addr: vec![], + } + } + /// Create a `MinidumpModuleList` from a list of `MinidumpModule`s. + pub fn from_modules(modules: Vec) -> MinidumpUnloadedModuleList { + let mut modules_by_addr = (0..modules.len()) + .filter_map(|i| modules[i].memory_range().map(|r| (r, i))) + .collect::>(); + + modules_by_addr.sort_by_key(|(range, _idx)| *range); + + MinidumpUnloadedModuleList { + modules, + modules_by_addr, + } + } + + /// Return an iterator of `MinidumpUnloadedModules` whose address range covers `address`. 
+ pub fn modules_at_address( + &self, + address: u64, + ) -> impl Iterator { + // We have all of our modules sorted by memory range (base address being the + // high-order value), and we need to get the range of values that overlap + // with our target address. I'm a bit too tired to work out the exact + // combination of binary searches to do this, so let's just use `filter` + // for now (unloaded_modules should be a bounded list anyway). + self.modules_by_addr + .iter() + .filter(move |(range, _idx)| range.contains(address)) + .map(move |(_range, idx)| &self.modules[*idx]) + } + + /// Iterate over the modules in arbitrary order. + pub fn iter(&self) -> impl Iterator { + self.modules.iter() + } + + /// Iterate over the modules in order by memory address. + pub fn by_addr(&self) -> impl Iterator { + self.modules_by_addr + .iter() + .map(move |&(_, index)| &self.modules[index]) + } + + /// Write a human-readable description of this `MinidumpModuleList` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MinidumpUnloadedModuleList + module_count = {} + +", + self.modules.len() + )?; + for (i, module) in self.modules.iter().enumerate() { + writeln!(f, "module[{i}]")?; + module.print(f)?; + } + Ok(()) + } +} + +impl Default for MinidumpUnloadedModuleList { + fn default() -> Self { + Self::new() + } +} + +impl<'a> MinidumpStream<'a> for MinidumpUnloadedModuleList { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::UnloadedModuleListStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let mut offset = 0; + let raw_modules: Vec = + read_ex_stream_list(&mut offset, bytes, endian)?; + // read auxiliary data for each module + let mut modules = Vec::with_capacity(raw_modules.len()); + for raw in raw_modules.into_iter() { + if raw.size_of_image == 0 || raw.size_of_image as u64 > (u64::MAX - raw.base_of_image) { + // Bad image size. + // TODO: just drop this module, keep the rest? + return Err(Error::ModuleReadFailure); + } + modules.push(MinidumpUnloadedModule::read(raw, all, endian)?); + } + Ok(MinidumpUnloadedModuleList::from_modules(modules)) + } +} + +// Generates an accessor for a HANDLE_DESCRIPTOR field with the following syntax: +// +// * VERSION_NUMBER: FIELD_NAME -> FIELD_TYPE +// +// With the following definitions: +// +// * VERSION_NUMBER: The HANDLE_DESCRIPTOR version this field was introduced in +// * FIELD_NAME: The name of the field to read +// * FIELD_TYPE: The type of the field +macro_rules! 
handle_descriptor_accessors { + () => {}; + (@def $name:ident $t:ty [$($variant:ident)+]) => { + #[allow(unreachable_patterns)] + pub fn $name(&self) -> Option<&$t> { + match self { + $( + RawHandleDescriptor::$variant(ref raw) => Some(&raw.$name), + )+ + _ => None, + } + } + }; + (1: $name:ident -> $t:ty, $($rest:tt)*) => { + handle_descriptor_accessors!(@def $name $t [HandleDescriptor HandleDescriptor2]); + handle_descriptor_accessors!($($rest)*); + }; + + (2: $name:ident -> $t:ty, $($rest:tt)*) => { + handle_descriptor_accessors!(@def $name $t [HandleDescriptor2]); + handle_descriptor_accessors!($($rest)*); + }; +} + +impl RawHandleDescriptor { + handle_descriptor_accessors!( + 1: handle -> u64, + 1: type_name_rva -> md::RVA, + 1: object_name_rva -> md::RVA, + 1: attributes -> u32, + 1: granted_access -> u32, + 1: handle_count -> u32, + 1: pointer_count -> u32, + 2: object_info_rva -> md::RVA, + ); +} + +impl MinidumpHandleDescriptor { + /// Write a human-readable description. + pub fn print(&self, f: &mut T) -> io::Result<()> { + macro_rules! 
write_simple_field { + ($stream:ident, $field:ident, $format:literal) => { + write!(f, " {:18}= ", stringify!($field))?; + match self.raw.$field() { + Some($field) => { + writeln!(f, $format, $field)?; + } + None => writeln!(f, "(invalid)")?, + } + }; + ($stream:ident, $field:ident) => { + write_simple_field!($stream, $field, "{}"); + }; + } + + writeln!(f, "MINIDUMP_HANDLE_DESCRIPTOR")?; + write_simple_field!(f, handle, "{:#x}"); + write_simple_field!(f, type_name_rva, "{:#x}"); + write_simple_field!(f, object_name_rva, "{:#x}"); + write_simple_field!(f, attributes, "{:#x}"); + write_simple_field!(f, granted_access, "{:#x}"); + write_simple_field!(f, handle_count); + write_simple_field!(f, pointer_count); + write_simple_field!(f, object_info_rva, "{:#x}"); + write!(f, " (type_name) = ")?; + if let Some(type_name) = &self.type_name { + writeln!(f, "{type_name:}")?; + } else { + writeln!(f, "(null)")?; + }; + write!(f, " (object_name) = ")?; + if let Some(object_name) = &self.object_name { + writeln!(f, "{object_name:}")?; + } else { + writeln!(f, "(null)")?; + }; + if self.object_infos.is_empty() { + writeln!(f, " (object_info) = (null)")?; + } else { + for object_info in &self.object_infos { + writeln!(f, " (object_info) = {object_info:}")?; + } + } + writeln!(f) + } + + fn read_string(offset: usize, ctx: HandleDescriptorContext) -> Option { + let mut offset = offset; + if offset != 0 { + read_string_utf16(&mut offset, ctx.bytes, ctx.endianess) + } else { + None + } + } + + fn read_object_info( + offset: usize, + ctx: HandleDescriptorContext, + ) -> Option { + if offset != 0 { + ctx.bytes + .pread_with::(offset, ctx.endianess) + .ok() + .map(|raw| MinidumpHandleObjectInformation { + raw: raw.clone(), + info_type: md::MINIDUMP_HANDLE_OBJECT_INFORMATION_TYPE::from_u32(raw.info_type) + .unwrap(), + }) + } else { + None + } + } +} + +#[derive(Copy, Clone)] +struct HandleDescriptorContext<'a> { + bytes: &'a [u8], + fieldsize: u32, + endianess: scroll::Endian, +} + 
+impl<'a> HandleDescriptorContext<'a> { + fn new(bytes: &'a [u8], fieldsize: u32, endianess: scroll::Endian) -> HandleDescriptorContext { + HandleDescriptorContext { + bytes, + fieldsize, + endianess, + } + } +} + +impl<'a> TryFromCtx<'a, HandleDescriptorContext<'a>> for MinidumpHandleDescriptor { + type Error = scroll::Error; + + fn try_from_ctx( + src: &'a [u8], + ctx: HandleDescriptorContext, + ) -> Result<(Self, usize), Self::Error> { + const MINIDUMP_HANDLE_DESCRIPTOR_SIZE: u32 = + mem::size_of::() as u32; + const MINIDUMP_HANDLE_DESCRIPTOR_2_SIZE: u32 = + mem::size_of::() as u32; + + match ctx.fieldsize { + MINIDUMP_HANDLE_DESCRIPTOR_SIZE => { + let raw = src.pread_with::(0, ctx.endianess)?; + let type_name = Self::read_string(raw.type_name_rva as usize, ctx); + let object_name = Self::read_string(raw.object_name_rva as usize, ctx); + Ok(( + MinidumpHandleDescriptor { + raw: RawHandleDescriptor::HandleDescriptor(raw), + type_name, + object_name, + object_infos: Vec::new(), + }, + ctx.fieldsize as usize, + )) + } + MINIDUMP_HANDLE_DESCRIPTOR_2_SIZE => { + let raw = src.pread_with::(0, ctx.endianess)?; + let type_name = Self::read_string(raw.type_name_rva as usize, ctx); + let object_name = Self::read_string(raw.object_name_rva as usize, ctx); + let mut object_infos = Vec::::new(); + let mut object_info_rva = raw.object_info_rva; + + while object_info_rva != 0 { + if let Some(object_info) = Self::read_object_info(object_info_rva as usize, ctx) + { + object_info_rva = object_info.raw.next_info_rva; + object_infos.push(object_info); + } else { + break; + } + } + + Ok(( + MinidumpHandleDescriptor { + raw: RawHandleDescriptor::HandleDescriptor2(raw), + type_name, + object_name, + object_infos, + }, + ctx.fieldsize as usize, + )) + } + _ => Err(scroll::Error::BadInput { + size: ctx.fieldsize as usize, + msg: "Unknown MINIDUMP_HANDLE_DESCRIPTOR type", + }), + } + } +} + +impl MinidumpHandleDataStream { + /// Return an empty `MinidumpHandleDataStream`. 
+ pub fn new() -> MinidumpHandleDataStream { + MinidumpHandleDataStream { handles: vec![] } + } + + /// Create a `MinidumpHandleDataStream` from a list of `MinidumpHandleDescriptor`s. + pub fn from_handles(handles: Vec) -> MinidumpHandleDataStream { + MinidumpHandleDataStream { handles } + } + + /// Iterate over the handles in the order contained in the minidump. + pub fn iter(&self) -> impl Iterator { + self.handles.iter() + } + + /// Write a human-readable description. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MinidumpHandleDataStream + handle_count = {} + +", + self.handles.len() + )?; + for (i, handle) in self.handles.iter().enumerate() { + writeln!(f, "handle[{i}]")?; + handle.print(f)?; + } + Ok(()) + } +} + +impl Default for MinidumpHandleDataStream { + fn default() -> Self { + Self::new() + } +} + +impl<'a> MinidumpStream<'a> for MinidumpHandleDataStream { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::HandleDataStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let mut offset = 0; + + let size_of_header = bytes + .gread_with::(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + let size_of_descriptor = bytes + .gread_with::(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + let number_of_descriptors = bytes + .gread_with::(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let ctx = HandleDescriptorContext::new(all, size_of_descriptor, endian); + let (number_of_entries, _) = ensure_count_in_bound( + bytes, + number_of_descriptors as usize, + size_of_descriptor as usize, + size_of_header as usize, + )?; + + // Skip the header + offset = size_of_header as usize; + + let mut descriptors = Vec::::with_capacity(number_of_entries); + for _ in 0..number_of_entries { + let descriptor: MinidumpHandleDescriptor = bytes + .gread_with(&mut offset, ctx) + .or(Err(Error::StreamReadFailure))?; + 
descriptors.push(descriptor); + } + + Ok(MinidumpHandleDataStream::from_handles(descriptors)) + } +} + +impl<'a> MinidumpMemory<'a> { + pub fn read( + desc: &md::MINIDUMP_MEMORY_DESCRIPTOR, + data: &'a [u8], + endian: scroll::Endian, + ) -> Result, Error> { + if desc.memory.rva == 0 || desc.memory.data_size == 0 { + // Windows will sometimes emit null stack RVAs, indicating that + // we need to lookup the address in a memory region. It's ok to + // emit an error for that here, the thread processing code will + // catch it. + return Err(Error::MemoryReadFailure); + } + let bytes = location_slice(data, &desc.memory).or(Err(Error::StreamReadFailure))?; + Ok(MinidumpMemory { + desc: *desc, + base_address: desc.start_of_memory_range, + size: desc.memory.data_size as u64, + bytes, + endian, + }) + } + + /// Write a human-readable description of this `MinidumpMemory` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T, brief: bool) -> io::Result<()> { + write!( + f, + "MINIDUMP_MEMORY_DESCRIPTOR + start_of_memory_range = {:#x} + memory.data_size = {:#x} + memory.rva = {:#x} +", + self.desc.start_of_memory_range, self.desc.memory.data_size, self.desc.memory.rva, + )?; + if !brief { + writeln!(f, "Memory")?; + self.print_contents(f)?; + } + writeln!(f) + } +} + +impl<'a> MinidumpMemory64<'a> { + /// Write a human-readable description of this `MinidumpMemory64` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T, brief: bool) -> io::Result<()> { + write!( + f, + "MINIDUMP_MEMORY_DESCRIPTOR64 + start_of_memory_range = {:#x} + memory.data_size = {:#x} +", + self.desc.start_of_memory_range, self.desc.data_size, + )?; + if !brief { + writeln!(f, "Memory")?; + self.print_contents(f)?; + } + writeln!(f) + } +} + +impl<'a, Descriptor> MinidumpMemoryBase<'a, Descriptor> { + /// Get `mem::size_of::()` bytes of memory at `addr` from this region. 
+ /// + /// Return `None` if the requested address range falls out of the bounds + /// of this memory region. + pub fn get_memory_at_address(&self, addr: u64) -> Option + where + T: TryFromCtx<'a, scroll::Endian, [u8], Error = scroll::Error>, + { + let start = addr.checked_sub(self.base_address)? as usize; + + self.bytes.pread_with::(start, self.endian).ok() + } + + /// Write the contents of this `MinidumpMemory` to `f` as a hex string. + pub fn print_contents(&self, f: &mut T) -> io::Result<()> { + const PARAGRAPH_SIZE: usize = 16; + let mut offset = 0; + for paragraph in self.bytes.chunks(PARAGRAPH_SIZE) { + write!(f, " {offset:08x}: ")?; + let mut byte_iter = paragraph.iter().fuse(); + for _ in 0..PARAGRAPH_SIZE { + if let Some(byte) = byte_iter.next() { + write!(f, "{byte:02x} ")?; + } else { + write!(f, " ")?; + } + } + for &byte in paragraph.iter() { + let ascii_char = if !byte.is_ascii() || byte.is_ascii_control() { + '.' + } else { + char::from(byte) + }; + + write!(f, "{ascii_char}")?; + } + writeln!(f)?; + + offset += PARAGRAPH_SIZE; + } + Ok(()) + } + + pub fn memory_range(&self) -> Option> { + if self.size == 0 { + return None; + } + Some(Range::new( + self.base_address, + self.base_address.checked_add(self.size)? 
- 1, + )) + } +} + +impl<'a, 'mdmp> UnifiedMemory<'a, 'mdmp> { + pub fn get_memory_at_address(&self, addr: u64) -> Option + where + T: TryFromCtx<'mdmp, scroll::Endian, [u8], Error = scroll::Error>, + { + match self { + UnifiedMemory::Memory(this) => this.get_memory_at_address(addr), + UnifiedMemory::Memory64(this) => this.get_memory_at_address(addr), + } + } + + pub fn memory_range(&self) -> Option> { + match self { + UnifiedMemory::Memory(this) => this.memory_range(), + UnifiedMemory::Memory64(this) => this.memory_range(), + } + } + + pub fn bytes(&self) -> &'a [u8] { + match self { + UnifiedMemory::Memory(this) => this.bytes, + UnifiedMemory::Memory64(this) => this.bytes, + } + } + + pub fn base_address(&self) -> u64 { + match self { + UnifiedMemory::Memory(this) => this.base_address, + UnifiedMemory::Memory64(this) => this.base_address, + } + } + + pub fn size(&self) -> u64 { + match self { + UnifiedMemory::Memory(this) => this.size, + UnifiedMemory::Memory64(this) => this.size, + } + } + + pub fn print_contents(&self, f: &mut T) -> io::Result<()> { + match self { + UnifiedMemory::Memory(this) => this.print_contents(f), + UnifiedMemory::Memory64(this) => this.print_contents(f), + } + } + + pub fn print(&self, f: &mut T, brief: bool) -> io::Result<()> { + match self { + UnifiedMemory::Memory(this) => this.print(f, brief), + UnifiedMemory::Memory64(this) => this.print(f, brief), + } + } +} + +impl<'mdmp, Descriptor> MinidumpMemoryListBase<'mdmp, Descriptor> { + /// Return an empty `MinidumpMemoryListBase`. + pub fn new() -> MinidumpMemoryListBase<'mdmp, Descriptor> { + MinidumpMemoryListBase { + regions: vec![], + regions_by_addr: RangeMap::new(), + } + } + + /// Create a `MinidumpMemoryListBase` from a list of `MinidumpMemoryBase`s. 
+ pub fn from_regions( + regions: Vec>, + ) -> MinidumpMemoryListBase<'mdmp, Descriptor> { + let regions_by_addr = regions + .iter() + .enumerate() + .map(|(i, region)| (region.memory_range(), i)) + .into_rangemap_safe(); + MinidumpMemoryListBase { + regions, + regions_by_addr, + } + } + + /// Return a `MinidumpMemoryBase` containing memory at `address`, if one exists. + pub fn memory_at_address( + &self, + address: u64, + ) -> Option<&MinidumpMemoryBase<'mdmp, Descriptor>> { + self.regions_by_addr + .get(address) + .and_then(|&index| self.regions.get(index)) + } + + /// Iterate over the memory regions in the order contained in the minidump. + /// + /// The iterator returns items of [MinidumpMemoryBase] as `&'slf MinidumpMemoryBase<'mdmp, Descriptor>`. + /// That is the lifetime of the item is bound to the lifetime of the iterator itself + /// (`'slf`), while the slice inside [MinidumpMemoryBase] pointing at the memory itself has + /// the lifetime of the [Minidump] struct ('mdmp). + pub fn iter<'slf>( + &'slf self, + ) -> impl Iterator> { + self.regions.iter() + } + + /// Iterate over the memory regions in order by memory address. + pub fn by_addr<'slf>( + &'slf self, + ) -> impl Iterator> { + self.regions_by_addr + .ranges_values() + .map(move |&(_, index)| &self.regions[index]) + } +} + +impl<'mdmp> MinidumpMemoryList<'mdmp> { + /// Write a human-readable description of this `MinidumpMemoryList` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T, brief: bool) -> io::Result<()> { + write!( + f, + "MinidumpMemoryList + region_count = {} + +", + self.regions.len() + )?; + for (i, region) in self.regions.iter().enumerate() { + writeln!(f, "region[{i}]")?; + region.print(f, brief)?; + } + Ok(()) + } +} + +impl<'mdmp> MinidumpMemory64List<'mdmp> { + /// Write a human-readable description of this `MinidumpMemory64List` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print(&self, f: &mut T, brief: bool) -> io::Result<()> { + write!( + f, + "MinidumpMemory64List + region_count = {} + +", + self.regions.len() + )?; + for (i, region) in self.regions.iter().enumerate() { + writeln!(f, "region[{i}]")?; + region.print(f, brief)?; + } + Ok(()) + } +} + +impl<'mdmp> UnifiedMemoryList<'mdmp> { + pub fn memory_at_address<'slf>(&'slf self, address: u64) -> Option> { + match self { + UnifiedMemoryList::Memory(this) => { + this.memory_at_address(address).map(UnifiedMemory::Memory) + } + UnifiedMemoryList::Memory64(this) => { + this.memory_at_address(address).map(UnifiedMemory::Memory64) + } + } + } + + pub fn iter<'slf>(&'slf self) -> impl Iterator> { + let iter1 = if let UnifiedMemoryList::Memory(this) = self { + Some(this.iter().map(UnifiedMemory::Memory)) + } else { + None + }; + let iter2 = if let UnifiedMemoryList::Memory64(this) = self { + Some(this.iter().map(UnifiedMemory::Memory64)) + } else { + None + }; + iter1 + .into_iter() + .flatten() + .chain(iter2.into_iter().flatten()) + } + + /// Iterate over the memory regions in order by memory address. 
+ pub fn by_addr<'slf>(&'slf self) -> impl Iterator> { + let iter1 = if let UnifiedMemoryList::Memory(this) = self { + Some(this.by_addr().map(UnifiedMemory::Memory)) + } else { + None + }; + let iter2 = if let UnifiedMemoryList::Memory64(this) = self { + Some(this.by_addr().map(UnifiedMemory::Memory64)) + } else { + None + }; + iter1 + .into_iter() + .flatten() + .chain(iter2.into_iter().flatten()) + } + + pub fn print(&self, f: &mut T, brief: bool) -> io::Result<()> { + match self { + UnifiedMemoryList::Memory(this) => this.print(f, brief), + UnifiedMemoryList::Memory64(this) => this.print(f, brief), + } + } +} + +impl<'a, Descriptor> Default for MinidumpMemoryListBase<'a, Descriptor> { + fn default() -> Self { + Self::new() + } +} + +impl<'a> MinidumpStream<'a> for MinidumpMemoryList<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::MemoryListStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + let mut offset = 0; + let descriptors: Vec = + read_stream_list(&mut offset, bytes, endian)?; + // read memory contents for each region + let mut regions = Vec::with_capacity(descriptors.len()); + for raw in descriptors.into_iter() { + if let Ok(memory) = MinidumpMemory::read(&raw, all, endian) { + regions.push(memory); + } else { + // Just skip over corrupt entries and try to limp along. 
+ continue; + } + } + Ok(MinidumpMemoryList::from_regions(regions)) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpMemory64List<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::Memory64ListStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + let mut offset = 0; + let u: u64 = bytes + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let mut rva: u64 = bytes + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let (count, counted_size) = ensure_count_in_bound( + bytes, + u.try_into().map_err(|_| Error::StreamReadFailure)?, + md::MINIDUMP_MEMORY_DESCRIPTOR64::size_with(&endian), + offset, + )?; + + if bytes.len() != counted_size { + return Err(Error::StreamSizeMismatch { + expected: counted_size, + actual: bytes.len(), + }); + } + + let mut raw_entries = Vec::with_capacity(count); + for _ in 0..count { + let raw: md::MINIDUMP_MEMORY_DESCRIPTOR64 = bytes + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + raw_entries.push(raw); + } + + let mut regions = Vec::with_capacity(raw_entries.len()); + for raw in raw_entries { + let start = rva; + let end = rva + .checked_add(raw.data_size) + .ok_or(Error::StreamReadFailure)?; + let bytes = all + .get(start as usize..end as usize) + .ok_or(Error::StreamReadFailure)?; + + regions.push(MinidumpMemory64 { + desc: raw, + base_address: raw.start_of_memory_range, + size: raw.data_size, + bytes, + endian, + }); + + rva = end; + } + Ok(MinidumpMemory64List::from_regions(regions)) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpMemoryInfoList<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::MemoryInfoListStream as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + let mut offset = 0; + let raw_regions: Vec = + read_ex_stream_list(&mut 
offset, bytes, endian)?; + let regions = raw_regions + .into_iter() + .map(|raw| MinidumpMemoryInfo { + allocation_protection: md::MemoryProtection::from_bits_truncate( + raw.allocation_protection, + ), + state: md::MemoryState::from_bits_truncate(raw.state), + protection: md::MemoryProtection::from_bits_truncate(raw.protection), + ty: md::MemoryType::from_bits_truncate(raw._type), + raw, + _phantom: PhantomData, + }) + .collect(); + Ok(MinidumpMemoryInfoList::from_regions(regions)) + } +} + +impl<'a> Default for MinidumpMemoryInfoList<'a> { + fn default() -> Self { + Self::new() + } +} + +impl<'mdmp> MinidumpMemoryInfoList<'mdmp> { + /// Return an empty `MinidumpMemoryList`. + pub fn new() -> MinidumpMemoryInfoList<'mdmp> { + MinidumpMemoryInfoList { + regions: vec![], + regions_by_addr: RangeMap::new(), + } + } + + /// Create a `MinidumpMemoryList` from a list of `MinidumpMemory`s. + pub fn from_regions(regions: Vec>) -> MinidumpMemoryInfoList<'mdmp> { + let regions_by_addr = regions + .iter() + .enumerate() + .map(|(i, region)| (region.memory_range(), i)) + .into_rangemap_safe(); + MinidumpMemoryInfoList { + regions, + regions_by_addr, + } + } + + /// Return a `MinidumpMemory` containing memory at `address`, if one exists. + pub fn memory_info_at_address(&self, address: u64) -> Option<&MinidumpMemoryInfo<'mdmp>> { + self.regions_by_addr + .get(address) + .map(|&index| &self.regions[index]) + } + + /// Iterate over the memory regions in the order contained in the minidump. + /// + /// The iterator returns items of [MinidumpMemory] as `&'slf MinidumpMemory<'mdmp>`. + /// That is the lifetime of the item is bound to the lifetime of the iterator itself + /// (`'slf`), while the slice inside [MinidumpMemory] pointing at the memory itself has + /// the lifetime of the [Minidump] struct ('mdmp). + pub fn iter<'slf>(&'slf self) -> impl Iterator> { + self.regions.iter() + } + + /// Iterate over the memory regions in order by memory address. 
+ pub fn by_addr<'slf>(&'slf self) -> impl Iterator> { + self.regions_by_addr + .ranges_values() + .map(move |&(_, index)| &self.regions[index]) + } + + /// Write a human-readable description. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MinidumpMemoryInfoList + region_count = {} + +", + self.regions.len() + )?; + for (i, region) in self.regions.iter().enumerate() { + writeln!(f, "region[{i}]")?; + region.print(f)?; + } + Ok(()) + } +} + +impl<'a> MinidumpMemoryInfo<'a> { + /// Write a human-readable description. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MINIDUMP_MEMORY_INFO + base_address = {:#x} + allocation_base = {:#x} + allocation_protection = {:#x} + region_size = {:#x} + state = {:#x} + protection = {:#x} + _type = {:#x} +", + self.raw.base_address, + self.raw.allocation_base, + self.allocation_protection, + self.raw.region_size, + self.state, + self.protection, + self.ty, + )?; + writeln!(f) + } + + pub fn memory_range(&self) -> Option> { + if self.raw.region_size == 0 { + return None; + } + Some(Range::new( + self.raw.base_address, + self.raw.base_address.checked_add(self.raw.region_size)? - 1, + )) + } + + /// Whether this memory range was readable. + pub fn is_readable(&self) -> bool { + self.protection.intersects( + md::MemoryProtection::PAGE_READONLY + | md::MemoryProtection::PAGE_READWRITE + | md::MemoryProtection::PAGE_EXECUTE_READ + | md::MemoryProtection::PAGE_EXECUTE_READWRITE, + ) + } + + /// Whether this memory range was writable. + pub fn is_writable(&self) -> bool { + self.protection.intersects( + md::MemoryProtection::PAGE_READWRITE + | md::MemoryProtection::PAGE_WRITECOPY + | md::MemoryProtection::PAGE_EXECUTE_READWRITE + | md::MemoryProtection::PAGE_EXECUTE_WRITECOPY, + ) + } + + /// Whether this memory range was executable. 
+ pub fn is_executable(&self) -> bool { + self.protection.intersects( + md::MemoryProtection::PAGE_EXECUTE + | md::MemoryProtection::PAGE_EXECUTE_READ + | md::MemoryProtection::PAGE_EXECUTE_READWRITE + | md::MemoryProtection::PAGE_EXECUTE_WRITECOPY, + ) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpLinuxMaps<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::LinuxMaps as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + _endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + let maps = MemoryMaps::from_read(std::io::Cursor::new(bytes)).map_err(|e| { + tracing::error!("linux memory map read error: {e}"); + Error::StreamReadFailure + })?; + + Ok(MinidumpLinuxMaps::from_regions( + maps.into_iter() + .map(|map| MinidumpLinuxMapInfo { + map, + _phantom: PhantomData, + }) + .collect(), + )) + } +} + +impl<'a> Default for MinidumpLinuxMaps<'a> { + fn default() -> Self { + Self::new() + } +} + +impl<'mdmp> MinidumpLinuxMaps<'mdmp> { + /// Return an empty `MinidumpMemoryList`. + pub fn new() -> Self { + Self { + regions: vec![], + regions_by_addr: RangeMap::new(), + } + } + + /// Create a `MinidumpMemoryList` from a list of `MinidumpMemory`s. + pub fn from_regions(regions: Vec>) -> Self { + let regions_by_addr = regions + .iter() + .enumerate() + .map(|(i, region)| (region.memory_range(), i)) + .into_rangemap_safe(); + Self { + regions, + regions_by_addr, + } + } + + /// Return a `MinidumpMemory` containing memory at `address`, if one exists. + pub fn memory_info_at_address(&self, address: u64) -> Option<&MinidumpLinuxMapInfo<'mdmp>> { + self.regions_by_addr + .get(address) + .map(|&index| &self.regions[index]) + } + + /// Iterate over the memory regions in the order contained in the minidump. + pub fn iter<'slf>(&'slf self) -> impl Iterator> { + self.regions.iter() + } + + /// Iterate over the memory regions in order by memory address. 
+ pub fn by_addr<'slf>(&'slf self) -> impl Iterator> { + self.regions_by_addr + .ranges_values() + .map(move |&(_, index)| &self.regions[index]) + } + + /// Write a human-readable description. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MinidumpLinuxMapInfo + region_count = {} + +", + self.regions.len() + )?; + for (i, region) in self.regions.iter().enumerate() { + writeln!(f, "region[{i}]")?; + region.print(f)?; + } + Ok(()) + } + + // Return number of memory mappings + pub fn memory_map_count(&self) -> usize { + self.regions.len() + } +} + +impl<'a> MinidumpLinuxMapInfo<'a> { + /// Write a human-readable description of this. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MINIDUMP_LINUX_MAP_INFO + base_address = {:#x} + final_address = {:#x} + kind = {:#?} + permissions = {} +", + self.map.address.0, + self.map.address.1, + self.map.pathname, + self.map.perms.as_str() + )?; + writeln!(f) + } + + pub fn memory_range(&self) -> Option> { + // final address is inclusive afaik + if self.map.address.0 > self.map.address.1 { + return None; + } + Some(Range::new(self.map.address.0, self.map.address.1)) + } + + /// Whether this memory range was readable. + pub fn is_readable(&self) -> bool { + self.map.perms.contains(MMPermissions::READ) + } + + /// Whether this memory range was writable. + pub fn is_writable(&self) -> bool { + self.map.perms.contains(MMPermissions::WRITE) + } + + /// Whether this memory range was executable. + pub fn is_executable(&self) -> bool { + self.map.perms.contains(MMPermissions::EXECUTE) + } + + #[cfg(test)] + pub fn from_line(bytes: &[u8]) -> Option { + let map = MemoryMaps::from_read(std::io::Cursor::new(bytes)) + .ok()? 
+ .into_iter() + .next()?; + Some(MinidumpLinuxMapInfo { + map, + _phantom: PhantomData, + }) + } +} + +impl<'a> Default for UnifiedMemoryInfoList<'a> { + fn default() -> Self { + Self::Info(MinidumpMemoryInfoList::default()) + } +} + +impl<'a> UnifiedMemoryInfoList<'a> { + /// Take two potential memory info sources and create an interface that unifies them. + /// + /// Under normal circumstances a minidump should only contain one of these. + /// If both are provided, one will be arbitrarily preferred to attempt to + /// make progress. + pub fn new( + info: Option>, + maps: Option>, + ) -> Option { + match (info, maps) { + (Some(info), Some(_maps)) => { + warn!("UnifiedMemoryInfoList got both kinds of info! (using InfoList)"); + // Just pick one I guess? + Some(Self::Info(info)) + } + (Some(info), None) => Some(Self::Info(info)), + (None, Some(maps)) => Some(Self::Maps(maps)), + (None, None) => None, + } + } + + /// Return a `MinidumpMemory` containing memory at `address`, if one exists. + pub fn memory_info_at_address(&self, address: u64) -> Option { + match self { + Self::Info(info) => info + .memory_info_at_address(address) + .map(UnifiedMemoryInfo::Info), + Self::Maps(maps) => maps + .memory_info_at_address(address) + .map(UnifiedMemoryInfo::Map), + } + } + + /// Iterate over the memory regions in the order contained in the minidump. + pub fn iter(&self) -> impl Iterator { + // Use `flat_map` and `chain` to create a unified stream of the two types + // (only one of which will conatin any values). Note that we are using + // the fact that `Option` can be iterated (producing 1 to 0 values). + let info = self + .info() + .into_iter() + .flat_map(|info| info.iter().map(UnifiedMemoryInfo::Info)); + let maps = self + .maps() + .into_iter() + .flat_map(|maps| maps.iter().map(UnifiedMemoryInfo::Map)); + + info.chain(maps) + } + + /// Iterate over the memory regions in order by memory address. 
+ pub fn by_addr(&self) -> impl Iterator { + let info = self + .info() + .into_iter() + .flat_map(|info| info.by_addr().map(UnifiedMemoryInfo::Info)); + let maps = self + .maps() + .into_iter() + .flat_map(|maps| maps.by_addr().map(UnifiedMemoryInfo::Map)); + + info.chain(maps) + } + + /// Write a human-readable description of this `MinidumpMemoryList` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T) -> io::Result<()> { + match self { + Self::Info(info) => info.print(f), + Self::Maps(maps) => maps.print(f), + } + } + + /// Get the [`MinidumpLinuxMaps`] contained inside, if it exists. + /// + /// Potentially useful for doing a more refined analysis in specific places. + pub fn maps(&self) -> Option<&MinidumpLinuxMaps<'a>> { + match &self { + Self::Maps(maps) => Some(maps), + Self::Info(_) => None, + } + } + + /// Get the [`MinidumpMemoryInfoList`] contained inside, if it exists. + /// + /// Potentially useful for doing a more refined analysis in specific places. + pub fn info(&self) -> Option<&MinidumpMemoryInfoList<'a>> { + match &self { + Self::Maps(_) => None, + Self::Info(info) => Some(info), + } + } +} + +/// Declares functions which will forward to functions of the same name on the inner +/// `UnifiedMemoryInfo` members. +macro_rules! unified_memory_forward { + () => {}; + ( $(#[$attr:meta])* $vis:vis fn $name:ident $(< $($t_param:ident : $t_type:path),* >)? ( &self $(, $param:ident : $type:ty)* ) -> $ret:ty ; $($rest:tt)* ) => { + $(#[$attr])* + $vis fn $name $(< $($t_param : $t_type),* >)? (&self $(, $param : $type)*) -> $ret { + match self { + Self::Info(info) => info.$name($($param),*), + Self::Map(map) => map.$name($($param),*), + } + } + + unified_memory_forward!($($rest)*); + }; +} + +impl<'a> UnifiedMemoryInfo<'a> { + unified_memory_forward! { + /// Write a human-readable description. + pub fn print(&self, f: &mut T) -> io::Result<()>; + + /// The range of memory this info applies to. 
+ pub fn memory_range(&self) -> Option>; + + /// Whether this memory range was readable. + pub fn is_readable(&self) -> bool; + + /// Whether this memory range was writable. + pub fn is_writable(&self) -> bool; + + /// Whether this memory range was executable. + pub fn is_executable(&self) -> bool; + } +} + +impl<'a> MinidumpThread<'a> { + pub fn context( + &self, + system_info: &MinidumpSystemInfo, + misc: Option<&MinidumpMiscInfo>, + ) -> Option> { + MinidumpContext::read(self.context?, self.endian, system_info, misc) + .ok() + .map(Cow::Owned) + } + + pub fn stack_memory<'mem>( + &'mem self, + memory_list: &'mem UnifiedMemoryList<'a>, + ) -> Option> { + self.stack.as_ref().map(UnifiedMemory::Memory).or_else(|| { + // Sometimes the raw.stack RVA is null/busted, but the start_of_memory_range + // value is correct. So if the `read` fails, try resolving start_of_memory_range + // with the MinidumpMemoryList. (This seems to specifically be a problem with + // Windows minidumps.) + let stack_addr = self.raw.stack.start_of_memory_range; + let memory = memory_list.memory_at_address(stack_addr)?; + Some(memory) + }) + } + + /// Write a human-readable description of this `MinidumpThread` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print( + &self, + f: &mut T, + memory: Option<&UnifiedMemoryList<'a>>, + system: Option<&MinidumpSystemInfo>, + misc: Option<&MinidumpMiscInfo>, + brief: bool, + ) -> io::Result<()> { + write!( + f, + r#"MINIDUMP_THREAD + thread_id = {:#x} + suspend_count = {} + priority_class = {:#x} + priority = {:#x} + teb = {:#x} + stack.start_of_memory_range = {:#x} + stack.memory.data_size = {:#x} + stack.memory.rva = {:#x} + thread_context.data_size = {:#x} + thread_context.rva = {:#x} + +"#, + self.raw.thread_id, + self.raw.suspend_count, + self.raw.priority_class, + self.raw.priority, + self.raw.teb, + self.raw.stack.start_of_memory_range, + self.raw.stack.memory.data_size, + self.raw.stack.memory.rva, + self.raw.thread_context.data_size, + self.raw.thread_context.rva, + )?; + if let Some(system_info) = system { + if let Some(ctx) = self.context(system_info, misc) { + ctx.print(f)?; + } else { + write!(f, " (no context)\n\n")?; + } + } else { + write!(f, " (no context)\n\n")?; + } + + if brief { + writeln!(f)?; + return Ok(()); + } + + let pointer_width = system.map_or(PointerWidth::Unknown, |info| info.cpu.pointer_width()); + + // We might not need any memory, so try to limp forward with an empty + // MemoryList if we don't have one. 
+ let dummy_memory = UnifiedMemoryList::default(); + let memory = memory.unwrap_or(&dummy_memory); + if let Some(ref stack) = self.stack_memory(memory) { + writeln!(f, "Stack")?; + + // For printing purposes, we'll treat any unknown CPU type as 64-bit + let chunk_size: usize = pointer_width.size_in_bytes().unwrap_or(8).into(); + let mut offset = 0; + for chunk in stack.bytes().chunks_exact(chunk_size) { + write!(f, " {offset:#010x}: ")?; + + match pointer_width { + PointerWidth::Bits32 => { + let value = match self.endian { + scroll::Endian::Little => u32::from_le_bytes(chunk.try_into().unwrap()), + scroll::Endian::Big => u32::from_be_bytes(chunk.try_into().unwrap()), + }; + write!(f, "{value:#010x}")?; + } + PointerWidth::Unknown | PointerWidth::Bits64 => { + let value = match self.endian { + scroll::Endian::Little => u64::from_le_bytes(chunk.try_into().unwrap()), + scroll::Endian::Big => u64::from_be_bytes(chunk.try_into().unwrap()), + }; + write!(f, "{value:#018x}")?; + } + } + + writeln!(f)?; + + offset += chunk_size; + } + } else { + writeln!(f, "No stack")?; + } + writeln!(f)?; + Ok(()) + } + + /// Gets the last error code the thread recorded, just like win32's GetLastError. + /// + /// The value is heuristically converted into a CrashReason because that's our + /// general error code handling machinery, even though this may not actually be + /// the reason for the crash! + pub fn last_error(&self, cpu: Cpu, memory: &UnifiedMemoryList) -> Option { + // Early hacky implementation: rather than implementing all the TEB layouts, + // just use the fact that we know the value we want is a 13-pointers offset + // from the start of the TEB. + let teb = self.raw.teb; + let pointer_width = cpu.pointer_width().size_in_bytes()? as u64; + let offset = pointer_width.checked_mul(13)?; + let addr = teb.checked_add(offset)?; + let val: u32 = memory + .memory_at_address(addr)? 
+ .get_memory_at_address(addr)?; + + Some(CrashReason::from_windows_error(val)) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpThreadList<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::ThreadListStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + let mut offset = 0; + let raw_threads: Vec = read_stream_list(&mut offset, bytes, endian)?; + let mut threads = Vec::with_capacity(raw_threads.len()); + let mut thread_ids = HashMap::with_capacity(raw_threads.len()); + for raw in raw_threads.into_iter() { + thread_ids.insert(raw.thread_id, threads.len()); + + // Defer parsing of this to the `context` method, where we will have access + // to other streams that are required to parse a context properly. + let context = location_slice(all, &raw.thread_context).ok(); + + // Try to get the stack memory here, but the `stack_memory` method will + // attempt a fallback method with access to other streams. + let stack = MinidumpMemory::read(&raw.stack, all, endian).ok(); + threads.push(MinidumpThread { + raw, + context, + stack, + endian, + }); + } + Ok(MinidumpThreadList { + threads, + thread_ids, + }) + } +} + +impl<'a> MinidumpThreadList<'a> { + /// Get the thread with id `id` from this thread list if it exists. + pub fn get_thread(&self, id: u32) -> Option<&MinidumpThread<'a>> { + self.thread_ids.get(&id).map(|&index| &self.threads[index]) + } + + /// Write a human-readable description of this `MinidumpThreadList` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print( + &self, + f: &mut T, + memory: Option<&UnifiedMemoryList<'a>>, + system: Option<&MinidumpSystemInfo>, + misc: Option<&MinidumpMiscInfo>, + brief: bool, + ) -> io::Result<()> { + write!( + f, + r#"MinidumpThreadList + thread_count = {} + +"#, + self.threads.len() + )?; + + for (i, thread) in self.threads.iter().enumerate() { + writeln!(f, "thread[{i}]")?; + thread.print(f, memory, system, misc, brief)?; + } + Ok(()) + } +} + +// implement print for MinidumpThreadInfo +impl MinidumpThreadInfo { + /// Write a human-readable description of this `MinidumpThreadInfo` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + r#"MINIDUMP_THREAD_INFO + thread_id = {:#x} + dump_flags = {:#x} + dump_error = {:#x} + exit_status = {:#x} + create_time = {:#x} + exit_time = {:#x} + kernel_time = {:#x} + user_time = {:#x} + start_address = {:#x} + affinity = {:#x} + + "#, + self.raw.thread_id, + self.raw.dump_flags, + self.raw.dump_error, + self.raw.exit_status, + self.raw.create_time, + self.raw.exit_time, + self.raw.kernel_time, + self.raw.user_time, + self.raw.start_address, + self.raw.affinity, + )?; + Ok(()) + } +} + +impl Default for MinidumpThreadInfoList { + fn default() -> Self { + Self::new() + } +} + +impl MinidumpThreadInfoList { + /// Return an empty `MinidumpThreadInfoList`. + pub fn new() -> MinidumpThreadInfoList { + MinidumpThreadInfoList { + thread_infos: vec![], + thread_ids: HashMap::new(), + } + } + + /// Get the thread info with id `id` from this thread info list if it exists. + pub fn get_thread_info(&self, id: u32) -> Option<&MinidumpThreadInfo> { + self.thread_ids + .get(&id) + .map(|&index| &self.thread_infos[index]) + } + + /// Write a human-readable description of this `MinidumpModuleList` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MinidumpThreadInfoList + thread_info_count = {} + +", + self.thread_infos.len() + )?; + for (i, thread_info) in self.thread_infos.iter().enumerate() { + writeln!(f, "thread info[{}]", i)?; + thread_info.print(f)?; + } + Ok(()) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpThreadInfoList { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::ThreadInfoListStream as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let mut offset = 0; + let raw_thread_infos: Vec = + read_ex_stream_list(&mut offset, bytes, endian)?; + + let mut thread_infos = Vec::with_capacity(raw_thread_infos.len()); + let mut thread_ids = HashMap::with_capacity(raw_thread_infos.len()); + for raw in raw_thread_infos.into_iter() { + thread_ids.insert(raw.thread_id, thread_infos.len()); + thread_infos.push(MinidumpThreadInfo { raw }); + } + + Ok(MinidumpThreadInfoList { + thread_infos, + thread_ids, + }) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpSystemInfo { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::SystemInfoStream as u32; + + fn read( + bytes: &[u8], + all: &[u8], + endian: scroll::Endian, + system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + if let Some(system_info) = system_info { + return Ok(system_info.clone()); + } + + use std::fmt::Write; + + let raw: md::MINIDUMP_SYSTEM_INFO = bytes + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + let os = Os::from_platform_id(raw.platform_id); + let cpu = Cpu::from_processor_architecture(raw.processor_architecture); + + let mut csd_offset = raw.csd_version_rva as usize; + let csd_version = read_string_utf16(&mut csd_offset, all, endian); + + // self.raw.cpu.data is actually a union which we resolve here. 
+ let cpu_info = match cpu { + Cpu::X86 | Cpu::X86_64 => { + let mut cpu_info = String::new(); + + if let Cpu::X86 = cpu { + // The vendor's ID is an ascii string but we need to flatten out the u32's into u8's + let x86_info: md::X86CpuInfo = raw + .cpu + .data + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + + cpu_info.extend( + x86_info + .vendor_id + .iter() + .flat_map(|i| IntoIterator::into_iter(i.to_le_bytes())) + .map(char::from), + ); + cpu_info.push(' '); + } + + write!( + &mut cpu_info, + "family {} model {} stepping {}", + raw.processor_level, + (raw.processor_revision >> 8) & 0xff, + raw.processor_revision & 0xff + ) + .unwrap(); + + Some(cpu_info) + } + Cpu::Arm => { + let arm_info: md::ARMCpuInfo = raw + .cpu + .data + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + + // There is no good list of implementer id values, but the following + // pages provide some help: + // http://comments.gmane.org/gmane.linux.linaro.devel/6903 + // http://forum.xda-developers.com/archive/index.php/t-480226.html + let vendors = [ + (0x41, "ARM"), + (0x51, "Qualcomm"), + (0x56, "Marvell"), + (0x69, "Intel/Marvell"), + ]; + let parts = [ + (0x4100c050, "Cortex-A5"), + (0x4100c080, "Cortex-A8"), + (0x4100c090, "Cortex-A9"), + (0x4100c0f0, "Cortex-A15"), + (0x4100c140, "Cortex-R4"), + (0x4100c150, "Cortex-R5"), + (0x4100b360, "ARM1136"), + (0x4100b560, "ARM1156"), + (0x4100b760, "ARM1176"), + (0x4100b020, "ARM11-MPCore"), + (0x41009260, "ARM926"), + (0x41009460, "ARM946"), + (0x41009660, "ARM966"), + (0x510006f0, "Krait"), + (0x510000f0, "Scorpion"), + ]; + let features = [ + (md::ArmElfHwCaps::HWCAP_SWP, "swp"), + (md::ArmElfHwCaps::HWCAP_HALF, "half"), + (md::ArmElfHwCaps::HWCAP_THUMB, "thumb"), + (md::ArmElfHwCaps::HWCAP_26BIT, "26bit"), + (md::ArmElfHwCaps::HWCAP_FAST_MULT, "fastmult"), + (md::ArmElfHwCaps::HWCAP_FPA, "fpa"), + (md::ArmElfHwCaps::HWCAP_VFP, "vfpv2"), + (md::ArmElfHwCaps::HWCAP_EDSP, "edsp"), + 
(md::ArmElfHwCaps::HWCAP_JAVA, "java"), + (md::ArmElfHwCaps::HWCAP_IWMMXT, "iwmmxt"), + (md::ArmElfHwCaps::HWCAP_CRUNCH, "crunch"), + (md::ArmElfHwCaps::HWCAP_THUMBEE, "thumbee"), + (md::ArmElfHwCaps::HWCAP_NEON, "neon"), + (md::ArmElfHwCaps::HWCAP_VFPv3, "vfpv3"), + (md::ArmElfHwCaps::HWCAP_VFPv3D16, "vfpv3d16"), + (md::ArmElfHwCaps::HWCAP_TLS, "tls"), + (md::ArmElfHwCaps::HWCAP_VFPv4, "vfpv4"), + (md::ArmElfHwCaps::HWCAP_IDIVA, "idiva"), + (md::ArmElfHwCaps::HWCAP_IDIVT, "idivt"), + ]; + + let mut cpu_info = format!("ARMv{}", raw.processor_level); + + // Try to extract out known vendor/part names from the cpuid, + // falling back to just reporting the raw value. + let cpuid = arm_info.cpuid; + if cpuid != 0 { + let vendor_id = (cpuid >> 24) & 0xff; + let part_id = cpuid & 0xff00fff0; + + if let Some(&(_, vendor)) = vendors.iter().find(|&&(id, _)| id == vendor_id) { + write!(&mut cpu_info, " {vendor}").unwrap(); + } else { + write!(&mut cpu_info, " vendor({vendor_id:#x})").unwrap(); + } + + if let Some(&(_, part)) = parts.iter().find(|&&(id, _)| id == part_id) { + write!(&mut cpu_info, " {part}").unwrap(); + } else { + write!(&mut cpu_info, " part({part_id:#x})").unwrap(); + } + } + + // Report all the known hardware features. + let elf_hwcaps = md::ArmElfHwCaps::from_bits_truncate(arm_info.elf_hwcaps); + if !elf_hwcaps.is_empty() { + cpu_info.push_str(" features: "); + + // Iterator::intersperse is still unstable, so do it manually + let mut comma = ""; + for &(_, feature) in features + .iter() + .filter(|&&(feature, _)| elf_hwcaps.contains(feature)) + { + cpu_info.push_str(comma); + cpu_info.push_str(feature); + comma = ","; + } + } + + Some(cpu_info) + } + _ => None, + }; + + Ok(MinidumpSystemInfo { + raw, + os, + cpu, + csd_version, + cpu_info, + }) + } +} + +impl MinidumpSystemInfo { + /// Write a human-readable description of this `MinidumpSystemInfo` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MINIDUMP_SYSTEM_INFO + processor_architecture = {:#x} + processor_level = {} + processor_revision = {:#x} + number_of_processors = {} + product_type = {} + major_version = {} + minor_version = {} + build_number = {} + platform_id = {:#x} + csd_version_rva = {:#x} + suite_mask = {:#x} + (version) = {}.{}.{} {} + (cpu_info) = {} + +", + self.raw.processor_architecture, + self.raw.processor_level, + self.raw.processor_revision, + self.raw.number_of_processors, + self.raw.product_type, + self.raw.major_version, + self.raw.minor_version, + self.raw.build_number, + self.raw.platform_id, + self.raw.csd_version_rva, + self.raw.suite_mask, + self.raw.major_version, + self.raw.minor_version, + self.raw.build_number, + self.csd_version().as_deref().unwrap_or(""), + self.cpu_info().as_deref().unwrap_or(""), + )?; + // TODO: cpu info etc + Ok(()) + } + + /// If the minidump was generated on: + /// - Windows: Returns the the name of the Service Pack. + /// - macOS: Returns the product build number. + /// - Linux: Returns the contents of `uname -srvmo`. + pub fn csd_version(&self) -> Option> { + self.csd_version.as_deref().map(Cow::Borrowed) + } + + /// Returns a string describing the cpu's vendor and model. + pub fn cpu_info(&self) -> Option> { + self.cpu_info.as_deref().map(Cow::Borrowed) + } + + /// Strings identifying the version and build number of the operating + /// system. Returns a tuple in the format of (version, build number). This + /// may be useful to use if the minidump was created on a Linux machine and + /// is an producing empty-ish version number (0.0.0). + /// + /// Tries to parse the version number from the build if it cannot be found + /// in the version string. If the stream already contains a valid version + /// number or parsing from the build string fails, this will return what's + /// directly stored in the stream. 
+ pub fn os_parts(&self) -> (String, Option) { + let os_version = format!( + "{}.{}.{}", + self.raw.major_version, self.raw.minor_version, self.raw.build_number + ); + + let os_build = self + .csd_version() + .map(|v| v.trim().to_owned()) + .filter(|v| !v.is_empty()); + + if md::PlatformId::from_u32(self.raw.platform_id) != Some(md::PlatformId::Linux) + || os_version != "0.0.0" + { + return (os_version, os_build); + } + + // Try to parse the Linux build string. Breakpad and Crashpad run + // `uname -srvmo` to generate it. The string follows this structure: + // "Linux [version] [build...] [arch] Linux/GNU" where the Linux/GNU + // bit may not always be present. + let raw_build = self.csd_version().unwrap_or(Cow::Borrowed("")); + let mut parts = raw_build.split(' '); + let version = parts.nth(1).unwrap_or("0.0.0"); + let _arch_or_os = parts.next_back().unwrap_or_default(); + if _arch_or_os == "Linux/GNU" { + let _arch = parts.next_back(); + } + let build = parts.collect::>().join(" "); + + if version == "0.0.0" { + (os_version, os_build) + } else { + (version.into(), Some(build)) + } + } +} + +// Generates an accessor for a MISC_INFO field with two possible syntaxes: +// +// * VERSION_NUMBER: FIELD_NAME -> FIELD_TYPE +// * VERSION_NUMBER: FIELD_NAME if FLAG -> FIELD_TYPE +// +// With the following definitions: +// +// * VERSION_NUMBER: The MISC_INFO version this field was introduced in +// * FIELD_NAME: The name of the field to read +// * FLAG: A MiscInfoFlag that defines if this field contains valid data +// * FIELD_TYPE: The type of the field +macro_rules! 
misc_accessors { + () => {}; + (@defnoflag $name:ident $t:ty [$($variant:ident)+]) => { + #[allow(unreachable_patterns)] + pub fn $name(&self) -> Option<&$t> { + match self { + $( + RawMiscInfo::$variant(ref raw) => Some(&raw.$name), + )+ + _ => None, + } + } + }; + (@def $name:ident $flag:ident $t:ty [$($variant:ident)+]) => { + #[allow(unreachable_patterns)] + pub fn $name(&self) -> Option<&$t> { + match self { + $( + RawMiscInfo::$variant(ref raw) => if md::MiscInfoFlags::from_bits_truncate(raw.flags1).contains(md::MiscInfoFlags::$flag) { Some(&raw.$name) } else { None }, + )+ + _ => None, + } + } + }; + (1: $name:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@defnoflag $name $t [MiscInfo MiscInfo2 MiscInfo3 MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + (1: $name:ident if $flag:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@def $name $flag $t [MiscInfo MiscInfo2 MiscInfo3 MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + + (2: $name:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@defnoflag $name $t [MiscInfo2 MiscInfo3 MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + (2: $name:ident if $flag:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@def $name $flag $t [MiscInfo2 MiscInfo3 MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + + (3: $name:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@defnoflag $name $t [MiscInfo3 MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + (3: $name:ident if $flag:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@def $name $flag $t [MiscInfo3 MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + + (4: $name:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@defnoflag $name $t [MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + (4: $name:ident if $flag:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@def $name $flag $t [MiscInfo4 MiscInfo5]); + misc_accessors!($($rest)*); + }; + + (5: $name:ident -> $t:ty, $($rest:tt)*) => { + 
misc_accessors!(@defnoflag $name $t [MiscInfo5]); + misc_accessors!($($rest)*); + }; + (5: $name:ident if $flag:ident -> $t:ty, $($rest:tt)*) => { + misc_accessors!(@def $name $flag $t [MiscInfo5]); + misc_accessors!($($rest)*); + }; +} + +impl RawMiscInfo { + // Fields are grouped by the flag that guards them. + misc_accessors!( + 1: size_of_info -> u32, + 1: flags1 -> u32, + + 1: process_id if MINIDUMP_MISC1_PROCESS_ID -> u32, + + 1: process_create_time if MINIDUMP_MISC1_PROCESS_TIMES -> u32, + 1: process_user_time if MINIDUMP_MISC1_PROCESS_TIMES -> u32, + 1: process_kernel_time if MINIDUMP_MISC1_PROCESS_TIMES -> u32, + + 2: processor_max_mhz if MINIDUMP_MISC1_PROCESSOR_POWER_INFO -> u32, + 2: processor_current_mhz if MINIDUMP_MISC1_PROCESSOR_POWER_INFO -> u32, + 2: processor_mhz_limit if MINIDUMP_MISC1_PROCESSOR_POWER_INFO -> u32, + 2: processor_max_idle_state if MINIDUMP_MISC1_PROCESSOR_POWER_INFO -> u32, + 2: processor_current_idle_state if MINIDUMP_MISC1_PROCESSOR_POWER_INFO -> u32, + + 3: process_integrity_level if MINIDUMP_MISC3_PROCESS_INTEGRITY -> u32, + + 3: process_execute_flags if MINIDUMP_MISC3_PROCESS_EXECUTE_FLAGS -> u32, + + 3: protected_process if MINIDUMP_MISC3_PROTECTED_PROCESS -> u32, + + 3: time_zone_id if MINIDUMP_MISC3_TIMEZONE -> u32, + 3: time_zone if MINIDUMP_MISC3_TIMEZONE -> md::TIME_ZONE_INFORMATION, + + 4: build_string if MINIDUMP_MISC4_BUILDSTRING -> [u16; 260], + 4: dbg_bld_str if MINIDUMP_MISC4_BUILDSTRING -> [u16; 40], + + 5: xstate_data -> md::XSTATE_CONFIG_FEATURE_MSC_INFO, + + 5: process_cookie if MINIDUMP_MISC5_PROCESS_COOKIE -> u32, + ); +} + +impl<'a> MinidumpStream<'a> for MinidumpMiscInfo { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::MiscInfoStream as u32; + + fn read( + bytes: &[u8], + _all: &[u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + // The misc info has gone through several revisions, so try to read the largest known + // struct possible. + macro_rules! 
do_read { + ($(($t:ty, $variant:ident),)+) => { + $( + if bytes.len() >= <$t>::size_with(&endian) { + return Ok(MinidumpMiscInfo { + raw: RawMiscInfo::$variant(bytes.pread_with(0, endian).or(Err(Error::StreamReadFailure))?), + }); + } + )+ + } + } + + do_read!( + (md::MINIDUMP_MISC_INFO_5, MiscInfo5), + (md::MINIDUMP_MISC_INFO_4, MiscInfo4), + (md::MINIDUMP_MISC_INFO_3, MiscInfo3), + (md::MINIDUMP_MISC_INFO_2, MiscInfo2), + (md::MINIDUMP_MISC_INFO, MiscInfo), + ); + Err(Error::StreamReadFailure) + } +} + +// Generates an accessor for a MAC_CRASH_INFO field with two possible syntaxes: +// +// * VERSION_NUMBER: FIELD_NAME -> FIELD_TYPE +// * VERSION_NUMBER: string FIELD_NAME -> FIELD_TYPE +// +// With the following definitions: +// +// * VERSION_NUMBER: The MAC_CRASH_INFO version this field was introduced in +// * FIELD_NAME: The name of the field to read +// * FIELD_TYPE: The type of the field +// +// The "string" mode will retrieve the field from the variant's _RECORD_STRINGS +// struct, while the other mode will retrieve it from the variant's _RECORD +// struct. +// +// In both cases, None will be yielded if the value is null/empty. +macro_rules! 
mac_crash_accessors { + () => {}; + (@deffixed $name:ident $t:ty [$($variant:ident)+]) => { + #[allow(unreachable_patterns)] + pub fn $name(&self) -> Option<&$t> { + match self { + $( + RawMacCrashInfo::$variant(ref fixed, _) => { + if fixed.$name == 0 { + None + } else { + Some(&fixed.$name) + } + }, + )+ + _ => None, + } + } + }; + (@defstrings $name:ident $t:ty [$($variant:ident)+]) => { + #[allow(unreachable_patterns)] + pub fn $name(&self) -> Option<&$t> { + match self { + $( + RawMacCrashInfo::$variant(_, ref strings) => { + if strings.$name.is_empty() { + None + } else { + Some(&*strings.$name) + } + } + )+ + _ => None, + } + } + }; + (1: $name:ident -> $t:ty, $($rest:tt)*) => { + mac_crash_accessors!(@deffixed $name $t [V1 V4 V5]); + mac_crash_accessors!($($rest)*); + }; + (1: string $name:ident -> $t:ty, $($rest:tt)*) => { + mac_crash_accessors!(@defstrings $name $t [V1 V4 V5]); + mac_crash_accessors!($($rest)*); + }; + (4: $name:ident -> $t:ty, $($rest:tt)*) => { + mac_crash_accessors!(@deffixed $name $t [V4 V5]); + mac_crash_accessors!($($rest)*); + }; + (4: string $name:ident -> $t:ty, $($rest:tt)*) => { + mac_crash_accessors!(@defstrings $name $t [V4 V5]); + mac_crash_accessors!($($rest)*); + }; + (5: $name:ident -> $t:ty, $($rest:tt)*) => { + mac_crash_accessors!(@deffixed $name $t [V5]); + mac_crash_accessors!($($rest)*); + }; + (5: string $name:ident -> $t:ty, $($rest:tt)*) => { + mac_crash_accessors!(@defstrings $name $t [V5]); + mac_crash_accessors!($($rest)*); + }; +} + +impl RawMacCrashInfo { + // Fields are grouped by the flag that guards them. 
+ mac_crash_accessors!( + 1: version -> u64, + + 4: thread -> u64, + 4: dialog_mode -> u64, + + 4: string module_path -> str, + 4: string message -> str, + 4: string signature_string -> str, + 4: string backtrace -> str, + 4: string message2 -> str, + + 5: abort_cause -> u64, + ); +} + +impl<'a> MinidumpStream<'a> for MinidumpMacCrashInfo { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::MozMacosCrashInfoStream as u32; + + fn read( + bytes: &[u8], + all: &[u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + // Get the main header of the stream + let header: md::MINIDUMP_MAC_CRASH_INFO = bytes + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + + let strings_offset = header.record_start_size as usize; + let mut prev_version = None; + let mut infos = Vec::new(); + + // We use `take` here to better handle a corrupt record_count that is larger than the + // maximum supported size. + let records = header.records.iter().take(header.record_count as usize); + + for record_location in records { + // Peek the V1 version to get the `version` field + let record_slice = location_slice(all, record_location)?; + let base: md::MINIDUMP_MAC_CRASH_INFO_RECORD = record_slice + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + + // The V1 version also includes the stream type again, but that's + // not really important, so just warn about it and keep going. + if base.stream_type != header.stream_type as u64 { + warn!( + "MozMacosCrashInfoStream records don't have the right stream type? {}", + base.stream_type + ); + } + + // Make sure every record has the same version, because they have to + // share their strings_offset which make heterogeneous records impossible. 
+ if let Some(prev_version) = prev_version { + if prev_version != base.version { + warn!( + "MozMacosCrashInfoStream had two different versions ({} != {})", + prev_version, base.version + ); + return Err(Error::VersionMismatch); + } + } + prev_version = Some(base.version); + + // Now actually read the full record and its strings for the version + macro_rules! do_read { + ($base_version:expr, $strings_offset:expr, $infos:ident, + $(($version:expr, $fixed:ty, $strings:ty, $variant:ident),)+) => {$( + if $base_version >= $version { + let offset = &mut 0; + let fixed: $fixed = record_slice + .gread_with(offset, endian) + .or(Err(Error::StreamReadFailure))?; + + // Sanity check that we haven't blown past where the strings start. + if *offset > $strings_offset { + warn!("MozMacosCrashInfoStream's record_start_size was too small! ({})", + $strings_offset); + return Err(Error::StreamReadFailure); + } + + // We could be handling a newer version of the format than we know + // how to support, so jump to where the strings start, potentially + // skipping over some unknown fields. + *offset = $strings_offset; + let num_strings = <$strings>::num_strings(); + let mut strings = <$strings>::default(); + + // Read out all the strings we know about + for i in 0..num_strings { + let string = read_cstring_utf8(offset, record_slice) + .ok_or(Error::StreamReadFailure)?; + strings.set_string(i, string); + } + // If this is a newer version, there may be some extra variable length + // data in this record, but we don't know what it is, so don't try to parse it. 
+ + infos.push(RawMacCrashInfo::$variant(fixed, strings)); + continue; + } + )+} + } + + do_read!( + base.version, + strings_offset, + infos, + ( + 5, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_5, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_STRINGS_5, + V5 + ), + ( + 4, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_4, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_STRINGS_4, + V4 + ), + ( + 1, + md::MINIDUMP_MAC_CRASH_INFO_RECORD, + md::MINIDUMP_MAC_CRASH_INFO_RECORD_STRINGS, + V1 + ), + ); + } + Ok(MinidumpMacCrashInfo { raw: infos }) + } +} + +impl MinidumpMacCrashInfo { + /// Write a human-readable description of this `MinidumpMacCrashInfo` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T) -> io::Result<()> { + macro_rules! write_simple_field { + ($stream:ident, $field:ident, $idx:ident, $format:literal) => { + write!(f, " {:18}= ", stringify!($field))?; + match self.raw[$idx].$field() { + Some($field) => { + writeln!(f, $format, $field)?; + } + None => writeln!(f)?, + } + }; + ($stream:ident, $field:ident, $idx:ident) => { + write_simple_field!($stream, $field, $idx, "{}"); + }; + } + writeln!(f, "MINIDUMP_MAC_CRASH_INFO")?; + writeln!(f, " num_records = {}", self.raw.len())?; + + for i in 0..self.raw.len() { + writeln!(f)?; + writeln!(f, " RECORD[{i}] ")?; + write_simple_field!(f, version, i); + write_simple_field!(f, thread, i); + write_simple_field!(f, dialog_mode, i, "{:x}"); + write_simple_field!(f, module_path, i); + write_simple_field!(f, message, i); + write_simple_field!(f, signature_string, i); + write_simple_field!(f, backtrace, i); + write_simple_field!(f, message2, i); + write_simple_field!(f, abort_cause, i, "{:x}"); + } + + writeln!(f)?; + Ok(()) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpMacBootargs { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::MozMacosBootargsStream as u32; + + fn read( + bytes: &[u8], + all: &[u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> 
Result { + let raw: md::MINIDUMP_MAC_BOOTARGS = bytes + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + + let stream_type = raw.stream_type; + if stream_type != Self::STREAM_TYPE { + warn!( + "MozMacosBootargsStream record doesn't have the right stream type? {}", + Self::STREAM_TYPE + ); + } + let mut bootargs_offset = raw.bootargs as usize; + let bootargs = read_string_utf16(&mut bootargs_offset, all, endian); + + Ok(MinidumpMacBootargs { raw, bootargs }) + } +} + +impl MinidumpMacBootargs { + pub fn print(&self, f: &mut T) -> io::Result<()> { + writeln!( + f, + "mac_boot_args = {}", + self.bootargs.as_deref().unwrap_or("") + )?; + writeln!(f)?; + Ok(()) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpLinuxLsbRelease<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::LinuxLsbRelease as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + _endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + Ok(Self { data: bytes }) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpLinuxEnviron<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::LinuxEnviron as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + _endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + Ok(Self { data: bytes }) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpLinuxProcStatus<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::LinuxProcStatus as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + _endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + Ok(Self { data: bytes }) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpLinuxProcLimits<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::MozLinuxLimits as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + _endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + Ok(Self { data: bytes }) + } +} + +impl<'a> MinidumpStream<'a> for 
MinidumpLinuxCpuInfo<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::LinuxCpuInfo as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + _endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result, Error> { + Ok(Self { data: bytes }) + } +} + +impl<'a> MinidumpLinuxCpuInfo<'a> { + /// Get an iterator over the key-value pairs stored in the `/proc/cpuinfo` dump. + /// + /// Keys and values are `trim`ed of leading/trailing spaces, and if a key + /// or value was surrounded by quotes ("like this"), the quotes will be + /// stripped. + pub fn iter(&self) -> impl Iterator { + linux_list_iter(self.data, b':') + } + + /// Get the raw bytes of the `/proc/cpuinfo` dump. + pub fn raw_bytes(&self) -> Cow<'a, [u8]> { + Cow::Borrowed(self.data) + } +} + +impl<'a> MinidumpLinuxEnviron<'a> { + /// Get an iterator over the key-value pairs stored in the `/proc/self/environ` dump. + /// + /// Keys and values are `trim`ed of leading/trailing spaces, and if a key + /// or value was surrounded by quotes ("like this"), the quotes will be + /// stripped. + pub fn iter(&self) -> impl Iterator { + linux_list_iter(self.data, b'=') + } + + /// Get the raw bytes of the `/proc/self/environ` dump. + pub fn raw_bytes(&self) -> Cow<'a, [u8]> { + Cow::Borrowed(self.data) + } +} + +impl<'a> MinidumpLinuxProcStatus<'a> { + /// Get an iterator over the key-value pairs stored in the `/proc/self/status` dump. + /// + /// Keys and values are `trim`ed of leading/trailing spaces, and if a key + /// or value was surrounded by quotes ("like this"), the quotes will be + /// stripped. + pub fn iter(&self) -> impl Iterator { + linux_list_iter(self.data, b':') + } + + /// Get the raw bytes of the `/proc/self/status` dump. + pub fn raw_bytes(&self) -> Cow<'a, [u8]> { + Cow::Borrowed(self.data) + } +} + +impl<'a> MinidumpLinuxProcLimits<'a> { + /// Get an iterator over the key-value pairs stored in the `/proc/self/limits` dump. 
///
    /// NOTE: unlike the sibling Linux streams, this accessor does not split
    /// entries on a separator: it yields whatever `LinuxOsStr::lines`
    /// produces for the dump, one item per line.
    pub fn iter(&self) -> impl Iterator<Item = &'a LinuxOsStr> {
        LinuxOsStr::from_bytes(self.data).lines()
    }

    /// Get the raw bytes of the `/proc/self/limits` dump.
    ///
    /// The bytes are borrowed from the minidump; nothing is copied.
    pub fn raw_bytes(&self) -> Cow<'a, [u8]> {
        Cow::Borrowed(self.data)
    }
}

impl<'a> MinidumpLinuxLsbRelease<'a> {
    /// Get an iterator over the key-value pairs stored in the `/etc/lsb-release` dump.
    ///
    /// Entries are split on `=` by `linux_list_iter`.
    ///
    /// Keys and values are `trim`ed of leading/trailing spaces, and if a key
    /// or value was surrounded by quotes ("like this"), the quotes will be
    /// stripped.
    pub fn iter(&self) -> impl Iterator<Item = (&'a LinuxOsStr, &'a LinuxOsStr)> {
        linux_list_iter(self.data, b'=')
    }

    /// Get the raw bytes of the `/etc/lsb-release` dump.
    ///
    /// The bytes are borrowed from the minidump; nothing is copied.
    pub fn raw_bytes(&self) -> Cow<'a, [u8]> {
        Cow::Borrowed(self.data)
    }
}

/// Convert a count of seconds since the Unix epoch into a `SystemTime`.
///
/// Returns `None` when the addition cannot be represented (`checked_add`).
fn systemtime_from_timestamp(timestamp: u64) -> Option<SystemTime> {
    SystemTime::UNIX_EPOCH.checked_add(Duration::from_secs(timestamp))
}

impl MinidumpMiscInfo {
    /// The process creation time as a `SystemTime`.
    ///
    /// Returns `None` if the raw record has no `process_create_time` or the
    /// timestamp cannot be represented.
    pub fn process_create_time(&self) -> Option<SystemTime> {
        self.raw
            .process_create_time()
            .and_then(|t| systemtime_from_timestamp(*t as u64))
    }

    /// Write a human-readable description of this `MinidumpMiscInfo` to `f`.
    ///
    /// This is very verbose, it is the format used by `minidump_dump`.
    /// Every field the raw record does not provide is printed as `(invalid)`.
    pub fn print<T: io::Write>(&self, f: &mut T) -> io::Result<()> {
        // Prints one `name = value` line for an optional raw field. The first
        // argument is accepted for call-site readability only; the expansion
        // writes to the enclosing `f` directly.
        macro_rules! write_simple_field {
            ($stream:ident, $field:ident, $format:literal) => {
                write!(f, " {:29}= ", stringify!($field))?;
                match self.raw.$field() {
                    Some($field) => {
                        writeln!(f, $format, $field)?;
                    }
                    None => writeln!(f, "(invalid)")?,
                }
            };
            ($stream:ident, $field:ident) => {
                write_simple_field!($stream, $field, "{}");
            };
        }
        writeln!(f, "MINIDUMP_MISC_INFO")?;

        write_simple_field!(f, size_of_info);
        write_simple_field!(f, flags1, "{:x}");
        write_simple_field!(f, process_id);
        // The creation time is printed both raw (hex) and formatted.
        write!(f, " process_create_time = ")?;
        match self.raw.process_create_time() {
            Some(&process_create_time) => {
                writeln!(
                    f,
                    "{:#x} {}",
                    process_create_time,
                    format_time_t(process_create_time),
                )?;
            }
            None => writeln!(f, "(invalid)")?,
        }
        write_simple_field!(f, process_user_time);
        write_simple_field!(f, process_kernel_time);

        write_simple_field!(f, processor_max_mhz);
        write_simple_field!(f, processor_current_mhz);
        write_simple_field!(f, processor_mhz_limit);
        write_simple_field!(f, processor_max_idle_state);
        write_simple_field!(f, processor_current_idle_state);

        write_simple_field!(f, process_integrity_level);
        write_simple_field!(f, process_execute_flags, "{:x}");
        write_simple_field!(f, protected_process);
        write_simple_field!(f, time_zone_id);

        write!(f, " time_zone = ")?;
        match self.raw.time_zone() {
            Some(time_zone) => {
                writeln!(f)?;
                writeln!(f, " bias = {}", time_zone.bias)?;
                writeln!(
                    f,
                    " standard_name = {}",
                    utf16_to_string(&time_zone.standard_name[..])
                        .unwrap_or_else(|| String::from("(invalid)"))
                )?;
                writeln!(
                    f,
                    " standard_date = {}",
                    format_system_time(&time_zone.standard_date)
                )?;
                writeln!(f, " standard_bias = {}", time_zone.standard_bias)?;
                writeln!(
                    f,
                    " daylight_name = {}",
                    utf16_to_string(&time_zone.daylight_name[..])
                        .unwrap_or_else(|| String::from("(invalid)"))
                )?;
                writeln!(
                    f,
                    " daylight_date = {}",
                    format_system_time(&time_zone.daylight_date)
                )?;
                writeln!(f, " daylight_bias = {}", time_zone.daylight_bias)?;
            }
            None => writeln!(f, "(invalid)")?,
        }

        // Both build strings are UTF-16; an absent or undecodable string is
        // reported as `(invalid)`.
        write!(f, " build_string = ")?;
        match self
            .raw
            .build_string()
            .and_then(|string| utf16_to_string(&string[..]))
        {
            Some(build_string) => writeln!(f, "{build_string}")?,
            None => writeln!(f, "(invalid)")?,
        }
        write!(f, " dbg_bld_str = ")?;
        match self
            .raw
            .dbg_bld_str()
            .and_then(|string| utf16_to_string(&string[..]))
        {
            Some(dbg_bld_str) => writeln!(f, "{dbg_bld_str}")?,
            None => writeln!(f, "(invalid)")?,
        }

        write!(f, " xstate_data = ")?;
        match self.raw.xstate_data() {
            Some(xstate_data) => {
                writeln!(f)?;
                for (i, feature) in xstate_data.iter() {
                    // Name the feature when its index is a known one.
                    if let Some(feature) = md::XstateFeatureIndex::from_index(i) {
                        let feature_name = format!("{feature:?}");
                        write!(f, " feature {i:2} - {feature_name:22}: ")?;
                    } else {
                        write!(f, " feature {i:2} - (unknown) : ")?;
                    }
                    writeln!(f, " offset {:4}, size {:4}", feature.offset, feature.size)?;
                }
            }
            None => writeln!(f, "(invalid)")?,
        }

        write_simple_field!(f, process_cookie);
        writeln!(f)?;
        Ok(())
    }
}

impl<'a> MinidumpStream<'a> for MinidumpBreakpadInfo {
    const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::BreakpadInfoStream as u32;

    /// Parse a `MINIDUMP_BREAKPAD_INFO` record from the start of `bytes`.
    ///
    /// Each thread id is exposed only when its bit is set in `validity`.
    /// Fails with `Error::StreamReadFailure` if the record cannot be read.
    fn read(
        bytes: &[u8],
        _all: &[u8],
        endian: scroll::Endian,
        _system_info: Option<&MinidumpSystemInfo>,
    ) -> Result<MinidumpBreakpadInfo, Error> {
        let raw: md::MINIDUMP_BREAKPAD_INFO = bytes
            .pread_with(0, endian)
            .or(Err(Error::StreamReadFailure))?;
        // Unknown validity bits are ignored.
        let flags = md::BreakpadInfoValid::from_bits_truncate(raw.validity);
        let dump_thread_id = if flags.contains(md::BreakpadInfoValid::DumpThreadId) {
            Some(raw.dump_thread_id)
        } else {
            None
        };
        let requesting_thread_id = if flags.contains(md::BreakpadInfoValid::RequestingThreadId) {
            Some(raw.requesting_thread_id)
        } else {
            None
        };
        Ok(MinidumpBreakpadInfo {
            raw,
            dump_thread_id,
            requesting_thread_id,
        })
    }
}

/// Format an optional value as `0x`-prefixed hex, or `(invalid)` when absent.
fn option_or_invalid<T: fmt::LowerHex>(what: &Option<T>) -> Cow<'_, str> {
match *what {
        Some(ref val) => Cow::Owned(format!("{val:#x}")),
        None => Cow::Borrowed("(invalid)"),
    }
}

impl MinidumpBreakpadInfo {
    /// Write a human-readable description of this `MinidumpBreakpadInfo` to `f`.
    ///
    /// This is very verbose, it is the format used by `minidump_dump`.
    /// Thread ids not marked valid are printed as `(invalid)`.
    pub fn print<T: io::Write>(&self, f: &mut T) -> io::Result<()> {
        write!(
            f,
            "MINIDUMP_BREAKPAD_INFO
 validity = {:#x}
 dump_thread_id = {}
 requesting_thread_id = {}

",
            self.raw.validity,
            option_or_invalid(&self.dump_thread_id),
            option_or_invalid(&self.requesting_thread_id),
        )?;
        Ok(())
    }
}

impl CrashReason {
    /// Get a `CrashReason` from a `MINIDUMP_EXCEPTION_STREAM` for a given `Os`.
    ///
    /// Falls back to `CrashReason::Unknown(code, flags)` when the OS is not
    /// handled or the OS-specific decoder does not recognize the code.
    fn from_exception(raw: &md::MINIDUMP_EXCEPTION_STREAM, os: Os, cpu: Cpu) -> CrashReason {
        let record = &raw.exception_record;
        let exception_code = record.exception_code;
        let exception_flags = record.exception_flags;

        let reason = match os {
            Os::MacOs | Os::Ios => Self::from_mac_exception(raw, cpu),
            Os::Linux | Os::Android => Self::from_linux_exception(raw, cpu),
            Os::Windows => Self::from_windows_exception(raw, cpu),
            _ => None,
        };

        // Default to a totally generic unknown error
        reason.unwrap_or(CrashReason::Unknown(exception_code, exception_flags))
    }

    /// Heuristically identifies what kind of windows exception code this is.
    ///
    /// Augments [`CrashReason::from_windows_error`] by also including
    /// `ExceptionCodeWindows`. Appropriate for an actual crash reason.
    pub fn from_windows_code(exception_code: u32) -> CrashReason {
        if let Some(err) = err::ExceptionCodeWindows::from_u32(exception_code) {
            Self::WindowsGeneral(err)
        } else {
            Self::from_windows_error(exception_code)
        }
    }

    /// Heuristically identifies what kind of windows error code this is.
    ///
    /// Appropriate for things like LastErrorValue() which may be non-fatal.
    ///
    /// Tries, in order: a plain WinError, an NTSTATUS, a facility-qualified
    /// WinError, and finally `WindowsUnknown`.
    pub fn from_windows_error(error_code: u32) -> CrashReason {
        if let Some(err) = err::WinErrorWindows::from_u32(error_code) {
            Self::WindowsWinError(err)
        } else if let Some(err) = err::NtStatusWindows::from_u32(error_code) {
            Self::WindowsNtStatus(err)
        } else if let Some(err) = Self::from_windows_error_with_facility(error_code) {
            err
        } else {
            Self::WindowsUnknown(error_code)
        }
    }

    /// Decode an error code that carries a facility in its upper bits.
    ///
    /// Returns `None` unless the severity bits are non-zero and both the
    /// facility (bits 16..28) and the low 16-bit error are known values.
    pub fn from_windows_error_with_facility(error_code: u32) -> Option<CrashReason> {
        // Plain compile-time bit masks; they need no storage, hence `const`.
        const SEVERITY_MASK: u32 = 0xf0000000;
        const FACILITY_MASK: u32 = 0x0fff0000;
        const ERROR_MASK: u32 = 0x0000ffff;

        if (error_code & SEVERITY_MASK) != 0 {
            // This could be an NTSTATUS or HRESULT code of a specific facility
            if let Some(facility) =
                err::WinErrorFacilityWindows::from_u32((error_code & FACILITY_MASK) >> 16)
            {
                if let Some(error) = err::WinErrorWindows::from_u32(error_code & ERROR_MASK) {
                    return Some(Self::WindowsWinErrorWithFacility(facility, error));
                }
            }
        }

        None
    }

    /// Decode a Windows exception record into a `CrashReason`.
    ///
    /// Always returns `Some`: the generic reason from
    /// [`CrashReason::from_windows_code`] is refined when the record carries
    /// enough parameters for access violations, in-page errors and
    /// fast-fail stack buffer overruns.
    pub fn from_windows_exception(
        raw: &md::MINIDUMP_EXCEPTION_STREAM,
        _cpu: Cpu,
    ) -> Option<CrashReason> {
        use err::ExceptionCodeWindows;

        let record = &raw.exception_record;
        let info = &record.exception_information;
        let exception_code = record.exception_code;

        let mut reason = CrashReason::from_windows_code(exception_code);

        // Refine the output for error codes that have more info
        match reason {
            CrashReason::WindowsGeneral(ExceptionCodeWindows::EXCEPTION_ACCESS_VIOLATION) => {
                // For EXCEPTION_ACCESS_VIOLATION, Windows puts the address that
                // caused the fault in exception_information[1].
                // exception_information[0] is 0 if the violation was caused by
                // an attempt to read data, 1 if it was an attempt to write data,
                // and 8 if this was a data execution violation.
                // This information is useful in addition to the code address, which
                // will be present in the crash thread's instruction field anyway.
                if record.number_parameters >= 1 {
                    // NOTE: address := info[1];
                    if let Some(ty) = err::ExceptionCodeWindowsAccessType::from_u64(info[0]) {
                        reason = CrashReason::WindowsAccessViolation(ty);
                    }
                }
            }
            CrashReason::WindowsGeneral(ExceptionCodeWindows::EXCEPTION_IN_PAGE_ERROR) => {
                // For EXCEPTION_IN_PAGE_ERROR, Windows puts the address that
                // caused the fault in exception_information[1].
                // exception_information[0] is 0 if the violation was caused by
                // an attempt to read data, 1 if it was an attempt to write data,
                // and 8 if this was a data execution violation.
                // exception_information[2] contains the underlying NTSTATUS code,
                // which is the explanation for why this error occurred.
                // This information is useful in addition to the code address, which
                // will be present in the crash thread's instruction field anyway.
                if record.number_parameters >= 3 {
                    // NOTE: address := info[1];
                    // The status code is 32-bits wide, ignore the upper 32 bits
                    let nt_status = info[2] & 0xffff_ffff;
                    if let Some(ty) = err::ExceptionCodeWindowsInPageErrorType::from_u64(info[0]) {
                        reason = CrashReason::WindowsInPageError(ty, nt_status);
                    }
                }
            }
            CrashReason::WindowsNtStatus(err::NtStatusWindows::STATUS_STACK_BUFFER_OVERRUN) => {
                // STATUS_STACK_BUFFER_OVERRUN are caused by __fastfail()
                // invocations and the fast-fail code is stored in
                // exception_information[0].
                if record.number_parameters >= 1 {
                    // The status code is 32-bits wide, ignore the upper 32 bits
                    let fast_fail = info[0] & 0xffff_ffff;
                    reason = CrashReason::WindowsStackBufferOverrun(fast_fail);
                }
            }
            _ => {
                // Do nothing interesting
            }
        }

        Some(reason)
    }

    /// Decode a macOS/iOS exception record into a `CrashReason`.
    ///
    /// Returns `None` when the exception code is not a known
    /// `ExceptionCodeMac`. Otherwise starts from `MacGeneral(code, flags)`
    /// and refines it using `exception_flags` (interpreted per `cpu` where
    /// the sub-codes are architecture specific).
    pub fn from_mac_exception(
        raw: &md::MINIDUMP_EXCEPTION_STREAM,
        cpu: Cpu,
    ) -> Option<CrashReason> {
        use err::ExceptionCodeMac;

        let record = &raw.exception_record;
        let info = &record.exception_information;
        let exception_code = record.exception_code;
        let exception_flags = record.exception_flags;

        // Default to just directly reporting this reason.
        let mac_reason = err::ExceptionCodeMac::from_u32(exception_code)?;
        let mut reason = CrashReason::MacGeneral(mac_reason, exception_flags);

        // Refine the output for error codes that have more info
        match mac_reason {
            ExceptionCodeMac::EXC_BAD_ACCESS => {
                // Kernel return codes take precedence over CPU-specific ones.
                if let Some(ty) = err::ExceptionCodeMacBadAccessKernType::from_u32(exception_flags)
                {
                    reason = CrashReason::MacBadAccessKern(ty);
                } else {
                    match cpu {
                        Cpu::Arm64 => {
                            if let Some(ty) =
                                err::ExceptionCodeMacBadAccessArmType::from_u32(exception_flags)
                            {
                                reason = CrashReason::MacBadAccessArm(ty);
                            }
                        }
                        Cpu::Ppc => {
                            if let Some(ty) =
                                err::ExceptionCodeMacBadAccessPpcType::from_u32(exception_flags)
                            {
                                reason = CrashReason::MacBadAccessPpc(ty);
                            }
                        }
                        Cpu::X86 | Cpu::X86_64 => {
                            if let Some(ty) =
                                err::ExceptionCodeMacBadAccessX86Type::from_u32(exception_flags)
                            {
                                reason = CrashReason::MacBadAccessX86(ty);
                            }
                        }
                        _ => {
                            // Do nothing
                        }
                    }
                }
            }
            ExceptionCodeMac::EXC_BAD_INSTRUCTION => match cpu {
                Cpu::Arm64 => {
                    if let Some(ty) =
                        err::ExceptionCodeMacBadInstructionArmType::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacBadInstructionArm(ty);
                    }
                }
                Cpu::Ppc => {
                    if let Some(ty) =
                        err::ExceptionCodeMacBadInstructionPpcType::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacBadInstructionPpc(ty);
                    }
                }
                Cpu::X86 | Cpu::X86_64 => {
                    if let Some(ty) =
                        err::ExceptionCodeMacBadInstructionX86Type::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacBadInstructionX86(ty);
                    }
                }
                _ => {
                    // Do nothing
                }
            },
            ExceptionCodeMac::EXC_ARITHMETIC => match cpu {
                Cpu::Arm64 => {
                    if let Some(ty) =
                        err::ExceptionCodeMacArithmeticArmType::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacArithmeticArm(ty);
                    }
                }
                Cpu::Ppc => {
                    if let Some(ty) =
                        err::ExceptionCodeMacArithmeticPpcType::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacArithmeticPpc(ty);
                    }
                }
                Cpu::X86 | Cpu::X86_64 => {
                    if let Some(ty) =
                        err::ExceptionCodeMacArithmeticX86Type::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacArithmeticX86(ty);
                    }
                }
                _ => {
                    // Do nothing
                }
            },
            ExceptionCodeMac::EXC_SOFTWARE => {
                if let Some(ty) = err::ExceptionCodeMacSoftwareType::from_u32(exception_flags) {
                    reason = CrashReason::MacSoftware(ty);
                }
            }
            ExceptionCodeMac::EXC_BREAKPOINT => match cpu {
                Cpu::Arm64 => {
                    if let Some(ty) =
                        err::ExceptionCodeMacBreakpointArmType::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacBreakpointArm(ty);
                    }
                }
                Cpu::Ppc => {
                    if let Some(ty) =
                        err::ExceptionCodeMacBreakpointPpcType::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacBreakpointPpc(ty);
                    }
                }
                Cpu::X86 | Cpu::X86_64 => {
                    if let Some(ty) =
                        err::ExceptionCodeMacBreakpointX86Type::from_u32(exception_flags)
                    {
                        reason = CrashReason::MacBreakpointX86(ty);
                    }
                }
                _ => {
                    // Do nothing
                }
            },
            ExceptionCodeMac::EXC_RESOURCE => {
                // The resource type lives in the top three bits of the flags;
                // info[1] / info[2] are passed on as code / subcode.
                if let Some(ty) =
                    err::ExceptionCodeMacResourceType::from_u32((exception_flags >> 29) & 0x7)
                {
                    reason = CrashReason::MacResource(ty, info[1], info[2]);
                }
            }
            ExceptionCodeMac::EXC_GUARD => {
                // Same layout as EXC_RESOURCE: type in the top three bits.
                if let Some(ty) =
                    err::ExceptionCodeMacGuardType::from_u32((exception_flags >> 29) & 0x7)
                {
                    reason = CrashReason::MacGuard(ty, info[1], info[2]);
                }
            }
            _ => {
                // Do nothing
            }
        }

        Some(reason)
    }

    /// Decode a Linux/Android exception record into a `CrashReason`.
    ///
    /// Returns `None` when the exception code is not a known
    /// `ExceptionCodeLinux`. Otherwise starts from
    /// `LinuxGeneral(signal, flags)` and refines it when `exception_flags`
    /// is a known sub-code for that signal.
    pub fn from_linux_exception(
        raw: &md::MINIDUMP_EXCEPTION_STREAM,
        _cpu: Cpu,
    ) -> Option<CrashReason> {
        let record = &raw.exception_record;
        let exception_code = record.exception_code;
        let exception_flags = record.exception_flags;

        let linux_reason = err::ExceptionCodeLinux::from_u32(exception_code)?;
        let mut reason = CrashReason::LinuxGeneral(linux_reason, exception_flags);
        // Refine the output for error codes that have more info
        match linux_reason {
            err::ExceptionCodeLinux::SIGILL => {
                if let Some(ty) = err::ExceptionCodeLinuxSigillKind::from_u32(exception_flags) {
                    reason = CrashReason::LinuxSigill(ty);
                }
            }
            err::ExceptionCodeLinux::SIGTRAP => {
                if let Some(ty) = err::ExceptionCodeLinuxSigtrapKind::from_u32(exception_flags) {
                    reason = CrashReason::LinuxSigtrap(ty);
                }
            }
            err::ExceptionCodeLinux::SIGFPE => {
                if let Some(ty) = err::ExceptionCodeLinuxSigfpeKind::from_u32(exception_flags) {
                    reason = CrashReason::LinuxSigfpe(ty);
                }
            }
            err::ExceptionCodeLinux::SIGSEGV => {
                if let Some(ty) = err::ExceptionCodeLinuxSigsegvKind::from_u32(exception_flags) {
                    reason = CrashReason::LinuxSigsegv(ty);
                }
            }
            err::ExceptionCodeLinux::SIGBUS => {
                if let Some(ty) = err::ExceptionCodeLinuxSigbusKind::from_u32(exception_flags) {
                    reason = CrashReason::LinuxSigbus(ty);
                }
            }
            err::ExceptionCodeLinux::SIGSYS => {
                if let Some(ty) = err::ExceptionCodeLinuxSigsysKind::from_u32(exception_flags) {
                    reason = CrashReason::LinuxSigsys(ty);
                }
            }
            _ => {
                // No refinements
            }
        }
        Some(reason)
    }
}

impl fmt::Display for CrashReason {
    /// A string describing the crash reason.
    ///
    /// This is OS- and possibly CPU-specific.
    /// For example, "EXCEPTION_ACCESS_VIOLATION" (Windows),
    /// "EXC_BAD_ACCESS / KERN_INVALID_ADDRESS" (Mac OS X), "SIGSEGV"
    /// (other Unix).
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use CrashReason::*; + + fn write_nt_status(f: &mut fmt::Formatter<'_>, raw_nt_status: u64) -> fmt::Result { + let nt_status = err::NtStatusWindows::from_u64(raw_nt_status); + if let Some(nt_status) = nt_status { + write!(f, "{nt_status:?}") + } else { + write!(f, "{raw_nt_status:#010x}") + } + } + + fn write_fast_fail(f: &mut fmt::Formatter<'_>, raw_fast_fail: u64) -> fmt::Result { + let fast_fail = err::FastFailCode::from_u64(raw_fast_fail); + if let Some(fast_fail) = fast_fail { + write!(f, "{fast_fail:?}") + } else { + write!(f, "{raw_fast_fail:#010x}") + } + } + + fn write_exc_resource( + f: &mut fmt::Formatter<'_>, + ex: err::ExceptionCodeMacResourceType, + code: u64, + subcode: u64, + ) -> fmt::Result { + let flavor = (code >> 58) & 0x7; + write!(f, "EXC_RESOURCE / {ex:?} / ")?; + match ex { + err::ExceptionCodeMacResourceType::RESOURCE_TYPE_CPU => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_resource.h#L71-L99 + if let Some(cpu_flavor) = + err::ExceptionCodeMacResourceCpuFlavor::from_u64(flavor) + { + let interval = (code >> 7) & 0x1ffffff; + let cpu_limit = code & 0x7; + let cpu_consumed = subcode & 0x7; + write!( + f, + "{cpu_flavor:?} interval: {interval}s CPU limit: {cpu_limit}% CPU consumed: {cpu_consumed}%" + ) + } else { + write!(f, "{code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacResourceType::RESOURCE_TYPE_WAKEUPS => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_resource.h#L105-L134 + if let Some(wakeups_flavor) = + err::ExceptionCodeMacResourceWakeupsFlavor::from_u64(flavor) + { + let interval = (code >> 20) & 0xfffff; + let wakeups_permitted = code & 0xfff; + let wakeups_observed = subcode & 0xfff; + write!( + f, + "{wakeups_flavor:?} interval: {interval}s wakeups permitted: {wakeups_permitted} wakeups observed: {wakeups_observed}" + ) + } else { 
+ write!(f, "{code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacResourceType::RESOURCE_TYPE_MEMORY => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_resource.h#L139-L162 + if let Some(memory_flavor) = + err::ExceptionCodeMacResourceMemoryFlavor::from_u64(flavor) + { + let hwm_limit = code & 0x1fff; + write!(f, "{memory_flavor:?} high watermark limit: {hwm_limit}MiB") + } else { + write!(f, "{code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacResourceType::RESOURCE_TYPE_IO => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_resource.h#L168-L196 + if let Some(io_flavor) = err::ExceptionCodeMacResourceIOFlavor::from_u64(flavor) + { + let interval = (code >> 15) & 0x1ffff; + let io_limit = code & 0x7fff; + let io_observed = subcode & 0x7fff; + write!( + f, + "{io_flavor:?} interval: {interval}s I/O limit: {io_limit}% I/O observed: {io_observed}%" + ) + } else { + write!(f, "{code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacResourceType::RESOURCE_TYPE_THREADS => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_resource.h#L199-L207 + if let Some(threads_flavor) = + err::ExceptionCodeMacResourceThreadsFlavor::from_u64(flavor) + { + let hwm_limit = code & 0x7fff; + write!(f, "{threads_flavor:?} high watermark limit: {hwm_limit}") + } else { + write!(f, "{code:#018x} / {subcode:#018x}") + } + } + } + } + + fn write_exc_guard( + f: &mut fmt::Formatter<'_>, + ex: err::ExceptionCodeMacGuardType, + code: u64, + subcode: u64, + ) -> fmt::Result { + let flavor = (code >> 32) & 0x1fffffff; + write!(f, "EXC_GUARD / {ex:?}")?; + match ex { + err::ExceptionCodeMacGuardType::GUARD_TYPE_NONE => { + write!(f, "") + } + err::ExceptionCodeMacGuardType::GUARD_TYPE_MACH_PORT => { + // See 
https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_guard.h#L69-L81 + if let Some(mach_port_flavor) = + err::ExceptionCodeMacGuardMachPortFlavor::from_u64(flavor) + { + // FIXME: GUARD_EXC_STRICT_REPLY, GUARD_EXC_MOD_REFS and GUARD_EXC_IMMOVABLE have additional flags defined here: + // https://github.com/apple-oss-distributions/xnu/blob/1031c584a5e37aff177559b9f69dbd3c8c3fd30a/osfmk/mach/port.h#L518-L538 + // They also encode additional data in the subcode (a mach reply type or a kernel return value), one has to + // check Apple's code to figure out each one. Since we haven't encountered them yet in the wild there's no + // hurry to decode those. + let port_name = code & 0xfffffff; + if subcode != 0 { + write!( + f, + " / {mach_port_flavor:?} port name: {port_name} subcode: {subcode}", + ) + } else { + write!(f, " / {mach_port_flavor:?} port name: {port_name}",) + } + } else { + write!(f, " / {code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacGuardType::GUARD_TYPE_FD => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_guard.h#L85-L97 + if let Some(fd_flavor) = err::ExceptionCodeMacGuardFDFlavor::from_u64(flavor) { + let fd = code & 0xfffffff; + write!( + f, + " / {fd_flavor:?} file descriptor: {fd} guard identifier: {subcode}", + ) + } else { + write!(f, " / {code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacGuardType::GUARD_TYPE_USER => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_guard.h#L101-L113 + let namespace = code & 0xffffffff; + write!(f, "/ namespace: {namespace} guard identifier: {subcode}",) + } + err::ExceptionCodeMacGuardType::GUARD_TYPE_VN => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_guard.h#L117-L129 + if let Some(vn_flavor) = err::ExceptionCodeMacGuardVNFlavor::from_u64(flavor) { + let pid = 
code & 0xfffffff; + write!(f, " / {vn_flavor:?} pid: {pid} guard identifier: {subcode}",) + } else { + write!(f, " / {code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacGuardType::GUARD_TYPE_VIRT_MEMORY => { + // See https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/osfmk/kern/exc_guard.h#L133-L145 + if let Some(virt_memory_flavor) = + err::ExceptionCodeMacGuardVirtMemoryFlavor::from_u64(flavor) + { + write!(f, " / {virt_memory_flavor:?} offset: {subcode}") + } else { + write!(f, " / {code:#018x} / {subcode:#018x}") + } + } + err::ExceptionCodeMacGuardType::GUARD_TYPE_REJECTED_SC => { + // See https://github.com/apple-oss-distributions/xnu/blob/1031c584a5e37aff177559b9f69dbd3c8c3fd30a/osfmk/kern/exc_guard.h#L149-L163 + if let Some(rejected_sc_flavor) = + err::ExceptionCodeMacGuardRejecteSysCallFlavor::from_u64(flavor) + { + let syscall = subcode; + write!(f, " / {rejected_sc_flavor:?} syscall: {syscall}",) + } else { + write!(f, " / {code:#018x} / {subcode:#018x}") + } + } + } + } + + fn write_signal( + f: &mut fmt::Formatter<'_>, + ex: err::ExceptionCodeLinux, + flags: u32, + ) -> fmt::Result { + if let Some(si_code) = err::ExceptionCodeLinuxSicode::from_i32(flags as i32) { + if si_code == err::ExceptionCodeLinuxSicode::SI_USER { + write!(f, "{ex:?}") + } else { + write!(f, "{ex:?} / {si_code:?}") + } + } else { + write!(f, "{ex:?} / {flags:#010x}") + } + } + + // OK this is kinda a gross hack but I *really* don't want + // to write out all these strings again, so let's just lean on Debug + // repeating the name of the enum variant! 
+ match *self { + // ======================== Mac/iOS ============================ + + // These codes get special messages + MacGeneral(err::ExceptionCodeMac::SIMULATED, _) => write!(f, "Simulated Exception"), + + // Thse codes just repeat their names + MacGeneral(ex, flags) => write!(f, "{ex:?} / {flags:#010x}"), + MacBadAccessKern(ex) => write!(f, "EXC_BAD_ACCESS / {ex:?}"), + MacBadAccessArm(ex) => write!(f, "EXC_BAD_ACCESS / {ex:?}"), + MacBadAccessPpc(ex) => write!(f, "EXC_BAD_ACCESS / {ex:?}"), + MacBadAccessX86(ex) => write!(f, "EXC_BAD_ACCESS / {ex:?}"), + MacBadInstructionArm(ex) => write!(f, "EXC_BAD_INSTRUCTION / {ex:?}"), + MacBadInstructionPpc(ex) => write!(f, "EXC_BAD_INSTRUCTION / {ex:?}"), + MacBadInstructionX86(ex) => write!(f, "EXC_BAD_INSTRUCTION / {ex:?}"), + MacArithmeticArm(ex) => write!(f, "EXC_ARITHMETIC / {ex:?}"), + MacArithmeticPpc(ex) => write!(f, "EXC_ARITHMETIC / {ex:?}"), + MacArithmeticX86(ex) => write!(f, "EXC_ARITHMETIC / {ex:?}"), + MacSoftware(ex) => write!(f, "EXC_SOFTWARE / {ex:?}"), + MacBreakpointArm(ex) => write!(f, "EXC_BREAKPOINT / {ex:?}"), + MacBreakpointPpc(ex) => write!(f, "EXC_BREAKPOINT / {ex:?}"), + MacBreakpointX86(ex) => write!(f, "EXC_BREAKPOINT / {ex:?}"), + MacResource(ex, code, subcode) => write_exc_resource(f, ex, code, subcode), + MacGuard(ex, code, subcode) => write_exc_guard(f, ex, code, subcode), + + // ===================== Linux/Android ========================= + + // These codes just repeat their names + LinuxGeneral(ex, flags) => write_signal(f, ex, flags), + LinuxSigill(ex) => write!(f, "SIGILL / {ex:?}"), + LinuxSigtrap(ex) => write!(f, "SIGTRAP / {ex:?}"), + LinuxSigbus(ex) => write!(f, "SIGBUS / {ex:?}"), + LinuxSigfpe(ex) => write!(f, "SIGFPE / {ex:?}"), + LinuxSigsegv(ex) => write!(f, "SIGSEGV / {ex:?}"), + LinuxSigsys(ex) => write!(f, "SIGSYS / {ex:?}"), + + // ======================== Windows ============================= + + // These codes get special messages + 
WindowsGeneral(err::ExceptionCodeWindows::OUT_OF_MEMORY) => write!(f, "Out of Memory"), + WindowsGeneral(err::ExceptionCodeWindows::UNHANDLED_CPP_EXCEPTION) => { + write!(f, "Unhandled C++ Exception") + } + WindowsGeneral(err::ExceptionCodeWindows::SIMULATED) => { + write!(f, "Simulated Exception") + } + // These codes just repeat their names + WindowsGeneral(ex) => write!(f, "{ex:?}"), + WindowsWinError(winerror) => write!(f, "{winerror:?}"), + WindowsWinErrorWithFacility(facility, winerror) => { + write!(f, "{facility:?} / {winerror:?}") + } + WindowsNtStatus(nt_status) => write_nt_status(f, nt_status as _), + WindowsAccessViolation(ex) => write!(f, "EXCEPTION_ACCESS_VIOLATION_{ex:?}"), + WindowsInPageError(ex, nt_status) => { + write!(f, "EXCEPTION_IN_PAGE_ERROR_{ex:?} / ")?; + write_nt_status(f, nt_status) + } + WindowsStackBufferOverrun(fast_fail) => { + write!(f, "EXCEPTION_STACK_BUFFER_OVERRUN / ")?; + write_fast_fail(f, fast_fail) + } + WindowsUnknown(code) => write!(f, "unknown {code:#010x}"), + + Unknown(code, flags) => write!(f, "unknown {code:#010x} / {flags:#010x}"), + } + } +} + +impl<'a> MinidumpStream<'a> for MinidumpException<'a> { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::ExceptionStream as u32; + + fn read( + bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let raw: md::MINIDUMP_EXCEPTION_STREAM = bytes + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + let context = location_slice(all, &raw.thread_context).ok(); + let thread_id = raw.thread_id; + Ok(MinidumpException { + raw, + thread_id, + context, + endian, + }) + } +} + +impl<'a> MinidumpException<'a> { + /// Get the cpu context of the crashing (or otherwise minidump-requesting) thread. + /// + /// CPU contexts are a platform-specific format, so SystemInfo is required + /// to reliably parse them. 
We used to use heuristics to avoid this requirement, + /// but this made us too brittle to otherwise-backwards-compatible additions + /// to the format. + /// + /// MiscInfo can contain additional details on the cpu context's format, but + /// is optional because those details can be safely ignored (at the cost of + /// being unable to parse some very obscure cpu state). + pub fn context( + &self, + system_info: &MinidumpSystemInfo, + misc: Option<&MinidumpMiscInfo>, + ) -> Option> { + MinidumpContext::read(self.context?, self.endian, system_info, misc) + .ok() + .map(Cow::Owned) + } + + /// Get the address that "caused" the crash. + /// + /// The meaning of this value depends on the kind of crash this was. + /// + /// By default, it's the instruction pointer at the time of the crash. + /// However, if the crash was caused by an illegal memory access, the + /// the address would be the memory address. + /// + /// So for instance, if you crashed from dereferencing a null pointer, + /// the crash_address will be 0 (or close to it, due to offsets). + pub fn get_crash_address(&self, os: Os, cpu: Cpu) -> u64 { + let addr = match ( + os, + err::ExceptionCodeWindows::from_u32(self.raw.exception_record.exception_code), + ) { + (Os::Windows, Some(err::ExceptionCodeWindows::EXCEPTION_ACCESS_VIOLATION)) + | (Os::Windows, Some(err::ExceptionCodeWindows::EXCEPTION_IN_PAGE_ERROR)) + if self.raw.exception_record.number_parameters >= 2 => + { + self.raw.exception_record.exception_information[1] + } + _ => self.raw.exception_record.exception_address, + }; + + // Sometimes on 32-bit these values can be incorrectly sign-extended, + // so mask and zero-extend them here. + match cpu.pointer_width() { + PointerWidth::Bits32 => addr as u32 as u64, + _ => addr, + } + } + + /// Get the crash reason for an exception. + /// + /// The returned value reflects our best attempt to recover a + /// "native" error for the crashing system based on the OS and + /// things like raw error codes. 
+ /// + /// This is an imperfect process, because OSes may have overlapping + /// error types (e.g. WinError and NTSTATUS overlap, so we have to + /// pick one arbirarily). + /// + /// The raw error codes can be extracted from [MinidumpException::raw][]. + pub fn get_crash_reason(&self, os: Os, cpu: Cpu) -> CrashReason { + CrashReason::from_exception(&self.raw, os, cpu) + } + + /// The id of the thread that caused the crash (or otherwise requested + /// the minidump, even if there wasn't actually a crash). + pub fn get_crashing_thread_id(&self) -> u32 { + self.thread_id + } + + /// Write a human-readable description of this `MinidumpException` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print( + &self, + f: &mut T, + system: Option<&MinidumpSystemInfo>, + misc: Option<&MinidumpMiscInfo>, + ) -> io::Result<()> { + write!( + f, + "MINIDUMP_EXCEPTION + thread_id = {:#x} + exception_record.exception_code = {:#x} + exception_record.exception_flags = {:#x} + exception_record.exception_record = {:#x} + exception_record.exception_address = {:#x} + exception_record.number_parameters = {} +", + self.thread_id, + self.raw.exception_record.exception_code, + self.raw.exception_record.exception_flags, + self.raw.exception_record.exception_record, + self.raw.exception_record.exception_address, + self.raw.exception_record.number_parameters, + )?; + for i in 0..self.raw.exception_record.number_parameters as usize { + writeln!( + f, + " exception_record.exception_information[{:2}] = {:#x}", + i, self.raw.exception_record.exception_information[i] + )?; + } + write!( + f, + " thread_context.data_size = {} + thread_context.rva = {:#x} +", + self.raw.thread_context.data_size, self.raw.thread_context.rva + )?; + if let Some(system_info) = system { + if let Some(context) = self.context(system_info, misc) { + writeln!(f)?; + context.print(f)?; + } else { + write!( + f, + " (no context) + + " + )?; + } + } else { + write!( + f, + " (no 
context) + + " + )?; + } + Ok(()) + } +} + +impl<'a> MinidumpStream<'a> for MinidumpAssertion { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::AssertionInfoStream as u32; + + fn read( + bytes: &'a [u8], + _all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let raw: md::MINIDUMP_ASSERTION_INFO = bytes + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + Ok(MinidumpAssertion { raw }) + } +} + +fn utf16_to_string(data: &[u16]) -> Option { + use std::slice; + + let len = data.iter().take_while(|c| **c != 0).count(); + let s16 = &data[..len]; + let bytes = unsafe { slice::from_raw_parts(s16.as_ptr() as *const u8, s16.len() * 2) }; + encoding_rs::UTF_16LE + .decode_without_bom_handling_and_without_replacement(bytes) + .map(String::from) +} + +impl MinidumpAssertion { + /// Get the assertion expression as a `String` if one exists. + pub fn expression(&self) -> Option { + utf16_to_string(&self.raw.expression) + } + /// Get the function name where the assertion happened as a `String` if it exists. + pub fn function(&self) -> Option { + utf16_to_string(&self.raw.function) + } + /// Get the source file name where the assertion happened as a `String` if it exists. + pub fn file(&self) -> Option { + utf16_to_string(&self.raw.file) + } + + /// Write a human-readable description of this `MinidumpAssertion` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. 
+ pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MDAssertion + expression = {} + function = {} + file = {} + line = {} + type = {} + +", + self.expression().unwrap_or_default(), + self.function().unwrap_or_default(), + self.file().unwrap_or_default(), + self.raw.line, + self.raw._type, + )?; + Ok(()) + } +} + +fn read_string_list( + all: &[u8], + location: &md::MINIDUMP_LOCATION_DESCRIPTOR, + endian: scroll::Endian, +) -> Result, Error> { + let data = location_slice(all, location).or(Err(Error::StreamReadFailure))?; + if data.is_empty() { + return Ok(Vec::new()); + } + + let mut offset = 0; + + let count: u32 = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let (count, _) = ensure_count_in_bound(all, count as usize, ::size_with(&endian), 0)?; + + let mut strings = Vec::with_capacity(count); + for _ in 0..count { + let rva: md::RVA = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let string = read_string_utf8(&mut (rva as usize), all, endian) + .ok_or(Error::StreamReadFailure)? 
+ .to_owned(); + + strings.push(string); + } + + Ok(strings) +} + +fn read_simple_string_dictionary( + all: &[u8], + location: &md::MINIDUMP_LOCATION_DESCRIPTOR, + endian: scroll::Endian, +) -> Result, Error> { + let mut dictionary = BTreeMap::new(); + + let data = location_slice(all, location).or(Err(Error::StreamReadFailure))?; + if data.is_empty() { + return Ok(dictionary); + } + + let mut offset = 0; + + let count: u32 = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + for _ in 0..count { + let entry: md::MINIDUMP_SIMPLE_STRING_DICTIONARY_ENTRY = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let key = read_string_utf8(&mut (entry.key as usize), all, endian) + .ok_or(Error::StreamReadFailure)?; + let value = read_string_utf8(&mut (entry.value as usize), all, endian) + .ok_or(Error::StreamReadFailure)?; + + dictionary.insert(key.to_owned(), value.to_owned()); + } + + Ok(dictionary) +} + +fn read_annotation_objects( + all: &[u8], + location: &md::MINIDUMP_LOCATION_DESCRIPTOR, + endian: scroll::Endian, +) -> Result, Error> { + let mut dictionary = BTreeMap::new(); + + let data = location_slice(all, location).or(Err(Error::StreamReadFailure))?; + if data.is_empty() { + return Ok(dictionary); + } + + let mut offset = 0; + + let count: u32 = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + for _ in 0..count { + let raw: md::MINIDUMP_ANNOTATION = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let key = read_string_utf8(&mut (raw.name as usize), all, endian) + .ok_or(Error::StreamReadFailure)?; + + let value = match raw.ty { + md::MINIDUMP_ANNOTATION::TYPE_INVALID => MinidumpAnnotation::Invalid, + md::MINIDUMP_ANNOTATION::TYPE_STRING => { + let string = read_string_utf8_unterminated(&mut (raw.value as usize), all, endian) + .ok_or(Error::StreamReadFailure)? 
+ .to_owned(); + + MinidumpAnnotation::String(string) + } + _ if raw.ty >= md::MINIDUMP_ANNOTATION::TYPE_USER_DEFINED => { + MinidumpAnnotation::UserDefined(raw) + } + _ => MinidumpAnnotation::Unsupported(raw), + }; + + dictionary.insert(key.to_owned(), value); + } + + Ok(dictionary) +} + +impl MinidumpModuleCrashpadInfo { + pub fn read( + link: md::MINIDUMP_MODULE_CRASHPAD_INFO_LINK, + all: &[u8], + endian: scroll::Endian, + ) -> Result { + let raw: md::MINIDUMP_MODULE_CRASHPAD_INFO = all + .pread_with(link.location.rva as usize, endian) + .or(Err(Error::StreamReadFailure))?; + + let list_annotations = read_string_list(all, &raw.list_annotations, endian)?; + let simple_annotations = + read_simple_string_dictionary(all, &raw.simple_annotations, endian)?; + let annotation_objects = read_annotation_objects(all, &raw.annotation_objects, endian)?; + + Ok(Self { + raw, + module_index: link.minidump_module_list_index as usize, + list_annotations, + simple_annotations, + annotation_objects, + }) + } +} + +fn read_crashpad_module_links( + all: &[u8], + location: &md::MINIDUMP_LOCATION_DESCRIPTOR, + endian: scroll::Endian, +) -> Result, Error> { + let data = location_slice(all, location).or(Err(Error::StreamReadFailure))?; + if data.is_empty() { + return Ok(Vec::new()); + } + + let mut offset = 0; + + let count: u32 = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let (count, _) = ensure_count_in_bound( + all, + count as usize, + ::size_with(&endian), + 0, + )?; + + let mut module_links = Vec::with_capacity(count); + for _ in 0..count { + let link: md::MINIDUMP_MODULE_CRASHPAD_INFO_LINK = data + .gread_with(&mut offset, endian) + .or(Err(Error::StreamReadFailure))?; + + let info = MinidumpModuleCrashpadInfo::read(link, all, endian)?; + module_links.push(info); + } + + Ok(module_links) +} + +impl<'a> MinidumpStream<'a> for MinidumpCrashpadInfo { + const STREAM_TYPE: u32 = MINIDUMP_STREAM_TYPE::CrashpadInfoStream as u32; + + fn read( + 
bytes: &'a [u8], + all: &'a [u8], + endian: scroll::Endian, + _system_info: Option<&MinidumpSystemInfo>, + ) -> Result { + let raw: md::MINIDUMP_CRASHPAD_INFO = bytes + .pread_with(0, endian) + .or(Err(Error::StreamReadFailure))?; + + if raw.version == 0 { + // 0 is an invalid version, but all future versions are compatible with v1. + return Err(Error::VersionMismatch); + } + + let simple_annotations = + read_simple_string_dictionary(all, &raw.simple_annotations, endian)?; + + let module_list = read_crashpad_module_links(all, &raw.module_list, endian)?; + + Ok(Self { + raw, + simple_annotations, + module_list, + }) + } +} + +impl MinidumpCrashpadInfo { + /// Write a human-readable description of this `MinidumpCrashpadInfo` to `f`. + /// + /// This is very verbose, it is the format used by `minidump_dump`. + pub fn print(&self, f: &mut T) -> io::Result<()> { + write!( + f, + "MDRawCrashpadInfo + version = {} + report_id = {} + client_id = {} +", + self.raw.version, self.raw.report_id, self.raw.client_id, + )?; + + for (name, value) in &self.simple_annotations { + writeln!(f, " simple_annotations[\"{name}\"] = {value}")?; + } + + for (index, module) in self.module_list.iter().enumerate() { + writeln!( + f, + " module_list[{}].minidump_module_list_index = {}", + index, module.module_index, + )?; + writeln!( + f, + " module_list[{}].version = {}", + index, module.raw.version, + )?; + + for (annotation_index, annotation) in module.list_annotations.iter().enumerate() { + writeln!( + f, + " module_list[{index}].list_annotations[{annotation_index}] = {annotation}", + )?; + } + + for (name, value) in &module.simple_annotations { + writeln!( + f, + " module_list[{index}].simple_annotations[\"{name}\"] = {value}", + )?; + } + + for (name, value) in &module.annotation_objects { + write!( + f, + " module_list[{index}].annotation_objects[\"{name}\"] = ", + )?; + + match value { + MinidumpAnnotation::Invalid => writeln!(f, ""), + MinidumpAnnotation::String(string) => writeln!(f, 
"{string}"), + MinidumpAnnotation::UserDefined(_) => writeln!(f, ""), + MinidumpAnnotation::Unsupported(_) => writeln!(f, ""), + }?; + } + } + + writeln!(f)?; + + Ok(()) + } +} + +/// An index into the contents of a memory-mapped minidump. +pub type MmapMinidump = Minidump<'static, Mmap>; + +impl MmapMinidump { + /// Read a `Minidump` from a `Path` to a file on disk. + /// + /// See [the type definition](Minidump.html) for an example. + pub fn read_path

(path: P) -> Result + where + P: AsRef, + { + let f = File::open(path).or(Err(Error::FileNotFound))?; + let mmap = unsafe { Mmap::map(&f).or(Err(Error::IoError))? }; + Minidump::read(mmap) + } +} + +/// A stream in the minidump that this implementation can interpret, +#[derive(Debug)] +pub struct MinidumpImplementedStream { + pub stream_type: MINIDUMP_STREAM_TYPE, + pub location: md::MINIDUMP_LOCATION_DESCRIPTOR, + pub vendor: &'static str, +} + +/// A stream in the minidump that this implementation has no knowledge of. +#[derive(Debug, Clone)] +pub struct MinidumpUnknownStream { + pub stream_type: u32, + pub location: md::MINIDUMP_LOCATION_DESCRIPTOR, + pub vendor: &'static str, +} + +/// A stream in the minidump that this implementation is aware of but doesn't +/// yet support. +#[derive(Debug, Clone)] +pub struct MinidumpUnimplementedStream { + pub stream_type: MINIDUMP_STREAM_TYPE, + pub location: md::MINIDUMP_LOCATION_DESCRIPTOR, + pub vendor: &'static str, +} + +impl<'a, T> Minidump<'a, T> +where + T: Deref + 'a, +{ + /// Read a `Minidump` from the provided `data`. + /// + /// Typically this will be a `Vec` or `&[u8]` with the full contents of the minidump, + /// but you can also use something like `memmap::Mmap`. + pub fn read(data: T) -> Result, Error> { + let mut offset = 0; + let mut endian = LE; + let mut header: md::MINIDUMP_HEADER = data + .gread_with(&mut offset, endian) + .or(Err(Error::MissingHeader))?; + if header.signature != md::MINIDUMP_SIGNATURE { + if header.signature.swap_bytes() != md::MINIDUMP_SIGNATURE { + return Err(Error::HeaderMismatch); + } + // Try again with big-endian. 
+ endian = BE; + offset = 0; + header = data + .gread_with(&mut offset, endian) + .or(Err(Error::MissingHeader))?; + if header.signature != md::MINIDUMP_SIGNATURE { + return Err(Error::HeaderMismatch); + } + } + if (header.version & 0x0000ffff) != md::MINIDUMP_VERSION { + return Err(Error::VersionMismatch); + } + + offset = header.stream_directory_rva as usize; + + let mut streams = BTreeMap::new(); + for i in 0..header.stream_count { + let dir: md::MINIDUMP_DIRECTORY = data + .gread_with(&mut offset, endian) + .or(Err(Error::MissingDirectory))?; + if let Some((old_idx, old_dir)) = streams.insert(dir.stream_type, (i, dir.clone())) { + if let Some(known_stream_type) = MINIDUMP_STREAM_TYPE::from_u32(dir.stream_type) { + if !(known_stream_type == MINIDUMP_STREAM_TYPE::UnusedStream + && old_dir.location.data_size == 0 + && dir.location.data_size == 0) + { + warn!("Minidump contains multiple streams of type {} ({:?}) at indices {} ({} bytes) and {} ({} bytes) (using {})", + dir.stream_type, + known_stream_type, + old_idx, + old_dir.location.data_size, + i, + dir.location.data_size, + i, + ); + } + } else { + warn!("Minidump contains multiple streams of unknown type {} at indices {} ({} bytes) and {} ({} bytes) (using {})", + dir.stream_type, + old_idx, + old_dir.location.data_size, + i, + dir.location.data_size, + i, + ); + } + } + } + let system_info = streams + .get(&MinidumpSystemInfo::STREAM_TYPE) + .and_then(|(_, dir)| { + location_slice(data.deref(), &dir.location) + .ok() + .and_then(|bytes| { + let all_bytes = data.deref(); + MinidumpSystemInfo::read(bytes, all_bytes, endian, None).ok() + }) + }); + + Ok(Minidump { + data, + header, + streams, + endian, + system_info, + _phantom: PhantomData, + }) + } + + /// Read and parse the specified [`MinidumpStream`][] `S` from the Minidump, if it exists. 
+ /// + /// Because Minidump Streams can have totally different formats and meanings, the only + /// way to coherently access one is by specifying a static type that provides an + /// interpretation and interface of that format. + /// + /// As such, typical usage of this interface is to just statically request every + /// stream your care about. Depending on what analysis you're trying to perform, you may: + /// + /// * Consider it an error for a stream to be missing (using `?` or `unwrap`) + /// * Branch on the presence of stream to conditionally refine your analysis + /// * Use a stream's `Default` implementation to make progress (with `unwrap_or_default`) + /// + /// ``` + /// use minidump::*; + /// + /// fn main() -> Result<(), Error> { + /// // Read the minidump from a file + /// let mut dump = minidump::Minidump::read_path("../testdata/test.dmp")?; + /// + /// // Statically request (and require) several streams we care about: + /// let system_info = dump.get_stream::()?; + /// let exception = dump.get_stream::()?; + /// + /// // Combine the contents of the streams to perform more refined analysis + /// let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu); + /// + /// // Conditionally analyze a stream + /// if let Ok(threads) = dump.get_stream::() { + /// // Use `Default` to try to make some progress when a stream is missing. + /// // This is especially natural for MinidumpMemoryList because + /// // everything needs to handle memory lookups failing anyway. + /// let mem = dump.get_memory().unwrap_or_default(); + /// + /// for thread in &threads.threads { + /// let stack = thread.stack_memory(&mem); + /// // ... + /// } + /// } + /// + /// Ok(()) + /// } + /// ``` + /// + /// Some streams are impossible to fully parse/interpret without the contents + /// of other streams (for instance, many things require [`MinidumpSystemInfo`][] to interpret + /// hardware-specific details). 
As a result, some parsing of the stream may be + /// further deferred to methods on the Stream type where those dependencies can be provided + /// (e.g. [`MinidumpException::get_crash_reason`][]). + /// + /// Note that the lifetime of the returned stream is bound to the lifetime of the + /// `Minidump` struct itself and not to the lifetime of the data backing this minidump. + /// This is a consequence of how this struct relies on [`Deref`][] to access the data. + /// + /// ## Currently Supported Streams + /// + /// * [`MinidumpAssertion`][] + /// * [`MinidumpBreakpadInfo`][] + /// * [`MinidumpCrashpadInfo`][] + /// * [`MinidumpException`][] + /// * [`MinidumpLinuxCpuInfo`][] + /// * [`MinidumpLinuxEnviron`][] + /// * [`MinidumpLinuxLsbRelease`][] + /// * [`MinidumpLinuxMaps`][] + /// * [`MinidumpLinuxProcStatus`][] + /// * [`MinidumpMacCrashInfo`][] + /// * [`MinidumpMacBootargs`][] + /// * [`MinidumpMemoryList`][] + /// * [`MinidumpMemory64List`][] + /// * [`MinidumpMemoryInfoList`][] + /// * [`MinidumpMiscInfo`][] + /// * [`MinidumpModuleList`][] + /// * [`MinidumpSystemInfo`][] + /// * [`MinidumpThreadList`][] + /// * [`MinidumpThreadNames`][] + /// * [`MinidumpUnloadedModuleList`][] + /// * [`MinidumpHandleDataStream`][] + /// + pub fn get_stream(&'a self) -> Result + where + S: MinidumpStream<'a>, + { + match self.get_raw_stream(S::STREAM_TYPE) { + Err(e) => Err(e), + Ok(bytes) => { + let all_bytes = self.data.deref(); + S::read(bytes, all_bytes, self.endian, self.system_info.as_ref()) + } + } + } + + /// Get a stream of raw data from the minidump. + /// + /// This can be used to get the contents of arbitrary minidump streams. + /// For streams of known types you almost certainly want to use + /// [`Minidump::get_stream`][] instead. + /// + /// Note that the lifetime of the returned stream is bound to the lifetime of the this + /// `Minidump` struct itself and not to the lifetime of the data backing this minidump. 
+ /// This is a consequence of how this struct relies on [Deref] to access the data. + pub fn get_raw_stream(&'a self, stream_type: u32) -> Result<&'a [u8], Error> { + match self.streams.get(&stream_type) { + None => Err(Error::StreamNotFound), + Some((_, dir)) => { + let bytes = self.data.deref(); + location_slice(bytes, &dir.location) + } + } + } + + /// Get whichever of the two MemoryLists are available in the minidump, + /// preferring [`MinidumpMemory64List`][]. + pub fn get_memory(&'a self) -> Option> { + self.get_stream::() + .map(UnifiedMemoryList::Memory64) + .or_else(|_| { + self.get_stream::() + .map(UnifiedMemoryList::Memory) + }) + .ok() + } + + /// A listing of all the streams in the Minidump that this library is *aware* of, + /// but has no further analysis for. + /// + /// If there are multiple copies of the same stream type (which should not happen for + /// well-formed Minidumps), then only one of them will be yielded, arbitrarily. + pub fn unimplemented_streams(&self) -> impl Iterator + '_ { + static UNIMPLEMENTED_STREAMS: [MINIDUMP_STREAM_TYPE; 30] = [ + // Presumably will never have an implementation: + MINIDUMP_STREAM_TYPE::UnusedStream, + MINIDUMP_STREAM_TYPE::ReservedStream0, + MINIDUMP_STREAM_TYPE::ReservedStream1, + MINIDUMP_STREAM_TYPE::LastReservedStream, + // Presumably should be implemented: + MINIDUMP_STREAM_TYPE::ThreadExListStream, + MINIDUMP_STREAM_TYPE::CommentStreamA, + MINIDUMP_STREAM_TYPE::CommentStreamW, + MINIDUMP_STREAM_TYPE::FunctionTable, + MINIDUMP_STREAM_TYPE::HandleOperationListStream, + MINIDUMP_STREAM_TYPE::TokenStream, + MINIDUMP_STREAM_TYPE::JavaScriptDataStream, + MINIDUMP_STREAM_TYPE::SystemMemoryInfoStream, + MINIDUMP_STREAM_TYPE::ProcessVmCountersStream, + MINIDUMP_STREAM_TYPE::IptTraceStream, + // Windows CE streams, very unlikely to be found in the wild. 
+ // Their contents are documented here: https://docs.microsoft.com/en-us/previous-versions/windows/embedded/ms939618(v=msdn.10) + MINIDUMP_STREAM_TYPE::ceStreamNull, + MINIDUMP_STREAM_TYPE::ceStreamSystemInfo, + MINIDUMP_STREAM_TYPE::ceStreamException, + MINIDUMP_STREAM_TYPE::ceStreamModuleList, + MINIDUMP_STREAM_TYPE::ceStreamProcessList, + MINIDUMP_STREAM_TYPE::ceStreamThreadList, + MINIDUMP_STREAM_TYPE::ceStreamThreadContextList, + MINIDUMP_STREAM_TYPE::ceStreamThreadCallStackList, + MINIDUMP_STREAM_TYPE::ceStreamMemoryVirtualList, + MINIDUMP_STREAM_TYPE::ceStreamMemoryPhysicalList, + MINIDUMP_STREAM_TYPE::ceStreamBucketParameters, + MINIDUMP_STREAM_TYPE::ceStreamProcessModuleMap, + MINIDUMP_STREAM_TYPE::ceStreamDiagnosisList, + // non-standard streams (should also be implemented): + MINIDUMP_STREAM_TYPE::LinuxCmdLine, + MINIDUMP_STREAM_TYPE::LinuxAuxv, + MINIDUMP_STREAM_TYPE::LinuxDsoDebug, + ]; + self.streams.iter().filter_map(|(_, (_, stream))| { + MINIDUMP_STREAM_TYPE::from_u32(stream.stream_type).and_then(|stream_type| { + if UNIMPLEMENTED_STREAMS.contains(&stream_type) { + return Some(MinidumpUnimplementedStream { + stream_type, + location: stream.location, + vendor: stream_vendor(stream.stream_type), + }); + } + None + }) + }) + } + + /// A listing of all the streams in the Minidump that this library has no knowledge of. + /// + /// If there are multiple copies of the same stream (which should not happen for + /// well-formed Minidumps), then only one of them will be yielded, arbitrarily. + pub fn unknown_streams(&self) -> impl Iterator + '_ { + self.streams.iter().filter_map(|(_, (_, stream))| { + if MINIDUMP_STREAM_TYPE::from_u32(stream.stream_type).is_none() { + return Some(MinidumpUnknownStream { + stream_type: stream.stream_type, + location: stream.location, + vendor: stream_vendor(stream.stream_type), + }); + } + None + }) + } + + /// A listing of all the streams in the Minidump. 
+ /// + /// If there are multiple copies of the same stream (which should not happen for + /// well-formed Minidumps), then only one of them will be yielded, arbitrarily. + pub fn all_streams(&self) -> impl Iterator + '_ { + self.streams.iter().map(|(_, (_, stream))| stream) + } + + /// Write a verbose description of the `Minidump` to `f`. + pub fn print(&self, f: &mut W) -> io::Result<()> { + fn get_stream_name(stream_type: u32) -> Cow<'static, str> { + if let Some(stream) = MINIDUMP_STREAM_TYPE::from_u32(stream_type) { + Cow::Owned(format!("{stream:?}")) + } else { + Cow::Borrowed("unknown") + } + } + + write!( + f, + r#"MDRawHeader + signature = {:#x} + version = {:#x} + stream_count = {} + stream_directory_rva = {:#x} + checksum = {:#x} + time_date_stamp = {:#x} {} + flags = {:#x} + +"#, + self.header.signature, + self.header.version, + self.header.stream_count, + self.header.stream_directory_rva, + self.header.checksum, + self.header.time_date_stamp, + format_time_t(self.header.time_date_stamp), + self.header.flags, + )?; + let mut streams = self.streams.iter().collect::>(); + streams.sort_by(|&(&_, &(a, _)), &(&_, &(b, _))| a.cmp(&b)); + for &(_, &(i, ref stream)) in streams.iter() { + write!( + f, + r#"mDirectory[{}] +MDRawDirectory + stream_type = {:#x} ({}) + location.data_size = {} + location.rva = {:#x} + +"#, + i, + stream.stream_type, + get_stream_name(stream.stream_type), + stream.location.data_size, + stream.location.rva + )?; + } + writeln!(f, "Streams:")?; + streams.sort_by(|&(&a, &(_, _)), &(&b, &(_, _))| a.cmp(&b)); + for (_, &(i, ref stream)) in streams { + writeln!( + f, + " stream type {:#x} ({}) at index {}", + stream.stream_type, + get_stream_name(stream.stream_type), + i + )?; + } + writeln!(f)?; + Ok(()) + } +} + +fn stream_vendor(stream_type: u32) -> &'static str { + if stream_type <= MINIDUMP_STREAM_TYPE::LastReservedStream as u32 { + "Official" + } else { + match stream_type & 0xFFFF0000 { + 0x4767_0000 => "Google Extension", + 
0x4d7a_0000 => "Mozilla Extension", + _ => "Unknown Extension", + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use md::GUID; + use minidump_common::{ + errors::NtStatusWindows, + format::{PlatformId, ProcessorArchitecture}, + }; + use minidump_synth::{ + AnnotationValue, CrashpadInfo, DumpString, Exception, + HandleDescriptor as SynthHandleDescriptor, Memory, MemoryInfo as SynthMemoryInfo, + MiscFieldsBuildString, MiscFieldsPowerInfo, MiscFieldsProcessTimes, MiscFieldsTimeZone, + MiscInfo5Fields, MiscStream, Module as SynthModule, ModuleCrashpadInfo, SimpleStream, + SynthMinidump, SystemInfo, Thread, ThreadName, UnloadedModule as SynthUnloadedModule, + STOCK_VERSION_INFO, + }; + use test_assembler::*; + + fn read_synth_dump<'a>(dump: SynthMinidump) -> Result>, Error> { + Minidump::read(dump.finish().unwrap()) + } + + #[ctor::ctor] + fn init_logger() { + env_logger::builder().is_test(true).init(); + } + + #[test] + fn test_simple_synth_dump() { + const STREAM_TYPE: u32 = 0x11223344; + let dump = SynthMinidump::with_endian(Endian::Little).add_stream(SimpleStream { + stream_type: STREAM_TYPE, + section: Section::with_endian(Endian::Little).D32(0x55667788), + }); + let dump = read_synth_dump(dump).unwrap(); + assert_eq!(dump.endian, LE); + assert_eq!( + dump.get_raw_stream(STREAM_TYPE).unwrap(), + &[0x88, 0x77, 0x66, 0x55] + ); + + assert_eq!( + dump.get_raw_stream(0xaabbccddu32), + Err(Error::StreamNotFound) + ); + } + + #[test] + fn test_simple_synth_dump_bigendian() { + const STREAM_TYPE: u32 = 0x11223344; + let dump = SynthMinidump::with_endian(Endian::Big).add_stream(SimpleStream { + stream_type: STREAM_TYPE, + section: Section::with_endian(Endian::Big).D32(0x55667788), + }); + let dump = read_synth_dump(dump).unwrap(); + assert_eq!(dump.endian, BE); + assert_eq!( + dump.get_raw_stream(STREAM_TYPE).unwrap(), + &[0x55, 0x66, 0x77, 0x88] + ); + + assert_eq!( + dump.get_raw_stream(0xaabbccddu32), + Err(Error::StreamNotFound) + ); + } + + #[test] + fn 
test_thread_names() { + let good_thread_id = 17; + let corrupt_thread_id = 123; + + let good_name = DumpString::new("MyCoolThread", Endian::Little); + // No corrupt name, will dangle + + let good_thread_name_entry = + ThreadName::new(Endian::Little, good_thread_id, Some(&good_name)); + let corrupt_thread_name_entry = ThreadName::new(Endian::Little, corrupt_thread_id, None); + + let dump = SynthMinidump::with_endian(Endian::Little) + .add_thread_name(good_thread_name_entry) + .add_thread_name(corrupt_thread_name_entry) + .add(good_name); + + let dump = read_synth_dump(dump).unwrap(); + let thread_names = dump.get_stream::().unwrap(); + assert_eq!(thread_names.names.len(), 1); + assert_eq!( + &*thread_names.get_name(good_thread_id).unwrap(), + "MyCoolThread" + ); + assert_eq!(thread_names.get_name(corrupt_thread_id), None); + } + + #[test] + fn test_module_list() { + let name = DumpString::new("single module", Endian::Little); + let cv_record = Section::with_endian(Endian::Little) + .D32(md::CvSignature::Pdb70 as u32) // signature + // signature, a GUID + .D32(0xabcd1234) + .D16(0xf00d) + .D16(0xbeef) + .append_bytes(b"\x01\x02\x03\x04\x05\x06\x07\x08") + .D32(1) // age + .append_bytes(b"c:\\foo\\file.pdb\0"); // pdb_file_name + let module = SynthModule::new( + Endian::Little, + 0xa90206ca83eb2852, + 0xada542bd, + &name, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_module(module) + .add(name) + .add(cv_record); + let dump = read_synth_dump(dump).unwrap(); + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 1); + assert_eq!(modules[0].base_address(), 0xa90206ca83eb2852); + assert_eq!(modules[0].size(), 0xada542bd); + assert_eq!(modules[0].code_file(), "single module"); + // time_date_stamp and size_of_image concatenated + assert_eq!( + modules[0].code_identifier().unwrap(), + 
CodeId::new("B1054D2Aada542bd".to_string()) + ); + assert_eq!(modules[0].debug_file().unwrap(), "c:\\foo\\file.pdb"); + assert_eq!( + modules[0].debug_identifier().unwrap(), + DebugId::from_breakpad("ABCD1234F00DBEEF01020304050607081").unwrap() + ); + } + + #[test] + fn test_module_list_pdb20() { + let name = DumpString::new("single module", Endian::Little); + let cv_record = Section::with_endian(Endian::Little) + .D32(md::CvSignature::Pdb20 as u32) // cv_signature + .D32(0x0) // cv_offset + .D32(0xabcd1234) // signature + .D32(1) // age + .append_bytes(b"c:\\foo\\file.pdb\0"); // pdb_file_name + let module = SynthModule::new( + Endian::Little, + 0xa90206ca83eb2852, + 0xada542bd, + &name, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_module(module) + .add(name) + .add(cv_record); + let dump = read_synth_dump(dump).unwrap(); + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 1); + assert_eq!(modules[0].base_address(), 0xa90206ca83eb2852); + assert_eq!(modules[0].size(), 0xada542bd); + assert_eq!(modules[0].code_file(), "single module"); + // time_date_stamp and size_of_image concatenated + assert_eq!( + modules[0].code_identifier().unwrap(), + CodeId::new("B1054D2Aada542bd".to_string()) + ); + assert_eq!(modules[0].debug_file().unwrap(), "c:\\foo\\file.pdb"); + assert_eq!( + modules[0].debug_identifier().unwrap(), + DebugId::from_pdb20(0xabcd1234, 1) + ); + } + + #[test] + fn test_unloaded_module_list() { + let name = DumpString::new("single module", Endian::Little); + let module = SynthUnloadedModule::new( + Endian::Little, + 0xa90206ca83eb2852, + 0xada542bd, + &name, + 0xb1054d2a, + 0x34571371, + ); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_unloaded_module(module) + .add(name); + let dump = read_synth_dump(dump).unwrap(); + let module_list = 
dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 1); + assert_eq!(modules[0].base_address(), 0xa90206ca83eb2852); + assert_eq!(modules[0].size(), 0xada542bd); + assert_eq!(modules[0].code_file(), "single module"); + // time_date_stamp and size_of_image concatenated + assert_eq!( + modules[0].code_identifier().unwrap(), + CodeId::new("B1054D2Aada542bd".to_string()) + ); + } + + #[test] + fn test_memory_info() { + let info1_alloc_protection = md::MemoryProtection::PAGE_GUARD; + let info1_protection = md::MemoryProtection::PAGE_EXECUTE_READ; + let info1_state = md::MemoryState::MEM_FREE; + let info1_ty = md::MemoryType::MEM_MAPPED; + let info1 = SynthMemoryInfo::new( + Endian::Little, + 0xa90206ca83eb2852, + 0xa802064a83eb2752, + info1_alloc_protection.bits(), + 0xf80e064a93eb2356, + info1_state.bits(), + info1_protection.bits(), + info1_ty.bits(), + ); + + let info2_alloc_protection = md::MemoryProtection::PAGE_EXECUTE_READ; + let info2_protection = md::MemoryProtection::PAGE_READONLY; + let info2_state = md::MemoryState::MEM_COMMIT; + let info2_ty = md::MemoryType::MEM_PRIVATE; + let info2 = SynthMemoryInfo::new( + Endian::Little, + 0xd70206ca83eb2852, + 0xb802064383eb2752, + info2_alloc_protection.bits(), + 0xe80e064a93eb2356, + info2_state.bits(), + info2_protection.bits(), + info2_ty.bits(), + ); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_memory_info(info1) + .add_memory_info(info2); + + let dump = read_synth_dump(dump).unwrap(); + + // Read both kinds of info to test this path on UnifiedMemoryInfo + let info_list = dump.get_stream::().ok(); + let maps = dump.get_stream::().ok(); + assert!(info_list.is_some()); + assert!(maps.is_none()); + + let unified_info = UnifiedMemoryInfoList::new(info_list, maps).unwrap(); + let info_list = unified_info.info().unwrap(); + assert!(unified_info.maps().is_none()); + + // Assert that unified and the info_list agree + for (info, unified) in 
info_list.iter().zip(unified_info.iter()) { + if let UnifiedMemoryInfo::Info(info2) = unified { + assert_eq!(info, info2); + } else { + unreachable!(); + } + } + + let infos = info_list.iter().collect::>(); + + assert_eq!(infos.len(), 2); + + assert_eq!(infos[0].raw.base_address, 0xa90206ca83eb2852); + assert_eq!(infos[0].raw.allocation_base, 0xa802064a83eb2752); + assert_eq!( + infos[0].raw.allocation_protection, + info1_alloc_protection.bits() + ); + assert_eq!(infos[0].raw.region_size, 0xf80e064a93eb2356); + assert_eq!(infos[0].raw.state, info1_state.bits()); + assert_eq!(infos[0].raw.protection, info1_protection.bits()); + assert_eq!(infos[0].raw._type, info1_ty.bits()); + + assert_eq!(infos[0].allocation_protection, info1_alloc_protection); + assert_eq!(infos[0].protection, info1_protection); + assert_eq!(infos[0].state, info1_state); + assert_eq!(infos[0].ty, info1_ty); + assert!(infos[0].is_executable()); + + assert_eq!(infos[1].raw.base_address, 0xd70206ca83eb2852); + assert_eq!(infos[1].raw.allocation_base, 0xb802064383eb2752); + assert_eq!( + infos[1].raw.allocation_protection, + info2_alloc_protection.bits() + ); + assert_eq!(infos[1].raw.region_size, 0xe80e064a93eb2356); + assert_eq!(infos[1].raw.state, info2_state.bits()); + assert_eq!(infos[1].raw.protection, info2_protection.bits()); + assert_eq!(infos[1].raw._type, info2_ty.bits()); + + assert_eq!(infos[1].allocation_protection, info2_alloc_protection); + assert_eq!(infos[1].protection, info2_protection); + assert_eq!(infos[1].state, info2_state); + assert_eq!(infos[1].ty, info2_ty); + assert!(!infos[1].is_executable()); + } + + #[test] + fn test_linux_maps() { + use procfs_core::process::{MMPermissions, MMapPath}; + + // TODO: is it okay to give up wonky whitespace support? 
+ let input = + b"a90206ca83eb2852-b90206ca83eb3852 r-xp 10bac9000 fd:05 1196511 /usr/lib64/libtdb1.so\n\ + c70206ca83eb2852-de0206ca83eb2852 -w-s 10bac9000 fd:05 1196511 /usr/lib64/libtdb2.so (deleted)"; + + let dump = SynthMinidump::with_endian(Endian::Little).set_linux_maps(input); + let dump = read_synth_dump(dump).unwrap(); + + // Read both kinds of info to test this path on UnifiedMemoryInfo + let info_list = dump.get_stream::().ok(); + let maps = dump.get_stream::().unwrap(); + assert!(info_list.is_none()); + + let unified_info = UnifiedMemoryInfoList::new(info_list, Some(maps)).unwrap(); + let maps = unified_info.maps().unwrap(); + assert!(unified_info.info().is_none()); + + // Assert that unified and the maps agree + for (info, unified) in maps.iter().zip(unified_info.iter()) { + if let UnifiedMemoryInfo::Map(info2) = unified { + assert_eq!(info, info2); + } else { + unreachable!(); + } + } + + let maps = maps.iter().collect::>(); + assert_eq!(maps.len(), 2); + + assert_eq!(maps[0].map.address.0, 0xa90206ca83eb2852); + assert_eq!(maps[0].map.address.1, 0xb90206ca83eb3852); + assert_eq!( + maps[0].map.pathname, + MMapPath::Path("/usr/lib64/libtdb1.so".into()) + ); + assert!( + maps[0].map.perms + == MMPermissions::READ | MMPermissions::EXECUTE | MMPermissions::PRIVATE + ); + + assert_eq!(maps[1].map.address.0, 0xc70206ca83eb2852); + assert_eq!(maps[1].map.address.1, 0xde0206ca83eb2852); + assert_eq!( + maps[1].map.pathname, + MMapPath::Path("/usr/lib64/libtdb2.so (deleted)".into()) + ); + assert!(maps[1].map.perms == MMPermissions::WRITE | MMPermissions::SHARED); + + let mut unified_infos = unified_info.by_addr(); + + assert!(matches!(unified_infos.next(), Some(UnifiedMemoryInfo::Map(m)) if m == maps[0])); + assert!(matches!(unified_infos.next(), Some(UnifiedMemoryInfo::Map(m)) if m == maps[1])); + } + + #[test] + fn test_linux_map_parse() { + use procfs_core::process::{MMPermissions, MMapPath::*}; + let parse = |input| 
MinidumpLinuxMapInfo::from_line(input).unwrap(); + let maybe_parse = MinidumpLinuxMapInfo::from_line; + + // TODO: is it okay to give up wonky whitespace support? + + { + // Normal file + let map = parse(b"10a00-10b00 r-xp 10bac9000 fd:05 1196511 /usr/lib64/libtdb1.so"); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, Path("/usr/lib64/libtdb1.so".into())); + + assert!( + map.map.perms + == MMPermissions::READ | MMPermissions::EXECUTE | MMPermissions::PRIVATE + ); + assert!(map.is_readable()); + assert!(map.is_executable()); + } + + { + // Deleted file (also some whitespace in the file name) + let map = parse(b"ffffffffff600000-ffffffffff601000 -wxs 10bac9000 fd:05 1196511 /usr/lib64/ libtdb1.so (deleted)"); + + assert_eq!(map.map.address.0, 0xffffffffff600000); + assert_eq!(map.map.address.1, 0xffffffffff601000); + assert_eq!( + map.memory_range(), + Some(Range::new(0xffffffffff600000, 0xffffffffff601000)) + ); + assert_eq!( + map.map.pathname, + Path("/usr/lib64/ libtdb1.so (deleted)".into()) + ); + assert!( + map.map.perms + == MMPermissions::WRITE | MMPermissions::EXECUTE | MMPermissions::SHARED + ); + assert!(map.is_writable()); + assert!(map.is_executable()); + } + + { + // Stack + let map = parse(b"10a00-10b00 ------- 10bac9000 fd:05 1196511 [stack]"); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, Stack); + assert!(map.map.perms == MMPermissions::NONE); + } + + { + // Stack with tid + let map = parse(b"10a00-10b00 ------- 10bac9000 fd:05 1196511 [stack:1234567]"); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, TStack(1234567)); + 
assert!(map.map.perms == MMPermissions::NONE); + } + + { + // Heap + let map = parse(b"10a00-10b00 -- 10bac9000 fd:05 1196511 [heap]"); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, Heap); + assert!(map.map.perms == MMPermissions::NONE); + } + + { + // Vdso + let map = parse(b"10a00-10b00 r-wx- 10bac9000 fd:05 1196511 [vdso]"); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, Vdso); + assert!( + map.map.perms + == MMPermissions::READ | MMPermissions::WRITE | MMPermissions::EXECUTE + ); + } + + { + // Unknown Special + let map = parse(b"10a00-10b00 r-wx- 10bac9000 fd:05 1196511 [asdfasd]"); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, Other("asdfasd".into())); + assert!( + map.map.perms + == MMPermissions::READ | MMPermissions::WRITE | MMPermissions::EXECUTE + ); + } + + { + // Anonymous + let map = parse(b"10a00-10b00 -r- 10bac9000 fd:05 1196511 "); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, Anonymous); + assert!(map.map.perms == MMPermissions::READ); + } + + /* + { + // Truncated defaults to anonymous + let map = parse(b"10a00-10b00"); + + assert_eq!(map.map.address.0, 0x10a00); + assert_eq!(map.map.address.1, 0x10b00); + assert_eq!(map.memory_range(), Some(Range::new(0x10a00, 0x10b00))); + assert_eq!(map.map.pathname, Anonymous); + assert!(map.map.perms == MMPermissions::NONE); + } + */ + + { + // Reversed ranges result in None for memory_range() + let map = parse(b"fffff-10000 -r- 10bac9000 fd:05 1196511 "); 
+ + assert_eq!(map.map.address.0, 0xfffff); + assert_eq!(map.map.address.1, 0x10000); + assert_eq!(map.memory_range(), None); + } + + { + // Equal ranges are valid + let map = parse(b"fffff-fffff --- 10bac9000 fd:05 1196511 "); + + assert_eq!(map.map.address.0, 0xfffff); + assert_eq!(map.map.address.1, 0xfffff); + assert_eq!(map.memory_range(), Some(Range::new(0xfffff, 0xfffff))); + } + + { + // blank line + assert!(maybe_parse(b"").is_none()); + assert!(maybe_parse(b" ").is_none()); + } + + { + // bad addresses + let map = maybe_parse(b"-10b00 r-xp 10bac9000 fd:05 1196511 /usr/lib64/libtdb1.so"); + assert!(map.is_none()); + + let map = maybe_parse(b"10b00- r-xp 10bac9000 fd:05 1196511 /usr/lib64/libtdb1.so"); + assert!(map.is_none()); + + let map = maybe_parse(b"10b00 r-xp 10bac9000 fd:05 1196511 /usr/lib64/libtdb1.so"); + assert!(map.is_none()); + } + + { + // bad [stack:] + let map = maybe_parse(b"10a00-10b00 r-xp 10bac9000 fd:05 1196511 [stack:]"); + assert!(map.is_none()); + + let map = maybe_parse(b"10a00-10b00 r-xp 10bac9000 fd:05 1196511 [stack:a10]"); + assert!(map.is_none()); + } + } + + #[test] + fn test_module_list_overlap() { + let name1 = DumpString::new("module 1", Endian::Little); + let name2 = DumpString::new("module 2", Endian::Little); + let name3 = DumpString::new("module 3", Endian::Little); + let name4 = DumpString::new("module 4", Endian::Little); + let name5 = DumpString::new("module 5", Endian::Little); + let cv_record = Section::with_endian(Endian::Little) + .D32(md::CvSignature::Pdb70 as u32) // signature + // signature, a GUID + .D32(0xabcd1234) + .D16(0xf00d) + .D16(0xbeef) + .append_bytes(b"\x01\x02\x03\x04\x05\x06\x07\x08") + .D32(1) // age + .append_bytes(b"c:\\foo\\file.pdb\0"); // pdb_file_name + let module1 = SynthModule::new( + Endian::Little, + 0x100000000, + 0x4000, + &name1, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + // module2 overlaps module1 exactly + let module2 = 
SynthModule::new( + Endian::Little, + 0x100000000, + 0x4000, + &name2, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + // module3 overlaps module1 partially + let module3 = SynthModule::new( + Endian::Little, + 0x100000001, + 0x4000, + &name3, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + // module4 is fully contained within module1 + let module4 = SynthModule::new( + Endian::Little, + 0x100000001, + 0x3000, + &name4, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + // module5 is cool, though. + let module5 = SynthModule::new( + Endian::Little, + 0x100004000, + 0x4000, + &name5, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_module(module1) + .add_module(module2) + .add_module(module3) + .add_module(module4) + .add_module(module5) + .add(name1) + .add(name2) + .add(name3) + .add(name4) + .add(name5) + .add(cv_record); + let dump = read_synth_dump(dump).unwrap(); + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 5); + assert_eq!(modules[0].base_address(), 0x100000000); + assert_eq!(modules[0].size(), 0x4000); + assert_eq!(modules[0].code_file(), "module 1"); + assert_eq!(modules[1].base_address(), 0x100000000); + assert_eq!(modules[1].size(), 0x4000); + assert_eq!(modules[1].code_file(), "module 2"); + assert_eq!(modules[2].base_address(), 0x100000001); + assert_eq!(modules[2].size(), 0x4000); + assert_eq!(modules[2].code_file(), "module 3"); + assert_eq!(modules[3].base_address(), 0x100000001); + assert_eq!(modules[3].size(), 0x3000); + assert_eq!(modules[3].code_file(), "module 4"); + assert_eq!(modules[4].base_address(), 0x100004000); + assert_eq!(modules[4].size(), 0x4000); + assert_eq!(modules[4].code_file(), "module 5"); + + // module_at_address should discard 
overlapping modules. + assert_eq!(module_list.by_addr().count(), 2); + assert_eq!( + module_list + .module_at_address(0x100001000) + .unwrap() + .code_file(), + "module 1" + ); + assert_eq!( + module_list + .module_at_address(0x100005000) + .unwrap() + .code_file(), + "module 5" + ); + } + + #[test] + fn test_memory_list() { + const CONTENTS: &[u8] = b"memory_contents"; + let memory = Memory::with_section( + Section::with_endian(Endian::Little).append_bytes(CONTENTS), + 0x309d68010bd21b2c, + ); + let dump = SynthMinidump::with_endian(Endian::Little).add_memory(memory); + let dump = read_synth_dump(dump).unwrap(); + let memory_list = dump.get_stream::>().unwrap(); + let regions = memory_list.iter().collect::>(); + assert_eq!(regions.len(), 1); + assert_eq!(regions[0].base_address, 0x309d68010bd21b2c); + assert_eq!(regions[0].size, CONTENTS.len() as u64); + assert_eq!(®ions[0].bytes, &CONTENTS); + } + + #[test] + fn test_memory64_list() { + const CONTENTS0: &[u8] = b"memory_contents"; + const CONTENTS1: &[u8] = b"another_block"; + let memory0 = Memory::with_section( + Section::with_endian(Endian::Little).append_bytes(CONTENTS0), + 0x309d68010bd21b2c, + ); + let memory1 = Memory::with_section( + Section::with_endian(Endian::Little).append_bytes(CONTENTS1), + 0x1234, + ); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_memory64(memory0) + .add_memory64(memory1); + let dump = read_synth_dump(dump).unwrap(); + let memory_list = dump.get_stream::>().unwrap(); + let regions = memory_list.iter().collect::>(); + assert_eq!(regions.len(), 2); + assert_eq!(regions[0].base_address, 0x309d68010bd21b2c); + assert_eq!(regions[0].size, CONTENTS0.len() as u64); + assert_eq!(®ions[0].bytes, &CONTENTS0); + + assert_eq!(regions[1].base_address, 0x1234); + assert_eq!(regions[1].size, CONTENTS1.len() as u64); + assert_eq!(®ions[1].bytes, &CONTENTS1); + } + + #[test] + fn test_memory_list_lifetimes() { + // A memory list should not own any of the minidump data. 
+ const CONTENTS: &[u8] = b"memory_contents"; + let memory = Memory::with_section( + Section::with_endian(Endian::Little).append_bytes(CONTENTS), + 0x309d68010bd21b2c, + ); + let dump = SynthMinidump::with_endian(Endian::Little).add_memory(memory); + let dump = read_synth_dump(dump).unwrap(); + let mem_slices: Vec<&[u8]> = { + let mem_list: MinidumpMemoryList<'_> = dump.get_stream().unwrap(); + mem_list.iter().map(|mem| mem.bytes).collect() + }; + assert_eq!(mem_slices[0], CONTENTS); + } + + #[test] + fn test_memory_overflow() { + let memory1 = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(0, 2), + u64::MAX, + ); + let dump = SynthMinidump::with_endian(Endian::Little).add_memory(memory1); + let dump = read_synth_dump(dump).unwrap(); + let memory_list = dump.get_stream::>().unwrap(); + + assert!(memory_list.memory_at_address(u64::MAX).is_none()); + assert_eq!(memory_list.regions.len(), 1); + assert!(memory_list.regions[0].memory_range().is_none()); + } + + #[test] + fn test_memory_list_overlap() { + let memory1 = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(0, 0x1000), + 0x1000, + ); + // memory2 overlaps memory1 exactly + let memory2 = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(1, 0x1000), + 0x1000, + ); + // memory3 overlaps memory1 partially + let memory3 = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(2, 0x1000), + 0x1001, + ); + // memory4 is fully contained within memory1 + let memory4 = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(3, 0x100), + 0x1001, + ); + // memory5 is cool, though. 
+ let memory5 = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(4, 0x1000), + 0x2000, + ); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_memory(memory1) + .add_memory(memory2) + .add_memory(memory3) + .add_memory(memory4) + .add_memory(memory5); + let dump = read_synth_dump(dump).unwrap(); + let memory_list = dump.get_stream::>().unwrap(); + let regions = memory_list.iter().collect::>(); + assert_eq!(regions.len(), 5); + assert_eq!(regions[0].base_address, 0x1000); + assert_eq!(regions[0].size, 0x1000); + assert_eq!(regions[1].base_address, 0x1000); + assert_eq!(regions[1].size, 0x1000); + assert_eq!(regions[2].base_address, 0x1001); + assert_eq!(regions[2].size, 0x1000); + assert_eq!(regions[3].base_address, 0x1001); + assert_eq!(regions[3].size, 0x100); + assert_eq!(regions[4].base_address, 0x2000); + assert_eq!(regions[4].size, 0x1000); + + // memory_at_address should discard overlapping regions. + assert_eq!(memory_list.by_addr().count(), 2); + let m1 = memory_list.memory_at_address(0x1a00).unwrap(); + assert_eq!(m1.base_address, 0x1000); + assert_eq!(m1.size, 0x1000); + assert_eq!(m1.bytes, &[0u8; 0x1000][..]); + let m2 = memory_list.memory_at_address(0x2a00).unwrap(); + assert_eq!(m2.base_address, 0x2000); + assert_eq!(m2.size, 0x1000); + assert_eq!(m2.bytes, &[4u8; 0x1000][..]); + } + + #[test] + fn test_misc_info() { + const PID: u32 = 0x1234abcd; + const PROCESS_TIMES: MiscFieldsProcessTimes = MiscFieldsProcessTimes { + process_create_time: 0xf0f0b0b0, + process_user_time: 0xf030a020, + process_kernel_time: 0xa010b420, + }; + + let mut misc = MiscStream::new(Endian::Little); + misc.process_id = Some(PID); + misc.process_times = Some(PROCESS_TIMES); + let dump = SynthMinidump::with_endian(Endian::Little).add_stream(misc); + let dump = read_synth_dump(dump).unwrap(); + let misc = dump.get_stream::().unwrap(); + assert_eq!(misc.raw.process_id(), Some(&PID)); + assert_eq!( + misc.process_create_time().unwrap(), + 
systemtime_from_timestamp(PROCESS_TIMES.process_create_time as u64).unwrap() + ); + assert_eq!( + *misc.raw.process_user_time().unwrap(), + PROCESS_TIMES.process_user_time + ); + assert_eq!( + *misc.raw.process_kernel_time().unwrap(), + PROCESS_TIMES.process_kernel_time + ); + } + + #[test] + fn test_misc_info_large() { + const PID: u32 = 0x1234abcd; + const PROCESS_TIMES: MiscFieldsProcessTimes = MiscFieldsProcessTimes { + process_create_time: 0xf0f0b0b0, + process_user_time: 0xf030a020, + process_kernel_time: 0xa010b420, + }; + let mut misc = MiscStream::new(Endian::Little); + misc.process_id = Some(PID); + misc.process_times = Some(PROCESS_TIMES); + // Make it larger. + misc.pad_to_size = Some(mem::size_of::() + 32); + let dump = SynthMinidump::with_endian(Endian::Little).add_stream(misc); + let dump = read_synth_dump(dump).unwrap(); + let misc = dump.get_stream::().unwrap(); + assert_eq!(misc.raw.process_id(), Some(&PID)); + assert_eq!( + misc.process_create_time().unwrap(), + systemtime_from_timestamp(PROCESS_TIMES.process_create_time as u64).unwrap(), + ); + assert_eq!( + *misc.raw.process_user_time().unwrap(), + PROCESS_TIMES.process_user_time + ); + assert_eq!( + *misc.raw.process_kernel_time().unwrap(), + PROCESS_TIMES.process_kernel_time + ); + } + + fn ascii_string_to_utf16(input: &str) -> Vec { + input.chars().map(|c| c as u16).collect() + } + + #[test] + fn test_misc_info_5() { + // MISC_INFO fields + const PID: u32 = 0x1234abcd; + const PROCESS_TIMES: MiscFieldsProcessTimes = MiscFieldsProcessTimes { + process_create_time: 0xf0f0b0b0, + process_user_time: 0xf030a020, + process_kernel_time: 0xa010b420, + }; + + // MISC_INFO_2 fields + const POWER_INFO: MiscFieldsPowerInfo = MiscFieldsPowerInfo { + processor_max_mhz: 0x45873234, + processor_current_mhz: 0x2134018a, + processor_mhz_limit: 0x3423aead, + processor_max_idle_state: 0x123aef12, + processor_current_idle_state: 0x1205af3a, + }; + + // MISC_INFO_3 fields + const PROCESS_INTEGRITY_LEVEL: u32 = 
0x35603403; + const PROCESS_EXECUTE_FLAGS: u32 = 0xa4e09da1; + const PROTECTED_PROCESS: u32 = 0x12345678; + + let mut standard_name = [0; 32]; + let mut daylight_name = [0; 32]; + let bare_standard_name = ascii_string_to_utf16("Pacific Standard Time"); + let bare_daylight_name = ascii_string_to_utf16("Pacific Daylight Time"); + standard_name[..bare_standard_name.len()].copy_from_slice(&bare_standard_name); + daylight_name[..bare_daylight_name.len()].copy_from_slice(&bare_daylight_name); + + const TIME_ZONE_ID: u32 = 2; + const BIAS: i32 = 2; + const STANDARD_BIAS: i32 = 1; + const DAYLIGHT_BIAS: i32 = -60; + const STANDARD_DATE: md::SYSTEMTIME = md::SYSTEMTIME { + year: 0, + month: 11, + day_of_week: 2, + day: 1, + hour: 2, + minute: 33, + second: 51, + milliseconds: 123, + }; + const DAYLIGHT_DATE: md::SYSTEMTIME = md::SYSTEMTIME { + year: 0, + month: 3, + day_of_week: 4, + day: 2, + hour: 3, + minute: 41, + second: 19, + milliseconds: 512, + }; + + let time_zone = MiscFieldsTimeZone { + time_zone_id: TIME_ZONE_ID, + time_zone: md::TIME_ZONE_INFORMATION { + bias: BIAS, + standard_bias: STANDARD_BIAS, + daylight_bias: DAYLIGHT_BIAS, + daylight_name, + standard_name, + standard_date: STANDARD_DATE.clone(), + daylight_date: DAYLIGHT_DATE.clone(), + }, + }; + + // MISC_INFO_4 fields + let mut build_string = [0; 260]; + let mut dbg_bld_str = [0; 40]; + let bare_build_string = ascii_string_to_utf16("hello"); + let bare_dbg_bld_str = ascii_string_to_utf16("world"); + build_string[..bare_build_string.len()].copy_from_slice(&bare_build_string); + dbg_bld_str[..bare_dbg_bld_str.len()].copy_from_slice(&bare_dbg_bld_str); + + let build_strings = MiscFieldsBuildString { + build_string, + dbg_bld_str, + }; + + // MISC_INFO_5 fields + const SIZE_OF_INFO: u32 = mem::size_of::() as u32; + const CONTEXT_SIZE: u32 = 0x1234523f; + const PROCESS_COOKIE: u32 = 0x1234dfe0; + const KNOWN_FEATURE_IDX: usize = md::XstateFeatureIndex::LEGACY_SSE as usize; + const UNKNOWN_FEATURE_IDX: usize 
= 39; + let mut enabled_features = 0; + let mut features = [md::XSTATE_FEATURE::default(); 64]; + // One known feature and one unknown feature. + enabled_features |= 1 << KNOWN_FEATURE_IDX; + features[KNOWN_FEATURE_IDX] = md::XSTATE_FEATURE { + offset: 0, + size: 140, + }; + enabled_features |= 1 << UNKNOWN_FEATURE_IDX; + features[UNKNOWN_FEATURE_IDX] = md::XSTATE_FEATURE { + offset: 320, + size: 1100, + }; + let misc_5 = MiscInfo5Fields { + xstate_data: md::XSTATE_CONFIG_FEATURE_MSC_INFO { + size_of_info: SIZE_OF_INFO, + context_size: CONTEXT_SIZE, + enabled_features, + features, + }, + process_cookie: Some(PROCESS_COOKIE), + }; + + let mut misc = MiscStream::new(Endian::Little); + misc.process_id = Some(PID); + misc.process_times = Some(PROCESS_TIMES); + misc.power_info = Some(POWER_INFO); + misc.process_integrity_level = Some(PROCESS_INTEGRITY_LEVEL); + misc.protected_process = Some(PROTECTED_PROCESS); + misc.process_execute_flags = Some(PROCESS_EXECUTE_FLAGS); + misc.time_zone = Some(time_zone); + misc.build_strings = Some(build_strings); + misc.misc_5 = Some(misc_5); + + let dump = SynthMinidump::with_endian(Endian::Little).add_stream(misc); + let dump = read_synth_dump(dump).unwrap(); + let misc = dump.get_stream::().unwrap(); + + // MISC_INFO fields + assert_eq!(misc.raw.process_id(), Some(&PID)); + assert_eq!( + misc.process_create_time().unwrap(), + systemtime_from_timestamp(PROCESS_TIMES.process_create_time as u64).unwrap() + ); + assert_eq!( + *misc.raw.process_user_time().unwrap(), + PROCESS_TIMES.process_user_time + ); + assert_eq!( + *misc.raw.process_kernel_time().unwrap(), + PROCESS_TIMES.process_kernel_time + ); + + // MISC_INFO_2 fields + assert_eq!( + *misc.raw.processor_max_mhz().unwrap(), + POWER_INFO.processor_max_mhz, + ); + assert_eq!( + *misc.raw.processor_current_mhz().unwrap(), + POWER_INFO.processor_current_mhz, + ); + assert_eq!( + *misc.raw.processor_mhz_limit().unwrap(), + POWER_INFO.processor_mhz_limit, + ); + assert_eq!( + 
*misc.raw.processor_max_idle_state().unwrap(), + POWER_INFO.processor_max_idle_state, + ); + assert_eq!( + *misc.raw.processor_current_idle_state().unwrap(), + POWER_INFO.processor_current_idle_state, + ); + + // MISC_INFO_3 fields + assert_eq!(*misc.raw.time_zone_id().unwrap(), TIME_ZONE_ID); + let time_zone = misc.raw.time_zone().unwrap(); + assert_eq!(time_zone.bias, BIAS); + assert_eq!(time_zone.standard_bias, STANDARD_BIAS); + assert_eq!(time_zone.daylight_bias, DAYLIGHT_BIAS); + assert_eq!(time_zone.standard_date, STANDARD_DATE); + assert_eq!(time_zone.daylight_date, DAYLIGHT_DATE); + assert_eq!(time_zone.standard_name, standard_name); + assert_eq!(time_zone.daylight_name, daylight_name); + + // MISC_INFO_4 fields + assert_eq!(*misc.raw.build_string().unwrap(), build_string,); + assert_eq!(*misc.raw.dbg_bld_str().unwrap(), dbg_bld_str,); + + // MISC_INFO_5 fields + assert_eq!(*misc.raw.process_cookie().unwrap(), PROCESS_COOKIE,); + + let xstate = misc.raw.xstate_data().unwrap(); + assert_eq!(xstate.size_of_info, SIZE_OF_INFO); + assert_eq!(xstate.context_size, CONTEXT_SIZE); + assert_eq!(xstate.enabled_features, enabled_features); + assert_eq!(xstate.features, features); + + let mut xstate_iter = xstate.iter(); + assert_eq!( + xstate_iter.next().unwrap(), + (KNOWN_FEATURE_IDX, features[KNOWN_FEATURE_IDX]), + ); + assert_eq!( + xstate_iter.next().unwrap(), + (UNKNOWN_FEATURE_IDX, features[UNKNOWN_FEATURE_IDX]), + ); + assert_eq!(xstate_iter.next(), None); + assert_eq!(xstate_iter.next(), None); + } + + #[test] + fn test_elf_build_id() { + // Add a module with a long ELF build id + let name1 = DumpString::new("module 1", Endian::Little); + const MODULE1_BUILD_ID: &[u8] = &[ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, + 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + ]; + let cv_record1 = Section::with_endian(Endian::Little) + .D32(md::CvSignature::Elf as u32) // signature + .append_bytes(MODULE1_BUILD_ID); 
+ let module1 = SynthModule::new( + Endian::Little, + 0x100000000, + 0x4000, + &name1, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record1); + // Add a module with a short ELF build id + let name2 = DumpString::new("module 2", Endian::Little); + const MODULE2_BUILD_ID: &[u8] = &[0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; + let cv_record2 = Section::with_endian(Endian::Little) + .D32(md::CvSignature::Elf as u32) // signature + .append_bytes(MODULE2_BUILD_ID); + let module2 = SynthModule::new( + Endian::Little, + 0x200000000, + 0x4000, + &name2, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record2); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_module(module1) + .add_module(module2) + .add(name1) + .add(cv_record1) + .add(name2) + .add(cv_record2); + let dump = read_synth_dump(dump).unwrap(); + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 2); + assert_eq!(modules[0].base_address(), 0x100000000); + assert_eq!(modules[0].code_file(), "module 1"); + // The full build ID. + assert_eq!( + modules[0].code_identifier().unwrap(), + CodeId::new("000102030405060708090a0b0c0d0e0f1011121314151617".to_string()) + ); + assert_eq!(modules[0].debug_file().unwrap(), "module 1"); + // The first 16 bytes of the build ID interpreted as a GUID. + assert_eq!( + modules[0].debug_identifier().unwrap(), + DebugId::from_breakpad("030201000504070608090A0B0C0D0E0F0").unwrap() + ); + + assert_eq!(modules[1].base_address(), 0x200000000); + assert_eq!(modules[1].code_file(), "module 2"); + // The full build ID. + assert_eq!( + modules[1].code_identifier().unwrap(), + CodeId::new("0001020304050607".to_string()) + ); + assert_eq!(modules[1].debug_file().unwrap(), "module 2"); + // The first 16 bytes of the build ID interpreted as a GUID, padded with + // zeroes in this case. 
+ assert_eq!( + modules[1].debug_identifier().unwrap(), + DebugId::from_breakpad("030201000504070600000000000000000").unwrap() + ); + } + + #[test] + fn test_os() { + let dump = SynthMinidump::with_endian(Endian::Little).add_system_info( + SystemInfo::new(Endian::Little).set_platform_id(PlatformId::MacOs as u32), + ); + + let dump = read_synth_dump(dump).unwrap(); + let system_info = dump.get_stream::().unwrap(); + assert_eq!(system_info.os, Os::MacOs); + } + + #[test] + fn test_macos_ids() { + let name = DumpString::new("macos module", Endian::Little); + let cv_record = Section::with_endian(Endian::Little) + // signature + .D32(md::CvSignature::Pdb70 as u32) + // signature, a GUID + .D32(0xaabbccdd) + .D16(0xeeff) + .D16(0x0011) + .append_bytes(b"\x22\x33\x44\x55\x66\x77\x88\x99") + // age, breakpad writes 0 + .D32(0) + // pdb_file_name + .append_bytes(b"helpivecrashed.dylib\0"); + let module = SynthModule::new( + Endian::Little, + 0x100000000, + 0x4000, + &name, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_system_info( + SystemInfo::new(Endian::Little).set_platform_id(PlatformId::MacOs as u32), + ) + .add_module(module) + .add(name) + .add(cv_record); + let dump = read_synth_dump(dump).unwrap(); + let system_info = dump.get_stream::().unwrap(); + assert_eq!(system_info.os, Os::MacOs); + + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 1); + // should be the uuid stored in cv record + assert_eq!( + modules[0].code_identifier().unwrap(), + CodeId::new("AABBCCDDEEFF00112233445566778899".to_owned()) + ); + // should match code identifier, but with the age appended to it + assert_eq!( + modules[0].debug_identifier().unwrap(), + DebugId::from_breakpad("AABBCCDDEEFF001122334455667788990").unwrap() + ); + assert_eq!(modules[0].code_file(), "macos module"); + 
assert_eq!(modules[0].debug_file().unwrap(), "helpivecrashed.dylib"); + } + + #[test] + fn test_windows_code_id_no_cv() { + let name = DumpString::new("windows module", Endian::Little); + let module = SynthModule::new( + Endian::Little, + 0x100000000, + 0x4000, // size of image + &name, + 0xb105_4d2a, // datetime + 0x34571371, + Some(&STOCK_VERSION_INFO), + ); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_system_info( + SystemInfo::new(Endian::Little) + .set_platform_id(PlatformId::VER_PLATFORM_WIN32_NT as u32), + ) + .add_module(module) + .add(name); + let dump = read_synth_dump(dump).unwrap(); + let system_info = dump.get_stream::().unwrap(); + assert_eq!(system_info.os, Os::Windows); + + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 1); + // should match datetime + size of image + assert_eq!( + modules[0].code_identifier().unwrap(), + CodeId::new("B1054D2A4000".to_owned()) + ); + } + + #[test] + fn test_null_id() { + // Add a module with an ELF build id of nothing but zeros + let name1 = DumpString::new("module 1", Endian::Little); + const MODULE1_BUILD_ID: &[u8] = &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let cv_record1 = Section::with_endian(Endian::Little) + .D32(md::CvSignature::Elf as u32) // signature + .append_bytes(MODULE1_BUILD_ID); + let module1 = SynthModule::new( + Endian::Little, + 0x100000000, + 0x4000, + &name1, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record1); + + // Add a module with a PDB70 build id of nothing but zeros + let name2 = DumpString::new("module 2", Endian::Little); + let cv_record2 = Section::with_endian(Endian::Little) + // signature + .D32(md::CvSignature::Pdb70 as u32) + // signature, a GUID + .D32(0x0) + .D16(0x0) + .D16(0x0) + .append_bytes(b"\0\0\0\0\0\0\0\0") + // age, breakpad writes 0 + .D32(0) + // pdb_file_name + .append_bytes(b"\0"); + let module2 = SynthModule::new( + 
Endian::Little, + 0x100000000, + 0x4000, + &name2, + 0xb1054d2a, + 0x34571371, + Some(&STOCK_VERSION_INFO), + ) + .cv_record(&cv_record2); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_module(module1) + .add_module(module2) + .add(name1) + .add(cv_record1) + .add(name2) + .add(cv_record2); + + let dump = read_synth_dump(dump).unwrap(); + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + + assert!(modules[0].debug_identifier().is_none()); + assert!(modules[1].debug_identifier().is_none()); + } + + #[test] + fn test_thread_list_x86() { + let context = minidump_synth::x86_context(Endian::Little, 0xabcd1234, 0x1010); + let stack = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(0, 0x1000), + 0x1000, + ); + let arch = md::ProcessorArchitecture::PROCESSOR_ARCHITECTURE_INTEL as u16; + let system_info = SystemInfo::new(Endian::Little).set_processor_architecture(arch); + let thread = Thread::new(Endian::Little, 0x1234, &stack, &context); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_thread(thread) + .add(context) + .add_memory(stack) + .add_system_info(system_info); + let dump = read_synth_dump(dump).unwrap(); + let mut thread_list = dump.get_stream::>().unwrap(); + let system_info = dump.get_stream::().unwrap(); + let misc_info = dump.get_stream::().ok(); + assert_eq!(thread_list.threads.len(), 1); + let mut thread = thread_list.threads.pop().unwrap(); + assert_eq!(thread.raw.thread_id, 0x1234); + let context = thread + .context(&system_info, misc_info.as_ref()) + .expect("Should have a thread context"); + match &context.raw { + MinidumpRawContext::X86(raw) => { + assert_eq!(raw.eip, 0xabcd1234); + assert_eq!(raw.esp, 0x1010); + } + _ => panic!("Got unexpected raw context type!"), + } + let stack = thread.stack.take().expect("Should have stack memory"); + assert_eq!(stack.base_address, 0x1000); + assert_eq!(stack.size, 0x1000); + } + + #[test] + fn 
test_thread_list_amd64() { + let context = + minidump_synth::amd64_context(Endian::Little, 0x1234abcd1234abcd, 0x1000000010000000); + let stack = Memory::with_section( + Section::with_endian(Endian::Little).append_repeated(0, 0x1000), + 0x1000000010000000, + ); + let arch = md::ProcessorArchitecture::PROCESSOR_ARCHITECTURE_AMD64 as u16; + let system_info = SystemInfo::new(Endian::Little).set_processor_architecture(arch); + let thread = Thread::new(Endian::Little, 0x1234, &stack, &context); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_thread(thread) + .add(context) + .add_memory(stack) + .add_system_info(system_info); + let dump = read_synth_dump(dump).unwrap(); + let mut thread_list = dump.get_stream::>().unwrap(); + let system_info = dump.get_stream::().unwrap(); + let misc_info = dump.get_stream::().ok(); + assert_eq!(thread_list.threads.len(), 1); + let mut thread = thread_list.threads.pop().unwrap(); + assert_eq!(thread.raw.thread_id, 0x1234); + let context = thread + .context(&system_info, misc_info.as_ref()) + .expect("Should have a thread context"); + match &context.raw { + MinidumpRawContext::Amd64(raw) => { + assert_eq!(raw.rip, 0x1234abcd1234abcd); + assert_eq!(raw.rsp, 0x1000000010000000); + } + _ => panic!("Got unexpected raw context type!"), + } + let stack = thread.stack.take().expect("Should have stack memory"); + assert_eq!(stack.base_address, 0x1000000010000000); + assert_eq!(stack.size, 0x1000); + } + + #[test] + fn test_crashpad_info_missing() { + let dump = SynthMinidump::with_endian(Endian::Little); + let dump = read_synth_dump(dump).unwrap(); + + assert!(matches!( + dump.get_stream::(), + Err(Error::StreamNotFound) + )); + } + + #[test] + fn test_crashpad_info_ids() { + let report_id = GUID { + data1: 1, + data2: 2, + data3: 3, + data4: [4, 5, 6, 7, 8, 9, 10, 11], + }; + + let client_id = GUID { + data1: 11, + data2: 10, + data3: 9, + data4: [8, 7, 6, 5, 4, 3, 2, 1], + }; + + let crashpad_info = 
CrashpadInfo::new(Endian::Little) + .report_id(report_id) + .client_id(client_id); + + let dump = SynthMinidump::with_endian(Endian::Little).add_crashpad_info(crashpad_info); + let dump = read_synth_dump(dump).unwrap(); + + let crashpad_info = dump.get_stream::().unwrap(); + + assert_eq!(crashpad_info.raw.report_id, report_id); + assert_eq!(crashpad_info.raw.client_id, client_id); + } + + #[test] + fn test_crashpad_info_annotations() { + let module = ModuleCrashpadInfo::new(42, Endian::Little) + .add_list_annotation("annotation") + .add_simple_annotation("simple", "module") + .add_annotation_object("string", AnnotationValue::String("value".to_owned())) + .add_annotation_object("invalid", AnnotationValue::Invalid) + .add_annotation_object("custom", AnnotationValue::Custom(0x8001, vec![42])); + + let crashpad_info = CrashpadInfo::new(Endian::Little) + .add_module(module) + .add_simple_annotation("simple", "info"); + + let dump = SynthMinidump::with_endian(Endian::Little).add_crashpad_info(crashpad_info); + let dump = read_synth_dump(dump).unwrap(); + + let crashpad_info = dump.get_stream::().unwrap(); + let module = &crashpad_info.module_list[0]; + + assert_eq!(crashpad_info.simple_annotations["simple"], "info"); + assert_eq!(module.module_index, 42); + assert_eq!(module.list_annotations, vec!["annotation".to_owned()]); + assert_eq!(module.simple_annotations["simple"], "module"); + assert_eq!( + module.annotation_objects["string"], + MinidumpAnnotation::String("value".to_owned()) + ); + assert_eq!( + module.annotation_objects["invalid"], + MinidumpAnnotation::Invalid + ); + } + + #[test] + fn test_exception_x86() { + // Defaults to x86 + let system_info = SystemInfo::new(Endian::Little); + + let mut exception = Exception::new(Endian::Little); + + // Check that we clear the erroneous high bits for 32-bit + exception.exception_record.exception_address = 0xf0e1_d2c3_b4a5_9687; + // FIXME: test other fields too + + let dump = SynthMinidump::with_endian(Endian::Little) + 
.add_system_info(system_info) + .add_exception(exception); + + let dump = read_synth_dump(dump).unwrap(); + + let system_stream = dump.get_stream::().unwrap(); + let exception_stream = dump.get_stream::().unwrap(); + assert_eq!( + exception_stream.get_crash_address(system_stream.os, system_stream.cpu), + 0xb4a5_9687 + ); + } + + #[test] + fn test_exception_x64() { + // Defaults to x86 + let system_info = SystemInfo::new(Endian::Little) + .set_processor_architecture(ProcessorArchitecture::PROCESSOR_ARCHITECTURE_AMD64 as u16); + + let mut exception = Exception::new(Endian::Little); + + // Check that we don't truncate this on 64-bit + exception.exception_record.exception_address = 0xf0e1_d2c3_b4a5_9687; + // FIXME: test other fields too + + let dump = SynthMinidump::with_endian(Endian::Little) + .add_system_info(system_info) + .add_exception(exception); + + let dump = read_synth_dump(dump).unwrap(); + + let system_stream = dump.get_stream::().unwrap(); + let exception_stream = dump.get_stream::().unwrap(); + assert_eq!( + exception_stream.get_crash_address(system_stream.os, system_stream.cpu), + 0xf0e1_d2c3_b4a5_9687 + ); + } + + #[test] + fn test_fuzzed_oom() { + // https://github.com/rust-minidump/rust-minidump/issues/381 + let data = b"MDMP\x93\xa7\x00\x00\x00\xffffdYfffff@\n\nfp\n\xbb\xff\xff\xff\n\xff\n"; + assert!(Minidump::read(data.as_ref()).is_err()); + + // https://github.com/getsentry/symbolic/issues/478 + let data = b"MDMP\x93\xa7\x00\x00\r\x00\x00\x00 \xff\xff\xff\xff\xff\xff\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; + assert!(Minidump::read(data.as_ref()).is_err()); + } + + #[test] + fn test_empty_module() { + let name = DumpString::new("/SYSV00000000 (deleted)", Endian::Little); + let module = SynthModule::new( + Endian::Little, + 0x7f602915e000, + 0x26000, + &name, + 0x0, + 0x0, + // All of these are completely zeroed out in the wild. 
+ Some(&md::VS_FIXEDFILEINFO { + signature: 0, + struct_version: 0, + file_version_hi: 0, + file_version_lo: 0, + product_version_hi: 0, + product_version_lo: 0, + file_flags_mask: 0, + file_flags: 0, + file_os: 0, + file_type: 0, + file_subtype: 0, + file_date_hi: 0, + file_date_lo: 0, + }), + ); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_module(module) + .add(name); + let dump = read_synth_dump(dump).unwrap(); + let module_list = dump.get_stream::().unwrap(); + let modules = module_list.iter().collect::>(); + assert_eq!(modules.len(), 1); + assert_eq!(modules[0].code_identifier(), None); + assert_eq!(modules[0].debug_identifier(), None); + assert_eq!(modules[0].code_file(), "/SYSV00000000 (deleted)"); + assert_eq!(modules[0].debug_file(), None); + assert_eq!(modules[0].raw.base_of_image, 0x7f602915e000); + assert_eq!(modules[0].raw.size_of_image, 0x26000); + } + + #[test] + fn test_handle_data_stream() { + const HANDLE_VALUE: u64 = 123; + const TYPE_NAME: &str = "This is a type name"; + const OBJECT_NAME: &str = "And this is an object name"; + + let type_name = DumpString::new(TYPE_NAME, Endian::Little); + let object_name = DumpString::new(OBJECT_NAME, Endian::Little); + let handle = SynthHandleDescriptor::new( + Endian::Little, + HANDLE_VALUE, + Some(&type_name), + Some(&object_name), + 0xf00ff00f, + 0xcafecafe, + 0xcacacaca, + 0xbeefbeef, + ); + let dump = SynthMinidump::with_endian(Endian::Little) + .add_handle_descriptor(handle) + .add(type_name) + .add(object_name); + let dump = read_synth_dump(dump).unwrap(); + let handle_data_stream = dump + .get_stream::() + .expect("The HANDLE_DATA_STREAM must be present"); + let handles = handle_data_stream.iter().collect::>(); + assert_eq!(handles.len(), 1); + assert_eq!( + handles[0] + .raw + .handle() + .expect("The `handle` field must be present"), + &HANDLE_VALUE + ); + assert_eq!( + handles[0] + .type_name + .as_ref() + .expect("The `type_name` field must be populated"), + TYPE_NAME + ); + 
assert_eq!( + handles[0] + .object_name + .as_ref() + .expect("The `object_name` field must be populated"), + OBJECT_NAME + ); + } + + #[test] + fn test_windows_status_code() { + let address = 0x1234_5678_u64; + let amd64_system_info = SystemInfo::new(Endian::Little) + .set_processor_architecture(ProcessorArchitecture::PROCESSOR_ARCHITECTURE_AMD64 as u16) + .set_platform_id(PlatformId::VER_PLATFORM_WIN32_NT as u32); + let mut exception = Exception::new(Endian::Little); + exception.exception_record.exception_code = + err::ExceptionCodeWindows::EXCEPTION_IN_PAGE_ERROR as u32; + exception.exception_record.exception_address = address; + exception.exception_record.number_parameters = 3; + exception.exception_record.exception_information[0] = + err::ExceptionCodeWindowsInPageErrorType::WRITE as u64; + exception.exception_record.exception_information[1] = address; + exception.exception_record.exception_information[2] = + err::NtStatusWindows::STATUS_DISK_FULL as u64; + + let dump = SynthMinidump::with_endian(Endian::Little) + .add_system_info(amd64_system_info) + .add_exception(exception); + let dump = read_synth_dump(dump).unwrap(); + let system_stream = dump.get_stream::().unwrap(); + let exception_stream = dump.get_stream::().unwrap(); + assert_eq!( + exception_stream.get_crash_reason(system_stream.os, system_stream.cpu), + CrashReason::WindowsInPageError( + err::ExceptionCodeWindowsInPageErrorType::WRITE, + NtStatusWindows::STATUS_DISK_FULL as u64 + ) + ); + + // Let's try again but for 32-bit x86 + let x86_system_info = SystemInfo::new(Endian::Little) + .set_processor_architecture(ProcessorArchitecture::PROCESSOR_ARCHITECTURE_INTEL as u16) + .set_platform_id(PlatformId::VER_PLATFORM_WIN32_NT as u32); + let mut exception = Exception::new(Endian::Little); + exception.exception_record.exception_code = + err::ExceptionCodeWindows::EXCEPTION_IN_PAGE_ERROR as u32; + exception.exception_record.exception_address = address; + exception.exception_record.number_parameters = 3; 
+ exception.exception_record.exception_information[0] = + err::ExceptionCodeWindowsInPageErrorType::WRITE as u64; + exception.exception_record.exception_information[1] = address; + // Sign extend the error code like 32-bit windbg.dll does + exception.exception_record.exception_information[2] = + 0xffff_ffff_0000_0000 | (err::NtStatusWindows::STATUS_DISK_FULL as u64); + + let dump = SynthMinidump::with_endian(Endian::Little) + .add_system_info(x86_system_info) + .add_exception(exception); + let dump = read_synth_dump(dump).unwrap(); + let system_stream = dump.get_stream::().unwrap(); + let exception_stream = dump.get_stream::().unwrap(); + assert_eq!( + exception_stream.get_crash_reason(system_stream.os, system_stream.cpu), + CrashReason::WindowsInPageError( + err::ExceptionCodeWindowsInPageErrorType::WRITE, + NtStatusWindows::STATUS_DISK_FULL as u64 + ) + ); + } + + #[test] + fn test_linux_abort_si_code() { + let amd64_system_info = SystemInfo::new(Endian::Little) + .set_processor_architecture(ProcessorArchitecture::PROCESSOR_ARCHITECTURE_AMD64 as u16) + .set_platform_id(PlatformId::Linux as u32); + let mut exception = Exception::new(Endian::Little); + exception.exception_record.exception_code = err::ExceptionCodeLinux::SIGABRT as u32; + exception.exception_record.exception_flags = err::ExceptionCodeLinuxSicode::SI_TKILL as u32; + + let dump = SynthMinidump::with_endian(Endian::Little) + .add_system_info(amd64_system_info) + .add_exception(exception); + let dump = read_synth_dump(dump).unwrap(); + let system_stream = dump.get_stream::().unwrap(); + let exception_stream = dump.get_stream::().unwrap(); + assert_eq!( + exception_stream + .get_crash_reason(system_stream.os, system_stream.cpu) + .to_string(), + "SIGABRT / SI_TKILL" + ); + } +} diff --git a/third_party/rust/minidump/src/strings.rs b/third_party/rust/minidump/src/strings.rs new file mode 100644 index 000000000000..eb3c25b1fa36 --- /dev/null +++ b/third_party/rust/minidump/src/strings.rs @@ -0,0 +1,178 @@ 
+use std::borrow::{Borrow, Cow}; +use std::ops::Deref; +use std::str::Utf8Error; + +/// OsStr, but specifically for Linux (since we aren't always processing native dumps). +#[derive(Debug, PartialOrd, Ord, Eq, PartialEq)] +pub struct LinuxOsStr([u8]); + +/// OsString, but specifically for Linux (since we aren't always processing native dumps). +#[derive(Default, Debug, PartialOrd, Ord, Eq, PartialEq, Clone)] +pub struct LinuxOsString(Vec); + +impl LinuxOsStr { + pub fn new() -> &'static Self { + Self::from_bytes(b"") + } + + pub fn from_bytes(inner: &[u8]) -> &Self { + // This is the idiom std uses for creating a type that wraps a slice. + // Yes, there really isn't a way to do this without unsafe. No, it's + // not at all a safety concern. + unsafe { &*(inner as *const [u8] as *const LinuxOsStr) } + } + + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Tries to interpret the LinuxOsStr as a utf8 str. + /// + /// While linux OsStrs are "arbitrary bytes" in general, there are often + /// parts that are known to be utf8 (ascii even). + /// + /// For instance, when parsing /proc/self/mem, most of the line is ascii + /// like "r-xp" or "1a23-4fe2". However the "path" at the end of each line + /// is a true LinuxOsStr and may not be proper utf8. + pub fn to_str(&self) -> Result<&str, Utf8Error> { + std::str::from_utf8(self) + } + + /// Converts to a utf8 string lossily (uses the usual std lossy algorithm). + pub fn to_string_lossy(&self) -> Cow { + // Ok so this is the end of the line for dancing around and acting + // like we can just be fine with Linux OS strings being arbitrary bags + // of bytes. We need some way to print this value in a reasonable way, + // and the best precedent I can find for that is std::Path::display. + // This wraps the a Path (which is just an OsStr) and provides a + // Display impl. + // + // What does this Display impl do..? + // + // It just calls from_utf8_lossy. + // + // Whelp. Ok. 
+ // + // (Strictly speaking it wraps it up in the internal/unstable + // Utf8Lossy iterator so it avoids the allocation, but we don't + // have that luxury, so we might as well make the allocation/conversion + // explicit.) + String::from_utf8_lossy(self.as_bytes()) + } + + // ~Copies of a bunch of string APIs since [u8] doesn't have them (reasonably) + pub fn split_once(&self, separator: u8) -> Option<(&LinuxOsStr, &LinuxOsStr)> { + self.iter().position(|&b| b == separator).map(|idx| { + ( + Self::from_bytes(&self[..idx]), + Self::from_bytes(&self[idx + 1..]), + ) + }) + } + pub fn rsplit_once(&self, separator: u8) -> Option<(&LinuxOsStr, &LinuxOsStr)> { + self.iter().rposition(|&b| b == separator).map(|idx| { + ( + Self::from_bytes(&self[..idx]), + Self::from_bytes(&self[idx + 1..]), + ) + }) + } + + pub fn split(&self, separator: u8) -> impl Iterator { + self.as_bytes() + .split(move |&b| b == separator) + .map(LinuxOsStr::from_bytes) + } + + pub fn split_ascii_whitespace(&self) -> impl Iterator { + // Quick and dirty impl: just split on every individual whitespace + // char but discard all the empty substrings. + self.as_bytes() + .split(|b| b.is_ascii_whitespace()) + .filter(|slice| !slice.is_empty()) + .map(LinuxOsStr::from_bytes) + } + + pub fn lines(&self) -> impl Iterator { + // Intentionally doesn't mess around with stuff like \r + // since we're processing files generated by the OS, but maybe + // this will be a problem later? 
+ self.split(b'\n') + } + + pub fn trim_ascii_whitespace(&self) -> &LinuxOsStr { + let input = self.as_bytes(); + + let mut first = None; + let mut last = None; + + // Find first non-whitespace index + for (i, &c) in input.iter().enumerate() { + if !c.is_ascii_whitespace() { + first = Some(i); + break; + } + } + + // Find last non-whitespace index + for (i, &c) in input.iter().enumerate().rev() { + if !c.is_ascii_whitespace() { + last = Some(i); + break; + } + } + + if let (Some(first), Some(last)) = (first, last) { + Self::from_bytes(&input[first..=last]) + } else { + // string was entirely whitespace, return an empty string starting + // at its position (so that it's still strictly a substring). + Self::from_bytes(&input[0..0]) + } + } +} + +impl LinuxOsString { + /// Create a new LinuxOsString from an array of bytes. + pub fn from_vec(vec: Vec) -> Self { + Self(vec) + } + + pub fn new() -> Self { + Self(Vec::new()) + } + + pub fn as_os_str(&self) -> &LinuxOsStr { + self + } +} + +impl Borrow for LinuxOsString { + fn borrow(&self) -> &LinuxOsStr { + LinuxOsStr::from_bytes(&self.0) + } +} + +impl ToOwned for LinuxOsStr { + type Owned = LinuxOsString; + + fn to_owned(&self) -> LinuxOsString { + LinuxOsString::from_vec(self.0.to_owned()) + } +} + +impl Deref for LinuxOsString { + type Target = LinuxOsStr; + + fn deref(&self) -> &LinuxOsStr { + LinuxOsStr::from_bytes(&self.0) + } +} + +impl Deref for LinuxOsStr { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + &self.0 + } +} diff --git a/third_party/rust/minidump/src/system_info.rs b/third_party/rust/minidump/src/system_info.rs new file mode 100644 index 000000000000..ac268d7bff8f --- /dev/null +++ b/third_party/rust/minidump/src/system_info.rs @@ -0,0 +1,170 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +//! Information about the system that produced a `Minidump`. 
+ +use num_traits::FromPrimitive; +use std::borrow::Cow; +use std::fmt; + +use minidump_common::format as md; +use minidump_common::format::PlatformId; +use minidump_common::format::ProcessorArchitecture::*; + +/// Known operating systems +/// +/// This is a slightly nicer layer over the `PlatformId` enum defined in the minidump-common crate. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Os { + Windows, + MacOs, + Ios, + Linux, + Solaris, + Android, + Ps3, + NaCl, + Unknown(u32), +} + +impl Os { + /// Get an `Os` value matching the `platform_id` value from `MINIDUMP_SYSTEM_INFO` + pub fn from_platform_id(id: u32) -> Os { + match PlatformId::from_u32(id) { + Some(PlatformId::VER_PLATFORM_WIN32_WINDOWS) + | Some(PlatformId::VER_PLATFORM_WIN32_NT) => Os::Windows, + Some(PlatformId::MacOs) => Os::MacOs, + Some(PlatformId::Ios) => Os::Ios, + Some(PlatformId::Linux) => Os::Linux, + Some(PlatformId::Solaris) => Os::Solaris, + Some(PlatformId::Android) => Os::Android, + Some(PlatformId::Ps3) => Os::Ps3, + Some(PlatformId::NaCl) => Os::NaCl, + _ => Os::Unknown(id), + } + } + + /// Get a human-readable friendly name for an `Os` + pub fn long_name(&self) -> Cow<'_, str> { + match *self { + Os::Windows => Cow::Borrowed("Windows NT"), + Os::MacOs => Cow::Borrowed("Mac OS X"), + Os::Ios => Cow::Borrowed("iOS"), + Os::Linux => Cow::Borrowed("Linux"), + Os::Solaris => Cow::Borrowed("Solaris"), + Os::Android => Cow::Borrowed("Android"), + Os::Ps3 => Cow::Borrowed("PS3"), + Os::NaCl => Cow::Borrowed("NaCl"), + Os::Unknown(val) => Cow::Owned(format!("0x{val:#08x}")), + } + } +} + +impl fmt::Display for Os { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + Os::Windows => "windows", + Os::MacOs => "mac", + Os::Ios => "ios", + Os::Linux => "linux", + Os::Solaris => "solaris", + Os::Android => "android", + Os::Ps3 => "ps3", + Os::NaCl => "nacl", + Os::Unknown(_) => "unknown", + } + ) + } +} + +/// Known CPU types +/// +/// 
This is a slightly nicer layer over the `ProcessorArchitecture` enum defined in +/// the minidump-common crate. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum Cpu { + X86, + X86_64, + Ppc, + Ppc64, + Sparc, + Arm, + Arm64, + Mips, + Mips64, + Unknown(u16), +} + +/// Supported CPU pointer widths +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum PointerWidth { + Bits32, + Bits64, + Unknown, +} + +impl Cpu { + /// Get a `Cpu` value matching the `processor_architecture` value from `MINIDUMP_SYSTEM_INFO` + pub fn from_processor_architecture(arch: u16) -> Cpu { + match md::ProcessorArchitecture::from_u16(arch) { + Some(PROCESSOR_ARCHITECTURE_INTEL) | Some(PROCESSOR_ARCHITECTURE_IA32_ON_WIN64) => { + Cpu::X86 + } + Some(PROCESSOR_ARCHITECTURE_AMD64) => Cpu::X86_64, + Some(PROCESSOR_ARCHITECTURE_PPC) => Cpu::Ppc, + Some(PROCESSOR_ARCHITECTURE_PPC64) => Cpu::Ppc64, + Some(PROCESSOR_ARCHITECTURE_SPARC) => Cpu::Sparc, + Some(PROCESSOR_ARCHITECTURE_ARM) => Cpu::Arm, + Some(PROCESSOR_ARCHITECTURE_ARM64) | Some(PROCESSOR_ARCHITECTURE_ARM64_OLD) => { + Cpu::Arm64 + } + Some(PROCESSOR_ARCHITECTURE_MIPS) => Cpu::Mips, + Some(PROCESSOR_ARCHITECTURE_MIPS64) => Cpu::Mips64, + _ => Cpu::Unknown(arch), + } + } + + /// The native pointer width of this platform + pub fn pointer_width(&self) -> PointerWidth { + match self { + Cpu::X86 | Cpu::Ppc | Cpu::Sparc | Cpu::Arm | Cpu::Mips => PointerWidth::Bits32, + Cpu::X86_64 | Cpu::Ppc64 | Cpu::Arm64 | Cpu::Mips64 => PointerWidth::Bits64, + Cpu::Unknown(_) => PointerWidth::Unknown, + } + } +} + +impl fmt::Display for Cpu { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + Cpu::X86 => "x86", + Cpu::X86_64 => "amd64", + Cpu::Ppc => "ppc", + Cpu::Ppc64 => "ppc64", + Cpu::Sparc => "sparc", + Cpu::Arm => "arm", + Cpu::Arm64 => "arm64", + Cpu::Mips => "mips", + Cpu::Mips64 => "mips64", + Cpu::Unknown(_) => "unknown", + } + ) + } +} + +impl PointerWidth { + pub fn 
size_in_bytes(self) -> Option { + match self { + Self::Bits32 => Some(4), + Self::Bits64 => Some(8), + Self::Unknown => None, + } + } +} diff --git a/third_party/rust/minidump/tests/test_minidump.rs b/third_party/rust/minidump/tests/test_minidump.rs new file mode 100644 index 000000000000..5bd3ed692e5a --- /dev/null +++ b/third_party/rust/minidump/tests/test_minidump.rs @@ -0,0 +1,377 @@ +// Copyright 2015 Ted Mielczarek. See the COPYRIGHT +// file at the top-level directory of this distribution. + +use debugid::{CodeId, DebugId}; +use minidump::system_info::{Cpu, Os}; +use minidump::*; +use minidump_common::format as md; +use num_traits::cast::FromPrimitive; +use std::fs::File; +use std::io::Read; +use std::path::PathBuf; +use std::time::SystemTime; + +fn get_test_minidump_path(filename: &str) -> PathBuf { + let mut path = PathBuf::from(file!()); + path.pop(); + path.pop(); + path.pop(); + path.push("../"); + path.push("testdata"); + path.push(filename); + println!("{path:?}"); + path +} + +fn read_test_minidump() -> Result { + let path = get_test_minidump_path("test.dmp"); + Minidump::read_path(path) +} + +fn read_linux_minidump() -> Result { + let path = get_test_minidump_path("linux-mini.dmp"); + Minidump::read_path(path) +} + +#[ctor::ctor] +fn init_logger() { + env_logger::builder().is_test(true).init(); +} + +#[test] +fn test_minidump_read_path() { + read_test_minidump().unwrap(); +} + +#[test] +fn test_minidump_read() { + let path = get_test_minidump_path("test.dmp"); + let mut f = File::open(path).unwrap(); + let mut buf = vec![]; + f.read_to_end(&mut buf).unwrap(); + let _dump = Minidump::read(buf).unwrap(); +} + +#[test] +fn test_module_list() { + let dump = read_test_minidump().unwrap(); + let module_list = dump.get_stream::().unwrap(); + assert_eq!( + module_list.module_at_address(0x400000).unwrap().code_file(), + "c:\\test_app.exe" + ); + let modules = module_list.iter().collect::>(); + let module_files = modules.iter().map(|m| 
m.code_file()).collect::>(); + assert_eq!(modules.len(), 13); + assert_eq!(modules[0].base_address(), 0x400000); + assert_eq!(modules[0].size(), 0x2d000); + assert_eq!(modules[0].code_file(), "c:\\test_app.exe"); + assert_eq!( + modules[0].code_identifier().unwrap(), + CodeId::new("45D35F6C2d000".to_string()) + ); + assert_eq!(modules[0].debug_file().unwrap(), "c:\\test_app.pdb"); + assert_eq!( + modules[0].debug_identifier().unwrap(), + DebugId::from_breakpad("5A9832E5287241C1838ED98914E9B7FF1").unwrap() + ); + assert!(modules[0].version().is_none()); + + assert_eq!(modules[12].base_address(), 0x76bf0000); + assert_eq!(modules[12].size(), 0xb000); + assert_eq!(modules[12].code_file(), "C:\\WINDOWS\\system32\\psapi.dll"); + assert_eq!( + modules[12].code_identifier().unwrap(), + CodeId::new("411096CAb000".to_string()) + ); + assert_eq!(modules[12].debug_file().unwrap(), "psapi.pdb"); + assert_eq!( + modules[12].debug_identifier().unwrap(), + DebugId::from_breakpad("A5C3A1F9689F43D8AD228A09293889702").unwrap() + ); + assert_eq!(modules[12].version().unwrap(), "5.1.2600.2180"); + + assert_eq!( + module_files, + vec![ + r"c:\test_app.exe", + r"C:\WINDOWS\system32\ntdll.dll", + r"C:\WINDOWS\system32\kernel32.dll", + r"C:\WINDOWS\system32\ole32.dll", + r"C:\WINDOWS\system32\advapi32.dll", + r"C:\WINDOWS\system32\rpcrt4.dll", + r"C:\WINDOWS\system32\gdi32.dll", + r"C:\WINDOWS\system32\user32.dll", + r"C:\WINDOWS\system32\msvcrt.dll", + r"C:\WINDOWS\system32\imm32.dll", + r"C:\WINDOWS\system32\dbghelp.dll", + r"C:\WINDOWS\system32\version.dll", + r"C:\WINDOWS\system32\psapi.dll", + ] + ); + + assert_eq!( + module_list + .by_addr() + .map(|m| m.code_file()) + .collect::>(), + vec![ + r"c:\test_app.exe", + r"C:\WINDOWS\system32\dbghelp.dll", + r"C:\WINDOWS\system32\imm32.dll", + r"C:\WINDOWS\system32\psapi.dll", + r"C:\WINDOWS\system32\ole32.dll", + r"C:\WINDOWS\system32\version.dll", + r"C:\WINDOWS\system32\msvcrt.dll", + r"C:\WINDOWS\system32\user32.dll", + 
r"C:\WINDOWS\system32\advapi32.dll", + r"C:\WINDOWS\system32\rpcrt4.dll", + r"C:\WINDOWS\system32\gdi32.dll", + r"C:\WINDOWS\system32\kernel32.dll", + r"C:\WINDOWS\system32\ntdll.dll", + ] + ); +} + +#[test] +fn test_system_info() { + let dump = read_test_minidump().unwrap(); + let system_info = dump.get_stream::().unwrap(); + assert_eq!(system_info.os, Os::Windows); + assert_eq!(system_info.cpu, Cpu::X86); + assert_eq!( + system_info.cpu_info().unwrap(), + "GenuineIntel family 6 model 13 stepping 8" + ); + assert_eq!(&system_info.csd_version().unwrap(), "Service Pack 2"); +} + +#[test] +fn test_misc_info() { + let dump = read_test_minidump().unwrap(); + let misc_info = dump.get_stream::().unwrap(); + assert_eq!(misc_info.raw.process_id(), Some(&3932)); + assert_eq!(misc_info.raw.process_create_time(), Some(&0x45d35f73)); + assert_eq!( + misc_info + .process_create_time() + .unwrap() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(), + 1171480435, // = 2007-02-14T19:13:55 + ); +} + +#[test] +fn test_breakpad_info() { + let dump = read_test_minidump().unwrap(); + let breakpad_info = dump.get_stream::().unwrap(); + assert_eq!(breakpad_info.dump_thread_id.unwrap(), 0x11c0); + assert_eq!(breakpad_info.requesting_thread_id.unwrap(), 0xbf4); +} + +#[test] +fn test_crashpad_info() { + let path = get_test_minidump_path("simple-crashpad.dmp"); + let dump = Minidump::read_path(&path).unwrap(); + let crashpad_info = dump.get_stream::().unwrap(); + + let report_id = md::GUID { + data1: 0x42F9_DE72, + data2: 0x518A, + data3: 0x43DD, + data4: [0x97, 0xD7, 0x8D, 0xDC, 0x32, 0x8D, 0x36, 0x62], + }; + assert_eq!(crashpad_info.raw.report_id, report_id); + + let client_id = md::GUID { + data1: 0x6FD2_B3B9, + data2: 0x9833, + data3: 0x4B2F, + data4: [0xBB, 0xF7, 0xB, 0xCF, 0x50, 0x1B, 0xAD, 0x7E], + }; + assert_eq!(crashpad_info.raw.client_id, client_id); + + assert_eq!(crashpad_info.simple_annotations["hello"], "world"); + assert_eq!(crashpad_info.module_list.len(), 
2); + + let module = &crashpad_info.module_list[0]; + assert_eq!(module.module_index, 16); + assert_eq!(module.list_annotations, vec!["abort() called".to_owned()]); + assert!(module.simple_annotations.is_empty()); + assert!(module.annotation_objects.is_empty()); +} + +#[test] +fn test_assertion() { + let path = get_test_minidump_path("invalid-parameter.dmp"); + let dump = Minidump::read_path(&path).unwrap(); + let assertion = dump.get_stream::().unwrap(); + assert_eq!(assertion.expression().unwrap(), "format != nullptr"); + assert_eq!(assertion.function().unwrap(), "common_vfprintf"); + assert_eq!( + assertion.file().unwrap(), + r"minkernel\crts\ucrt\src\appcrt\stdio\output.cpp" + ); + assert_eq!(assertion.raw.line, 32); + assert_eq!( + md::AssertionType::from_u32(assertion.raw._type), + Some(md::AssertionType::InvalidParameter) + ); +} + +#[test] +fn test_exception() { + let dump = read_test_minidump().unwrap(); + let exception = dump.get_stream::().unwrap(); + let system_info = dump.get_stream::().unwrap(); + let misc_info = dump.get_stream::().ok(); + assert_eq!(exception.thread_id, 0xbf4); + assert_eq!(exception.raw.exception_record.exception_code, 0xc0000005); + if let Some(ctx) = exception + .context(&system_info, misc_info.as_ref()) + .as_deref() + { + assert_eq!(ctx.get_instruction_pointer(), 0x40429e); + assert_eq!(ctx.get_stack_pointer(), 0x12fe84); + if let MinidumpContext { + raw: MinidumpRawContext::X86(ref raw), + ref valid, + } = *ctx + { + assert_eq!(raw.eip, 0x40429e); + assert_eq!(*valid, MinidumpContextValidity::All); + } else { + panic!("Wrong context type"); + } + } else { + panic!("Missing context"); + } +} + +#[test] +fn test_thread_list() { + let dump = read_test_minidump().unwrap(); + let thread_list = dump.get_stream::>().unwrap(); + let system_info = dump.get_stream::().unwrap(); + let misc_info = dump.get_stream::().ok(); + let memory_list = dump.get_memory().unwrap_or_default(); + + let threads = &thread_list.threads; + 
assert_eq!(threads.len(), 2); + assert_eq!(threads[0].raw.thread_id, 0xbf4); + assert_eq!(threads[1].raw.thread_id, 0x11c0); + let id = threads[1].raw.thread_id; + assert_eq!(thread_list.get_thread(id).unwrap().raw.thread_id, id); + if let Some(ctx) = threads[0] + .context(&system_info, misc_info.as_ref()) + .as_deref() + { + assert_eq!(ctx.get_instruction_pointer(), 0x7c90eb94); + assert_eq!(ctx.get_stack_pointer(), 0x12f320); + if let MinidumpContext { + raw: MinidumpRawContext::X86(ref raw), + ref valid, + } = *ctx + { + assert_eq!(raw.eip, 0x7c90eb94); + assert_eq!(*valid, MinidumpContextValidity::All); + } else { + panic!("Wrong context type"); + } + } else { + panic!("Missing context"); + } + if let Some(ref stack) = threads[0].stack_memory(&memory_list) { + // Try the beginning + assert_eq!(stack.get_memory_at_address::(0x12f31c).unwrap(), 0); + assert_eq!(stack.get_memory_at_address::(0x12f31c).unwrap(), 0); + assert_eq!(stack.get_memory_at_address::(0x12f31c).unwrap(), 0); + assert_eq!( + stack.get_memory_at_address::(0x12f31c).unwrap(), + 0x7c90e9c000000000 + ); + // And the end + assert_eq!(stack.get_memory_at_address::(0x12ffff).unwrap(), 0); + assert_eq!(stack.get_memory_at_address::(0x12fffe).unwrap(), 0); + assert_eq!(stack.get_memory_at_address::(0x12fffc).unwrap(), 0); + assert_eq!( + stack.get_memory_at_address::(0x12fff8).unwrap(), + 0x405443 + ); + } else { + panic!("Missing stack memory"); + } +} + +#[test] +fn test_empty_minidump() { + match Minidump::read(&b""[..]) { + Ok(_) => panic!("Should have failed to read minidump"), + Err(e) => assert_eq!(e, Error::MissingHeader), + } +} + +#[test] +fn backwards_range() { + let data = include_bytes!("../../testdata/invalid-range.dmp"); + + match Minidump::read(&data[..]) { + Ok(f) => { + // TODO verify this is correct + // This seems to call `MinidumpStream::read()` with a `bytes` that is the entire + // minidump! 
+ let _ = f + .get_stream::() + .expect_err("range should be invalid"); + } + Err(e) => { + panic!("Expected to parse the header, got {:?}", e); + } + } +} + +#[test] +fn test_record_count_mac_info() { + let data = include_bytes!("../../testdata/invalid-record-count.dmp"); + + match Minidump::read(&data[..]) { + Ok(f) => { + let _ = f.get_stream::(); + } + Err(e) => { + panic!("Expected to parse the header, got {:?}", e); + } + } +} + +#[test] +fn test_linux_os_version() { + let dump = read_linux_minidump().unwrap(); + let system_info = dump.get_stream::().unwrap(); + + assert_eq!(system_info.os_parts().0, "4.9.60-linuxkit-aufs"); + assert_eq!( + system_info.os_parts().1.unwrap(), + "#1 SMP Mon Nov 6 16:00:12 UTC 2017", + ); +} + +#[test] +fn test_full_dump_memory() { + let path = get_test_minidump_path("full-dump.dmp"); + let dump = Minidump::read_path(&path).unwrap(); + let memory_list = dump.get_stream::>().unwrap(); + assert_eq!(memory_list.iter().count(), 54); + let blocks: Vec<_> = memory_list.iter().take(3).collect(); + assert_eq!(blocks[0].base_address, 0x007FFE0000); + assert_eq!(blocks[0].size, 0x1000); + assert_eq!(blocks[0].bytes[0..8], [0, 0, 0, 0, 0, 0, 0xA0, 0x0F]); + assert_eq!(blocks[1].base_address, 0x007FFE9000); + assert_eq!(blocks[1].size, 0x1000); + assert_eq!(blocks[1].bytes[0..8], [0x48, 0x61, 0x6C, 0x54, 0, 0, 0, 0]); + assert_eq!(blocks[2].base_address, 0x9897D0D000); + assert_eq!(blocks[2].size, 0x3000); + assert_eq!(blocks[2].bytes[0..8], [0, 0, 0, 0, 0, 0, 0, 0]); +} diff --git a/third_party/rust/object/.cargo-checksum.json b/third_party/rust/object/.cargo-checksum.json index a8b6bddc5474..07ddbc862783 100644 --- a/third_party/rust/object/.cargo-checksum.json +++ b/third_party/rust/object/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"CHANGELOG.md":"19f059b47fe070336813f4561b7da9be7f8dc1968daf3b1565b1a55900673469","Cargo.toml":"8075e73eb31b58aacb542434e0f21851895a335efc6cc23724411c478716c213","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0b74dfa0bcee5c420c6b7f67b4b2658f9ab8388c97b8e733975f2cecbdd668a6","README.md":"3772fdfd89e6321cc298413b8c0ba847bd125dfe64785e8faeaac05a87cbb2d3","clippy.toml":"e1e95839ba8e8bbf07f99ff49e2f151b9048c7981301a5480571842bbaf78ca2","src/archive.rs":"d6cead723242c26db2967b63385b79ed2008980a8c64b123a5eecffd7ed388fc","src/common.rs":"27a01eb0a1efcd10a988bc3922a2159e97fd133fbc07ff90faca4ff1722bd605","src/elf.rs":"ceb4f748b65ea092cf057db45193b0b2c7f9204a1e677848aaf0463f9dd3ac6a","src/endian.rs":"b4f63a85ccd3d5c11615baf5ba946c82046c221a1b8c74ee8c6fa4360782980c","src/lib.rs":"7f1261ebed84cd10169c1846a70dcafa7c8b25c06f32981335a9a676f45856a4","src/macho.rs":"e62808b0c84494b68e7e69b2f7fd9c0620b2ef61b6a2af3403de30284cf6b898","src/pe.rs":"8432e949a5bbf7ca28ba27f6a8ee926fe2e8632ffd88845714a6f64d6f7c0a40","src/pod.rs":"d2967732f0052e6cfa18a2dd62c57bc3b640a20eb9a6db9f39836000ceabb399","src/read/any.rs":"8ef08da67f083ee3f2758b5d64349d8032d5c63198d0c55815cd17054281a207","src/read/archive.rs":"479574cff125a74fc5512d75c1531da3bb006005fe544ffd2531a7d4f35a9bb4","src/read/coff/comdat.rs":"5204e6e44a33d1fe04ae551892d9dae2bd8ca4a0d9f1b46dd06a9e512b8ec6cc","src/read/coff/file.rs":"007c52c0a2d6f763a3c74e13f5cc15cf870a0701b87f01346ad550e52c5e5146","src/read/coff/import.rs":"1de0db515a0a6b1eee355d93a715f6d2ff152d1b815e1e859af0b0e5ded36efc","src/read/coff/mod.rs":"66bdcc772291cb53be378a66723e3c246e30df5aa4e4d4ce8e55bfeb156849a9","src/read/coff/relocation.rs":"57deb22ed0c9c31de0fe0bb483ded94459bcfd93b9f979191500d223a8517fe2","src/read/coff/section.rs":"de023dc46d4e1a37e77ab8e7080ff9fc9588633d790ae664e138d70e8fc8a39f","src/read/coff/symbol.rs":"e3b629112fe73e4627daa45093df1a1a2844ffe395d1c4de479f0f4c2bdac624","src/read/elf/attributes.r
s":"1a5bc8dc081903d6b318761f50010ab08dee74b7f2d68ffbbd1aff7ed7b7c538","src/read/elf/comdat.rs":"253e00621d9a5249cfa20a2ebda76127113b42fc960235308333c9d35acdd628","src/read/elf/compression.rs":"097ff8bdc78d01a1532b11c1c0cae3b35905128c7d98b471de188d46da3ff970","src/read/elf/dynamic.rs":"8f59bd6d352f6810be6b6dc02c2f88229f15aa02a42f8fc09bcf3f284d4b1021","src/read/elf/file.rs":"860f6ed4e98e8672daef71065ea885f47d25593f6adeb7907545dadb9d398768","src/read/elf/hash.rs":"b330af7e2356512cfdf162986437c81a3b149a91e26bf82455a6976e2571a618","src/read/elf/mod.rs":"fceb322ac4e2cb182e0abd93f794622e58ae7c64e83c8703c9c888af5d93889d","src/read/elf/note.rs":"4b7e33582bff27918240d34d70f0099260dfd3122345c12ce10e6f7fcdf79e35","src/read/elf/relocation.rs":"7b95b7fbac280645f6d5900cc0158653bc8fb3adc9beeeae1b3f61eb77ada5d0","src/read/elf/section.rs":"c626a53afa196ba5b81a3d6278956f1d1ddf04ff2ac81f934425cfaef870aafd","src/read/elf/segment.rs":"ebcefba06c6f4a3e60403a1112c59f3390c39840aac011a8b040c77f8d29d655","src/read/elf/symbol.rs":"d50755b3acd34b9c77dd51f94d436265e79ed8f6f167b593e2a482543abde30b","src/read/elf/version.rs":"2c5f63e526cf26f83bdaaead4124949fc53b6198983e4681b6a3895794ad8754","src/read/macho/dyld_cache.rs":"c5247e9121a57fd411c86aea435b9c3d6d03d3cbb3fb7dfe6e75576013d71f14","src/read/macho/fat.rs":"d27a1052f2e47cd5b798a6359f33c3bfe7f7971b13259f6545118213ace7f5dd","src/read/macho/file.rs":"2531c245722202173ff762179af2c88af0108156457c848e7d756e348e72bc80","src/read/macho/load_command.rs":"d8c0ebc8f65dafa4fbb1f9a3c944fdeee96fddacfcc863650b94d9a62e8fe37a","src/read/macho/mod.rs":"23b353da3b7e076c68a067776e6a5b346a746116ac42c2c90bafd95ad54a90b3","src/read/macho/relocation.rs":"477d98b507550c29ceceba49308d14e1f086cb01a4ea9af691e995d1d29767a4","src/read/macho/section.rs":"9b21b3a02f509bb09983d116d7f0938cd61d5293e5e31907964611495b8a575e","src/read/macho/segment.rs":"0dee483eb9b6c731e69a9fe0bd9ef84b2a797a8203e5e870e15a6a2165326cc3","src/read/macho/symbol.rs":"3d3ad557c205d834ba305a9eaf3b97b2ed
05f1906cd8430ad2fadabf81f14b9f","src/read/mod.rs":"b1224dc6654be52b8c2f6403503ffa662b576d05c0382146a386915d23c894dd","src/read/pe/data_directory.rs":"c08c095a4287c55bf7d7774bbade1d7610d8e82433b0de23af8c4c7ef23d75e3","src/read/pe/export.rs":"07ac5ec7b67d4a09037d8f11eb4426d96515687ee299df2a3d8cd4fd93eb2036","src/read/pe/file.rs":"507d800775adae687ca9ac7dca1a6f42463103af0731a5fe548d4745a9bee686","src/read/pe/import.rs":"ea20dfc0d462ba20e149bf9408f4ec1d0b202abf1f15536f6d091f0c0e756ac8","src/read/pe/mod.rs":"69832b7f4ccd93b59e08bafcbd0d3226c450d7801ad49ab554b38b660c8997fd","src/read/pe/relocation.rs":"0335c06b6d37df4939c8b88044313e88661ee45e5a57d2eec40143f2fe481838","src/read/pe/resource.rs":"0209eb96391bc367633b6d868505cb30947157702b0c85ef6677e5a1283d448f","src/read/pe/rich.rs":"ae9b2fc927bab2661e8d200a10128aebde37d26b50cb9069e9af9eb7bacee591","src/read/pe/section.rs":"bafde5a1584f6ce0456e32756e825c1b2b9dc0ec220ab29e668c2ff700600b87","src/read/read_cache.rs":"e9dbaa385435f5ef6ca5951c26ed1f6793ad3a8f3aee918257a5df6783d4b36d","src/read/read_ref.rs":"14966a1da9951633a7e73aedccfeadbbed4a977a8fb9d415d572250f6ebaf438","src/read/traits.rs":"8d094ba6ac06639bad448c70378d5154195d4ab36b13588b72105a869dfc053e","src/read/util.rs":"7ed8c5c88a52549734df67d2cbd3f0ea1a571728cae62152e57018f3140f9ebc","src/read/wasm.rs":"31c755ce17bba20ba287f53af7b7787bdbb5f8920333412fb11d81239102b423","src/read/xcoff/comdat.rs":"9a0a8a16682a28a54b51d28d382578e4b1e0212a34460eb93b50e8f97e4bf745","src/read/xcoff/file.rs":"17f751578d052cb8f74ee56a4e17b053b06e82e4efbe943907943bc561fb301e","src/read/xcoff/mod.rs":"d0179d3f95797464ca5919563454d1123ce8c35dfc5f40ecd6ca0d002a9824a8","src/read/xcoff/relocation.rs":"84993f477231cd1b8c79c385bc0e892640de89ddae268b845bc82c41999953b4","src/read/xcoff/section.rs":"c5ce72c214398125c7a4bc160fc5a7b27a6d92035dadcc84fd2852b73c75c123","src/read/xcoff/segment.rs":"627dabf3003aa1442bb4a2292cd68e1f572c3b95864a99e50a505b2894ffc804","src/read/xcoff/symbol.rs":"23be12b614937f8ea5de9
0d097fb8034f82bff798aa09636cd53a0670e39a984","src/write/coff.rs":"1dcb1c6c939af192591d44e135e6d139b82563d8bca07249e93fa3d434b1f6c7","src/write/elf/mod.rs":"1bb945edad539b4f19dda5d46c9b86fa4ea3721eedda77ca2595b5519c3e30f2","src/write/elf/object.rs":"2176f0f640eed5a05be77208301a10667533b495551662186e6c708bd02c9c18","src/write/elf/writer.rs":"84336230a24413a41e342735c153c3421c70707ec92c2df02e7d3536e5d41b55","src/write/macho.rs":"bd6afba576273a162f6d2e9658c8258a664c38d0169864310dffbf1fd7582a43","src/write/mod.rs":"c6c9a54ef807a3bee0c3074be86b0aed90872e1e703f39f2fa5c06bc9ed89767","src/write/pe.rs":"6c72185705a3e067c481f2b9f81c64a84e062e67781928e58fd1150314dad8f9","src/write/string.rs":"0033a6f5137b42988ac41dbaa2efb94a4d74d8b043c9a34c40125e8ee6912420","src/write/util.rs":"7a1083d305e9446767ce2d5f69be2c4c155495cf97e595f8fa53c4e153ccf186","src/write/xcoff.rs":"f192dd34fb442cd53a004e50508f6a787c9e9cd9089a15f63fdf6054eb6bd63f","src/xcoff.rs":"fbd50fc4b61ccfdf218185ea4eafe8cf9793e8d034e7ce243fb54ecae12af5ce","tests/integration.rs":"0fa704827e4da1be38dac2e3820d92f6b20c4d415803b04f67c3516020b1de97","tests/parse_self.rs":"81b44b2dd1de9a5d8c18d9bd8926156e39fb83931837afa8ca344da0d309aeee","tests/read/coff.rs":"d3ec2079f00237640d01cb66eb24c55c85d7a775bb94f9f5c9f77e21cb7a785d","tests/read/mod.rs":"7833826f169ac3be2b4f274e5fc8cf4a51742bd0010803ff0dc20ea5643a7e61","tests/round_trip/bss.rs":"849d69b063fd757fed02219dd81e9d13b82068a2025d2cc5cfd40cf557e31bda","tests/round_trip/coff.rs":"8a25aab7164a5c8aa7a21279f8bae1f4d5f68a8d09c29a4ecd0d0c14564851cc","tests/round_trip/comdat.rs":"a8f729e218fee21e90b9f39b5cfcb4f80bc3ce26d3a297323667e6eb14f882cc","tests/round_trip/common.rs":"ced08ff559ca4d343ceef54bb4c581a3405cd96d6a1628ba43b7aab82070800b","tests/round_trip/elf.rs":"d7351d888ccad246a646ab3bea1afc3d445adeb28c5d3c8f157f7cde3717281c","tests/round_trip/macho.rs":"8cf6297f1b9e31153b15f2e409e68b561f135a233d32b601a47f5fd4dfa014cc","tests/round_trip/mod.rs":"14db36fae698b75fedc4dc832465394350049f5
4b01b1215022a44ebe920f7e9","tests/round_trip/section_flags.rs":"0e17639e5f86d576f039a294c274ce8db2e2a8add31a2fffc33a6e93a6d2791e","tests/round_trip/tls.rs":"23a49a1036b9173ece82a3080745930e5925e745280ab38866c9d3c29f463e63"},"package":"77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe"} \ No newline at end of file +{"files":{"CHANGELOG.md":"1fd0cf87592838700fe817851e022bd45db5a5e8bdcd247c0733cedc6bf5b309","Cargo.toml":"1a70000be28bb6e1b3c448f579c95384b70b6a7bcf98946c84c816ea919b5943","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0b74dfa0bcee5c420c6b7f67b4b2658f9ab8388c97b8e733975f2cecbdd668a6","README.md":"59a3ce4c55e0429024d583218dc13a6dd67025913916e991788c6614db820499","src/archive.rs":"d6cead723242c26db2967b63385b79ed2008980a8c64b123a5eecffd7ed388fc","src/build/bytes.rs":"3c3ac3bed7e61914509358922baf72729dab6ae47207f6bc83380b4a85ecf711","src/build/elf.rs":"f12be07d80c851b0f3aa808edab0061b7bdd09f2777586bae235497a65eda884","src/build/error.rs":"2a77a45eb45728c80fe6dcce14d9c50bd8f17a22dd815e9ad98320da7c2e1e27","src/build/mod.rs":"beac0a3fb3a479b7948f2a0235e6872a547401baecd1e92320cf373e5db05a07","src/build/table.rs":"20348b635d8855b78b5c12e3cc91f804cb09b6cb38e78473ba7a300594398dbd","src/common.rs":"c0cbf055a418d07b8b290bb196ee711a76b6f16fb6010f19c3f9270b75d1ea19","src/elf.rs":"65562e4622639f5adb50a9ba19a039831b03615f2101d21996c3462ef8bd10af","src/endian.rs":"b4f63a85ccd3d5c11615baf5ba946c82046c221a1b8c74ee8c6fa4360782980c","src/lib.rs":"be64f0aae47b05e61a4deb262484ab4b40efe2bb29808f1616d3a853d7e7901c","src/macho.rs":"991b77798570fbed7a989481c945f62a5d4df89380a2508e198e707f213f2e46","src/pe.rs":"cd61391b064bea4c99cff515d9c635227218cf62b7aa719b66e3b1935379c497","src/pod.rs":"235ab16a4bea8f2a52a3edda78f1b893bd8166ae14f86dfedcce817005bec15f","src/read/any.rs":"4f1524a49f533890b8b6bc5b9abdb34f24a9cf491325a76043959abacd2ca0b0","src/read/archive.rs":"7e897a7044045afccbad68bbb5ccf8f456790a5bf22be4d23b0637831f
36bcea","src/read/coff/comdat.rs":"2c5a1b0b69c7b10b79f2ec305480942b9e0dd693b8f1790b0f3e9d918d805560","src/read/coff/file.rs":"f5962815b5e0cbcd0c408fe515a3e373d3ca460f34651b3873250f4c046538c7","src/read/coff/import.rs":"3b7cf82936e989aba71d3f4aea9edeffe314e0e729a57fad93be37287abc8ddb","src/read/coff/mod.rs":"c3b0c4f09993b07a85fda3dd2de9c80258aaadce6c0598eb25e55fab4c6bbf1c","src/read/coff/relocation.rs":"a56b5d660f708df7991819ad6f52ba49178a633b5040ba8cab2cfc290e306c89","src/read/coff/section.rs":"0311f810d0a23496895d2506aab9c04fe13837ec589e787cfb93e44a82e3e4f3","src/read/coff/symbol.rs":"30275b6a1ad2b86b9bdef26efe0ba49cd16cc9434696361d16876adbe2f80536","src/read/elf/attributes.rs":"9b2e2bfdb0295f8414d31485d6fd569abfdd4bbf07a52436a208ee3ec1916050","src/read/elf/comdat.rs":"1390d60a405d97bcd05a5ceb01d32f1285d649fd438732686a50cb69380be4a9","src/read/elf/compression.rs":"726bb1ca96a480ac2a0c8ccaa4c2291d62ca6c4af2b1a2e43b74ae7be17f6d75","src/read/elf/dynamic.rs":"c934f8b72d71ed39ff129ce37a8177b5bf12fba93695914eae48112bf89702b4","src/read/elf/file.rs":"ee9af438eafc86570a1376b7a08a5df46eae05d8fb41e868ce913255c597628e","src/read/elf/hash.rs":"c167aab3a9d1baa1ae915bd7bf4e4b97320dd758f8691372f9ecd518af9dcb1c","src/read/elf/mod.rs":"7c0d4d10c1055f57184f470aa014e6358394ec95019a74da8d5079d275437d0b","src/read/elf/note.rs":"ca7fe4d04fe130e7c29bea5f5f559f4561272e4658a502444164003f36e620ec","src/read/elf/relocation.rs":"7ff8078cf3bbecb3d6a6c47e6d9bd7af3da4f757879894f1557ce00053753f90","src/read/elf/section.rs":"2c7eac8e12d8150f5a25fa39a3764ed1296952debd10ff56211c22160354948f","src/read/elf/segment.rs":"3fbfada9cdf09920235068607a1c2d4daf61889aa16aa36a242c9b908886ab02","src/read/elf/symbol.rs":"70d39f5088e53394d6c7ef9fa0c5329e69ad48f4019ae86a8dbba3097e72e9eb","src/read/elf/version.rs":"db85dee7d063f68d41278f54220f9d08a73cc6ba775e4bee51596401bb01b716","src/read/gnu_compression.rs":"793af03a626ce1be0de9554c224ccbcd0eda9e4dc1378d70317da9c285d6aa99","src/read/macho/dyld_cache.rs":"ecc361b3
8cea542b4578dabd08cc0540e0619f32f5a7fef8b977ba972b13178d","src/read/macho/fat.rs":"79c48ff8492b6b721a8b965278d3f789262d20c53d5d1fe318a1ce2da59e73fc","src/read/macho/file.rs":"14ea70c1cc271ac1b52138c55e41b8b90b520388206e2d14feb234f4f77ddeda","src/read/macho/load_command.rs":"b6d5d67c940dd21c9526b4a261c31419d06a039f12ecf02dc9bf0d7b26bde2ed","src/read/macho/mod.rs":"0e97e28f379766511f33d8ccb97eaa690a352f9180080a376a86516249b1bdd8","src/read/macho/relocation.rs":"2134a761649eae62da33e09b344f9ca63180b01a351f524c48e31f6ba8fc8a70","src/read/macho/section.rs":"aa6c5fcb0d9c5f63c8f59cc69e0d20bc3aea5904776fe4a0be0635dc780c84c4","src/read/macho/segment.rs":"5a1d893c2fdae83baf7769e629eacae68142e99ded55b70153d25950006914f2","src/read/macho/symbol.rs":"c2f86418e4923425e8ae819f700a3985151cb47522b8fad676dd48eebcb00bea","src/read/mod.rs":"0dc861cc65439482b6a89bab9817dac9fe9d6a0bb500b922dd892005a58d6056","src/read/pe/data_directory.rs":"ac3d1a0c3d1c02d6005666ff808c898b48866895252f0196faa773cfd605fb53","src/read/pe/export.rs":"3e9c8f6b5409fc205ccf183f9b65446ce54b948ac00bc1a0481542e72ab33442","src/read/pe/file.rs":"95ae7b3cebf1ab263d98c22f1e2cd9d224486164a78ab53f3f47792cb688c312","src/read/pe/import.rs":"89ed0978ecbd8ffccf8c5a67523cd0e7d7c2f5241cb2777df24495f6792ca40f","src/read/pe/mod.rs":"f0c70bf86680e7afdfdffbf4f3118efc9801516c6898663f574b115b62b1fbcd","src/read/pe/relocation.rs":"27795df870bc38b3aab57a9ef78c141633442727dbcd5b1c4b61513ec64069a6","src/read/pe/resource.rs":"4840d8d01d930ab64a5aa2e3b5c1930020d36fd6f2e12b86c66024d652d12eb9","src/read/pe/rich.rs":"9e18325fd09ea96b139ddd9c97b4b25d85cb4afac83a7e7c2cd944750e33fd2d","src/read/pe/section.rs":"d11151f78c697d99afc1613a47a94b74ca86edcfbc61eab67c99f882e703b90c","src/read/read_cache.rs":"4992f90ced3bf98f13157ec0d74e893b45e5d6753d835b959a6de4c34aa38b47","src/read/read_ref.rs":"ea6177cc2ea107755c7e067de1463ee7365766cd3613b11171adfd48b1704201","src/read/traits.rs":"238150a5c95078d422f96e79278849d86181f3dfc2051091655cec1f537cbd0c","src
/read/util.rs":"90383bddd6286fcdb1586bbd9ac038beceb02ec8a485bff1c460c72a08b16aab","src/read/wasm.rs":"8e368732e68c3da4b585391348d6b254a7ef91da5fd2d8d1e23deb8491cb649c","src/read/xcoff/comdat.rs":"42a1581fa44ffc868dceecfb05e81736d6322ec44613c9597bb7fbe69290b8cc","src/read/xcoff/file.rs":"70487b353be3dc5ce051a5a283b98a46af8a290e2fcbc2d3b41ddc8af979ef96","src/read/xcoff/mod.rs":"40921a3e9d65360f57c7301f11dd12ec82f71311b689272f209758ecbc755eb3","src/read/xcoff/relocation.rs":"911c318ad7e34ada6cbc4152cd188b5a040e76e43b45a4bb06f81409f02f053e","src/read/xcoff/section.rs":"c4ad51d8e087188421b2ef7cefe4c6a724420aa5f1acf5145e9cabd89fae498f","src/read/xcoff/segment.rs":"c1eea626cc7b7c5121342bcc8f70f1afafe5920f83b1a790202495a80d5205ce","src/read/xcoff/symbol.rs":"b0938d31e03975bdf87d3eb69d635388baac19714a4b82ec803acca5e16017f0","src/write/coff/mod.rs":"511bd3a19575b1d888d0d1abb17bb697c36eda0e7032375afb13daedc972c27e","src/write/coff/object.rs":"2916db3708ada20f3e35648aac7ca8893c737538156822c95b10bcac67bc361a","src/write/coff/writer.rs":"7762c785a60034b1d6f8c8df8bfb58dcdf6f16b94c04e0bc3bb8a5586ffef36a","src/write/elf/mod.rs":"1bb945edad539b4f19dda5d46c9b86fa4ea3721eedda77ca2595b5519c3e30f2","src/write/elf/object.rs":"b6b3a1ad0a83fb5be5cce3fe9f4d2f0750e2a6ec3f9ff3d188bc87234b5854da","src/write/elf/writer.rs":"0e233b679dc8a63eeaa3708cdfdf85db1f216b4b4444fee10fae0e27f71c9991","src/write/macho.rs":"289aeeb1820f8f05e53d92765bd1bd9fbbbbfd6480b6f5f3761151906bc8e286","src/write/mod.rs":"6abb66eecb5b55df2f5ab8f1a4a7c0d23a4cbbe937c32c45c131ccb21b34ae60","src/write/pe.rs":"f8332a54b0a73b7143b035a2b47405908219544a9e67979a479a4da37c978487","src/write/string.rs":"f5494d7545d7e355b39eeeb0d7ae430f09d3c13fbf24b316e3657e94411724fa","src/write/util.rs":"cf9a3841ee354e43a1c800bf4df698ae8bfb7f78ca72588b8b0ca2a72735a73e","src/write/xcoff.rs":"5ea8ac71baeda93ca4863db2d170d22abd3a08942c5ec1640594984eb981ad2b","src/xcoff.rs":"b6be882df7591a4abcd2b5113ddccdbd8cc528487795c187ec56084154b336df","tests/build/
elf.rs":"faeae8579455a247a7b60adddbf5350d44aa2665a47ab006b62d2ae250511ff4","tests/build/mod.rs":"7fc41ffba6b4337dc9734ce5624af618f30b5927ee1cdbece7ac5d1c3ebbb3b4","tests/integration.rs":"ebbad227c5e5ee9ca149c417ba5b90ee0286b4bb15f746985928133d7ae29910","tests/parse_self.rs":"81b44b2dd1de9a5d8c18d9bd8926156e39fb83931837afa8ca344da0d309aeee","tests/read/coff.rs":"a7bda931599b9cf998798133b8cbe4a455f7418c20c82b1c16857c5002b2d35a","tests/read/elf.rs":"142b30376800eb1f8ec28f570eee3329dc5615e56e91b3e50a446a0e21b9cd6f","tests/read/macho.rs":"724373043e31000cd9bc1b289f650bd6e85682edfa1d6aca0e236b2d61a5e1e6","tests/read/mod.rs":"42b6cb878b5064cf1841fbdf52c14f129cafc23f9dafd23bd08673cd84779f39","tests/round_trip/bss.rs":"6285e90cea9385680a38f03dd7f74460ddab3fe8cf49a6e9e247174e72f98b9b","tests/round_trip/coff.rs":"29f20305a2df5badceeef1cd214a756e9ad19f9dc46a1cf2c495d8799c76fd81","tests/round_trip/comdat.rs":"6fb3c0152e0152e44ec55e4361aebe2842b8c2e0b133a837fd65b5d5a945106d","tests/round_trip/common.rs":"364747c169c0773cdf9df29e1ac74adc20d8e66320172ae2d986643db30d1e62","tests/round_trip/elf.rs":"b704d081e0d00a2a1f3a5f56dae9de89ead5cff18d15529324e67d9933348b37","tests/round_trip/macho.rs":"ba20eda176f120b28bd34daddc5d190a6f5ee8507b6fedd8ba1e4fe9381cabff","tests/round_trip/mod.rs":"1c599fff06b9763c55d277b4bff3cbcd2033a7079c2b434751ababb2266b164a","tests/round_trip/section_flags.rs":"6008210add97ebdbc9885eb188ea18b80d3e3d86fdd07896a29891fd3dc489ec","tests/round_trip/tls.rs":"ec31a29310e8eb1a6379f3a91e1e1ec5bde32cf9e3ba9c0799bdec4353cedbef"},"package":"081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce"} \ No newline at end of file diff --git a/third_party/rust/object/CHANGELOG.md b/third_party/rust/object/CHANGELOG.md index 451fcb106398..9a5290a663e8 100644 --- a/third_party/rust/object/CHANGELOG.md +++ b/third_party/rust/object/CHANGELOG.md @@ -2,6 +2,310 @@ -------------------------------------------------------------------------------- +## 0.36.1 + +Released 
2024/06/29. + +### Added + +* Added `SectionKind::DebugString`. + [#694](https://github.com/gimli-rs/object/pull/694) + +* Added `Architecture::Sparc` and `Architecture::Sparc32Plus`. + [#699](https://github.com/gimli-rs/object/pull/699) + [#700](https://github.com/gimli-rs/object/pull/700) + +* Added more RISC-V ELF relocation constants. + [#701](https://github.com/gimli-rs/object/pull/701) + +### Changed + +* Changed `read::ElfFile::imports` to return the library for versioned symbols. + [#693](https://github.com/gimli-rs/object/pull/693) + +* Changed `read::MachOFile` to support Go's debug section compression. + [#697](https://github.com/gimli-rs/object/pull/697) + +* Reversed the order of Mach-O relocations emitted by `write::Object`. + [#702](https://github.com/gimli-rs/object/pull/702) + +-------------------------------------------------------------------------------- + +## 0.36.0 + +Released 2024/05/26. + +### Breaking changes + +* Deleted `data` and `align` parameters for `write::Object::add_subsection`. + Use `add_symbol_data` or `add_symbol_bss` instead. + [#676](https://github.com/gimli-rs/object/pull/676) + +* Changed methods in the lower level read API to accept or return `SectionIndex` + or `SymbolIndex` instead of `usize`. + [#677](https://github.com/gimli-rs/object/pull/677) + [#684](https://github.com/gimli-rs/object/pull/684) + [#685](https://github.com/gimli-rs/object/pull/685) + +* Deleted `SymbolKind::Null`. Changed `read::Object::sections` and `read::Object::symbols` + to no longer return null entries. This affects ELF and XCOFF. + [#679](https://github.com/gimli-rs/object/pull/679) + +* Changed `read::ObjectMap::object` to return `ObjectMapFile`. This handles + splitting the object file name into path and member. + [#686](https://github.com/gimli-rs/object/pull/686) + +* Changed `read::coff::ImageSymbol::address` to only return an address for + symbols that have an address. 
+ [#689](https://github.com/gimli-rs/object/pull/689) + +### Added + +* Added `pod::slice_from_all_bytes` and `pod::slice_from_all_bytes_mut`. + [#672](https://github.com/gimli-rs/object/pull/672) + +* Added `write::Object::set_subsections_via_symbols`. + Changed `write::Object::add_symbol_data` and `write::Object::add_symbol_bss` + to correctly handle zero size symbols when subsections are enabled. + [#676](https://github.com/gimli-rs/object/pull/676) + +* Added methods in the unified read API to return the lower level API structures. + Some existing methods were deprecated so that naming of these methods is more consistent. + [#678](https://github.com/gimli-rs/object/pull/678) + +* Added methods in the lower level read API to return a `SectionIndex` or `SymbolIndex`. + [#684](https://github.com/gimli-rs/object/pull/684) + [#689](https://github.com/gimli-rs/object/pull/689) + +* Implemented `Display` for `read::SymbolIndex` and `read::SectionIndex`. + [#684](https://github.com/gimli-rs/object/pull/684) + +* Added `is_common`, `is_absolute`, `is_local`, and `is_weak` to `read::elf::Sym`. + [#685](https://github.com/gimli-rs/object/pull/685) + +### Changed + +* Changed `read::ArchiveFile` to skip the `<ECSYMBOLS>` member. + [#669](https://github.com/gimli-rs/object/pull/669) + +* Fixed handling of segment data in the dyld shared cache. + [#673](https://github.com/gimli-rs/object/pull/673) + +* Changed `read::RelocationMap` to handle Mach-O section relocations. + [#675](https://github.com/gimli-rs/object/pull/675) + +* Changed `read::elf::RelocationSections` to ignore relocations that apply to relocations. + [#680](https://github.com/gimli-rs/object/pull/680) + +* Removed a lifetime bound from an argument in `read::elf::SectionTable::section_name`, + `read::elf::SymbolTable::symbol_name`, and `read::elf::SymbolTable::symbol_section`.
+ [#681](https://github.com/gimli-rs/object/pull/681) + +-------------------------------------------------------------------------------- + +## 0.35.0 + +Released 2024/04/10. + +### Breaking changes + +* Moved the `'file` lifetime parameter from `read::Object` to its associated types. + [#655](https://github.com/gimli-rs/object/pull/655) + +### Added + +* Added support more section kinds in `build::elf`. + [#650](https://github.com/gimli-rs/object/pull/650) + +* Added thin archive support to `read::ArchiveFile`. + [#651](https://github.com/gimli-rs/object/pull/651) + +* Added `read::ReadCacheOps` and changed `read::ReadCache` bound from `Read + Seek` to `ReadCacheOps`. + [#652](https://github.com/gimli-rs/object/pull/652) + +* Added `read::ObjectSection::relocation_map` + [#654](https://github.com/gimli-rs/object/pull/654) + +* Added `read::ArchiveFile::symbols`. + [#658](https://github.com/gimli-rs/object/pull/658) + +* Added `BinaryFormat::native_object`. + [#661](https://github.com/gimli-rs/object/pull/661) + +### Changed + +* The minimum supported rust version for the `read` feature and its dependencies + has changed to 1.65.0. + [#655](https://github.com/gimli-rs/object/pull/655) + +* Fixed `sh_offset` handling for `SHT_NOBITS` sections in `build::elf`. + [#645](https://github.com/gimli-rs/object/pull/645) + +* Fixed handling of ELF files with dynamic symbols but no dynamic strings. + [#646](https://github.com/gimli-rs/object/pull/646) + +* Fixed potential panics in `read::WasmFile` due to invalid function indices. + [#649](https://github.com/gimli-rs/object/pull/649) + +* Fixed handling of Wasm components in `read::WasmFile`. + [#649](https://github.com/gimli-rs/object/pull/649) + +* Fixed `sh_entsize` for 32-bit hash sections in `write::elf`. + [#650](https://github.com/gimli-rs/object/pull/650) + +* Fixed `sh_size` for attribute sections in `build::elf`. 
+ [#650](https://github.com/gimli-rs/object/pull/650) + +* Fixed `sh_info` for `SHT_DYNSYM` sections in `build::elf`. + [#650](https://github.com/gimli-rs/object/pull/650) + +* Fixed handling of dynamic relocations with invalid `sh_link` in `build::elf`. + [#650](https://github.com/gimli-rs/object/pull/650) + +* Fixed parsing of member names containing '/' in `read::ArchiveFile`. + [#657](https://github.com/gimli-rs/object/pull/657) + +* Fixed handling of load segment alignments in `build::elf::Builder::read`. + [#659](https://github.com/gimli-rs/object/pull/659) + +-------------------------------------------------------------------------------- + +## 0.34.0 + +Released 2024/03/11. + +### Breaking changes + +* Replaced `macho::DyldSubCacheInfo` with `macho::DyldSubCacheEntryV1`. + Changed the return type of `macho::DyldCacheHeader::subcaches`. + [#642](https://github.com/gimli-rs/object/pull/642) + +### Changed + +* Added `macho::DyldSubCacheEntryV2` and changed `read::macho::DyldCache` + to handle both versions. This is needed for macOS 13 and above. + [#642](https://github.com/gimli-rs/object/pull/642) + +-------------------------------------------------------------------------------- + +## 0.33.0 + +Released 2024/03/05. + +### Breaking changes + +* Deleted file format variants in `RelocationKind`. Replaced their usage + with `read::Relocation::flags` and `write::Relocation::flags`. + [#585](https://github.com/gimli-rs/object/pull/585) + +* Replaced `kind`, `encoding` and `size` fields in `write::Relocation` + with `RelocationFlags::Generic` in the `flags` field. + [#585](https://github.com/gimli-rs/object/pull/585) + +* Replaced `macho::FatHeader::parse`, `macho::FatHeader::parse_arch32`, + and `macho::FatHeader::parse_arch64` with `read::macho::MachOFatFile`, + `read::macho::MachOFatFile32` and `read::macho::MachOFatFile64`. + [#623](https://github.com/gimli-rs/object/pull/623) + +### Added + +* Added `macho::PLATFORM_XROS` and `macho::PLATFORM_XROSSIMULATOR`. 
+ [#626](https://github.com/gimli-rs/object/pull/626) + +* Added `build::elf::Builder` and associated types. + Extended `write::elf::Writer` to support this. + [#618](https://github.com/gimli-rs/object/pull/618) + +### Changed + +* Changed the lifetime to `'data` for the return value of `ObjectSection::name`, + `ObjectSection::name_bytes`, `ObjectComdat::name`, `ObjectComdat::name_bytes`. + [#620](https://github.com/gimli-rs/object/pull/620) + [#622](https://github.com/gimli-rs/object/pull/622) + +* Checked that sizes are smaller than the file length in `read::ReadCache`. + [#630](https://github.com/gimli-rs/object/pull/630) + +* Used `Vec::try_reserve_exact` for large allocations. + [#632](https://github.com/gimli-rs/object/pull/632) + +-------------------------------------------------------------------------------- + +## 0.32.2 + +Released 2023/12/24. + +### Added + +* Added ELF relocations for LoongArch ABI v2.20. + [#578](https://github.com/gimli-rs/object/pull/578) + [#589](https://github.com/gimli-rs/object/pull/589) + +* Added ELF support for SHARC. + [#593](https://github.com/gimli-rs/object/pull/593) + +* Added `write::coff::Writer`. + [#595](https://github.com/gimli-rs/object/pull/595) + +* Added `SubArchitecture::Arm64EC` support for PE/COFF. + [#607](https://github.com/gimli-rs/object/pull/607) + +* Added `SubArchitecture::Arm64E` support for Mach-O. + [#614](https://github.com/gimli-rs/object/pull/614) + +* Added `read::Object::symbol_by_name` and `read::Object::symbol_by_name_bytes`. + [#602](https://github.com/gimli-rs/object/pull/602) + +* Added more functions to the low level API in `read::xcoff`. + [#608](https://github.com/gimli-rs/object/pull/608) + +* Added more functions to the low level API in `read::macho`. + [#584](https://github.com/gimli-rs/object/pull/584) + +### Changed + +* Fixes for AArch64 relocation addends for Mach-O. 
+ [#581](https://github.com/gimli-rs/object/pull/581) + +* Changes to `write::Object` output for Mach-O, including the addition of a `LC_DYSYMTAB` load command. + [#584](https://github.com/gimli-rs/object/pull/584) + +* Changed `write::Object` to always use `R_X86_64_PLT32` for x86-64 branches for ELF. + [#590](https://github.com/gimli-rs/object/pull/590) + +* Fixed `read::ObjectSymbol::kind` for undefined section symbols for COFF. + [#592](https://github.com/gimli-rs/object/pull/592) + +* Fixed `write::Object` to accept undefined section symbols for COFF. + [#594](https://github.com/gimli-rs/object/pull/594) + +* Improved parsing of auxiliary section symbols for COFF. + [#603](https://github.com/gimli-rs/object/pull/603) + +* Improved the selection of symbols for `read::Object::symbol_map`. + This includes changes to `read::Symbol::is_definition`. + [#601](https://github.com/gimli-rs/object/pull/601) + [#606](https://github.com/gimli-rs/object/pull/606) + +* Changed `read::ObjectSymbol::kind` for ELF `STT_NOTYPE` symbols to `SymbolKind::Unknown`. + [#604](https://github.com/gimli-rs/object/pull/604) + +* Changed `read::ObjectSymbol::scope` for XCOFF `C_HIDEXT` symbols to `SymbolScope::Compilation`. + [#605](https://github.com/gimli-rs/object/pull/605) + +-------------------------------------------------------------------------------- + +## 0.32.1 + +Released 2023/09/03. + +### Added + +* Added `write::Object::set_macho_cpu_subtype`. + [#574](https://github.com/gimli-rs/object/pull/574) + +-------------------------------------------------------------------------------- + ## 0.32.0 Released 2023/08/12. 
diff --git a/third_party/rust/object/Cargo.toml b/third_party/rust/object/Cargo.toml index 385666a10bc6..4e6a44b43691 100644 --- a/third_party/rust/object/Cargo.toml +++ b/third_party/rust/object/Cargo.toml @@ -11,12 +11,17 @@ [package] edition = "2018" -rust-version = "1.60" +rust-version = "1.65" name = "object" -version = "0.32.0" -exclude = [ - "/.github", - "/testfiles", +version = "0.36.1" +include = [ + "/Cargo.toml", + "/CHANGELOG.md", + "/README.md", + "/LICENSE-APACHE", + "/LICENSE-MIT", + "/src", + "/tests", ] description = "A unified interface for reading and writing object file formats." readme = "README.md" @@ -73,22 +78,33 @@ version = "2.4.1" default-features = false [dependencies.ruzstd] -version = "0.4.0" +version = "0.7.0" optional = true [dependencies.wasmparser] -version = "0.110.0" +version = "0.212.0" optional = true +default-features = false [features] all = [ "read", "write", + "build", "std", "compression", "wasm", ] archive = [] +build = [ + "build_core", + "write_std", + "elf", +] +build_core = [ + "read_core", + "write_core", +] cargo-all = [] coff = [] compression = [ @@ -103,6 +119,7 @@ default = [ doc = [ "read_core", "write_std", + "build_core", "std", "compression", "archive", diff --git a/third_party/rust/object/README.md b/third_party/rust/object/README.md index 5b90fca306c7..620816d098d5 100644 --- a/third_party/rust/object/README.md +++ b/third_party/rust/object/README.md @@ -11,7 +11,14 @@ For reading files, it provides multiple levels of support: * a higher level unified API for accessing common features of object files, such as sections and symbols ([example](crates/examples/src/objdump.rs)) -Supported file formats: ELF, Mach-O, Windows PE/COFF, Wasm, XCOFF, and Unix archive. +Supported file formats for reading: ELF, Mach-O, Windows PE/COFF, Wasm, XCOFF, and Unix archive. 
+ +For writing files, it provides: + +* low level writers for ELF, PE, and COFF +* higher level builder for ELF ([example](crates/rewrite/src)) +* a unified API for writing relocatable object files (ELF, Mach-O, COFF, XCOFF) + ([example](crates/examples/src/bin/simple_write.rs)) ## Example for unified read API ```rust @@ -19,16 +26,14 @@ use object::{Object, ObjectSection}; use std::error::Error; use std::fs; -/// Reads a file and displays the content of the ".boot" section. +/// Reads a file and displays the name of each section. fn main() -> Result<(), Box<dyn Error>> { - let bin_data = fs::read("./multiboot2-binary.elf")?; - let obj_file = object::File::parse(&*bin_data)?; - if let Some(section) = obj_file.section_by_name(".boot") { - println!("{:#x?}", section.data()?); - } else { - eprintln!("section not available"); - } - Ok(()) + let binary_data = fs::read("path/to/binary")?; + let file = object::File::parse(&*binary_data)?; + for section in file.sections() { + println!("{}", section.name()?); + } + Ok(()) } ``` @@ -37,10 +42,7 @@ See [`crates/examples`](crates/examples) for more examples. ## Minimum Supported Rust Version (MSRV) Changes to MSRV are considered breaking changes. We are conservative about changing the MSRV, -but sometimes are required to due to dependencies. The MSRV is: - - * 1.60.0 for the `read` feature and its dependencies. - * 1.65.0 for other features. +but sometimes are required to due to dependencies. The MSRV is 1.65.0.
## License diff --git a/third_party/rust/object/clippy.toml b/third_party/rust/object/clippy.toml deleted file mode 100644 index 16caf02ee919..000000000000 --- a/third_party/rust/object/clippy.toml +++ /dev/null @@ -1 +0,0 @@ -msrv = "1.60.0" diff --git a/third_party/rust/object/src/build/bytes.rs b/third_party/rust/object/src/build/bytes.rs new file mode 100644 index 000000000000..2363558584b6 --- /dev/null +++ b/third_party/rust/object/src/build/bytes.rs @@ -0,0 +1,146 @@ +use alloc::borrow::Cow; +use alloc::string::String; +use alloc::vec::Vec; +use core::fmt; + +/// A byte slice. +/// +/// Uses copy-on-write to avoid unnecessary allocations. The bytes can be +/// accessed as a slice using the `Deref` trait, or as a mutable `Vec` using the +/// `to_mut` method. +/// +/// Provides a `Debug` implementation that shows the first 8 bytes and the length. +#[derive(Default, Clone, PartialEq, Eq)] +pub struct Bytes<'a>(Cow<'a, [u8]>); + +impl<'a> Bytes<'a> { + /// Acquire a mutable reference to the bytes. + /// + /// Clones the bytes if they are shared. + pub fn to_mut(&mut self) -> &mut Vec { + self.0.to_mut() + } + + /// Get the bytes as a slice. + pub fn as_slice(&self) -> &[u8] { + self.0.as_ref() + } +} + +impl<'a> core::ops::Deref for Bytes<'a> { + type Target = [u8]; + fn deref(&self) -> &[u8] { + self.0.deref() + } +} + +impl<'a> From<&'a [u8]> for Bytes<'a> { + fn from(bytes: &'a [u8]) -> Self { + Bytes(Cow::Borrowed(bytes)) + } +} + +impl<'a> From> for Bytes<'a> { + fn from(bytes: Vec) -> Self { + Bytes(Cow::Owned(bytes)) + } +} + +impl<'a> fmt::Debug for Bytes<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + debug_list_bytes(&self.0, f) + } +} + +// Only for Debug impl of `Bytes`. 
+fn debug_list_bytes(bytes: &[u8], fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = fmt.debug_list(); + list.entries(bytes.iter().take(8).copied().map(DebugByte)); + if bytes.len() > 8 { + list.entry(&DebugLen(bytes.len())); + } + list.finish() +} + +struct DebugByte(u8); + +impl fmt::Debug for DebugByte { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "0x{:02x}", self.0) + } +} + +struct DebugLen(usize); + +impl fmt::Debug for DebugLen { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "...; {}", self.0) + } +} + +/// A byte slice that is a string of an unknown encoding. +/// +/// Uses copy-on-write to avoid unnecessary allocations. The bytes can be +/// accessed as a slice using the `Deref` trait, or as a mutable `Vec` using the +/// `to_mut` method. +/// +/// Provides a `Debug` implementation that interprets the bytes as UTF-8. +#[derive(Default, Clone, PartialEq, Eq, Hash)] +pub struct ByteString<'a>(Cow<'a, [u8]>); + +impl<'a> ByteString<'a> { + /// Acquire a mutable reference to the bytes. + /// + /// Clones the bytes if they are shared. + pub fn to_mut(&mut self) -> &mut Vec { + self.0.to_mut() + } + + /// Get the bytes as a slice. 
+ pub fn as_slice(&self) -> &[u8] { + self.0.as_ref() + } +} + +impl<'a> core::borrow::Borrow<[u8]> for ByteString<'a> { + fn borrow(&self) -> &[u8] { + self.0.borrow() + } +} + +impl<'a> core::ops::Deref for ByteString<'a> { + type Target = [u8]; + fn deref(&self) -> &[u8] { + self.0.deref() + } +} + +impl<'a> From<&'a [u8]> for ByteString<'a> { + fn from(bytes: &'a [u8]) -> Self { + ByteString(Cow::Borrowed(bytes)) + } +} + +impl<'a> From> for ByteString<'a> { + fn from(bytes: Vec) -> Self { + ByteString(Cow::Owned(bytes)) + } +} + +impl<'a> From<&'a str> for ByteString<'a> { + fn from(s: &'a str) -> Self { + ByteString(Cow::Borrowed(s.as_bytes())) + } +} + +impl<'a> fmt::Debug for ByteString<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "\"{}\"", String::from_utf8_lossy(&self.0)) + } +} + +impl<'a> fmt::Display for ByteString<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "{}", String::from_utf8_lossy(&self.0)) + } +} diff --git a/third_party/rust/object/src/build/elf.rs b/third_party/rust/object/src/build/elf.rs new file mode 100644 index 000000000000..6030a6043227 --- /dev/null +++ b/third_party/rust/object/src/build/elf.rs @@ -0,0 +1,3092 @@ +//! This module provides a [`Builder`] for reading, modifying, and then writing ELF files. +use alloc::vec::Vec; +use core::convert::TryInto; +use core::fmt; +use core::marker::PhantomData; +#[cfg(not(feature = "std"))] +use hashbrown::HashMap; +#[cfg(feature = "std")] +use std::collections::HashMap; + +use crate::build::{ByteString, Bytes, Error, Id, IdPrivate, Item, Result, Table}; +use crate::elf; +use crate::read::elf::{Dyn, FileHeader, ProgramHeader, Rela, SectionHeader, Sym}; +use crate::read::{self, FileKind, ReadRef}; +use crate::write; +use crate::Endianness; + +/// A builder for reading, modifying, and then writing ELF files. +/// +/// Public fields are available for modifying the values that will be written. 
+/// Methods are available to add elements to tables, and elements can be deleted +/// from tables by setting the `delete` field in the element. +#[derive(Debug)] +pub struct Builder<'data> { + /// The endianness. + /// + /// Used to set the data encoding when writing the ELF file. + pub endian: Endianness, + /// Whether file is 64-bit. + /// + /// Use to set the file class when writing the ELF file. + pub is_64: bool, + /// The alignment of [`elf::PT_LOAD`] segments. + /// + /// This is an informational field and is not used when writing the ELF file. + /// It can optionally be used when calling [`Segments::add_load_segment`]. + /// + /// It is determined heuristically when reading the ELF file. Currently, + /// if all load segments have the same alignment, that alignment is used, + /// otherwise it is set to 1. + pub load_align: u64, + /// The file header. + pub header: Header, + /// The segment table. + pub segments: Segments<'data>, + /// The section table. + pub sections: Sections<'data>, + /// The symbol table. + pub symbols: Symbols<'data>, + /// The dynamic symbol table. + pub dynamic_symbols: DynamicSymbols<'data>, + /// The base version for the GNU version definitions. + /// + /// This will be written as a version definition with index 1. + pub version_base: Option>, + /// The GNU version definitions and dependencies. + pub versions: Versions<'data>, + /// The filenames used in the GNU version definitions. + pub version_files: VersionFiles<'data>, + /// The bucket count parameter for the hash table. + pub hash_bucket_count: u32, + /// The bloom shift parameter for the GNU hash table. + pub gnu_hash_bloom_shift: u32, + /// The bloom count parameter for the GNU hash table. + pub gnu_hash_bloom_count: u32, + /// The bucket count parameter for the GNU hash table. + pub gnu_hash_bucket_count: u32, + marker: PhantomData<()>, +} + +impl<'data> Builder<'data> { + /// Create a new ELF builder. 
+ pub fn new(endian: Endianness, is_64: bool) -> Self { + Self { + endian, + is_64, + load_align: 1, + header: Header::default(), + segments: Segments::new(), + sections: Sections::new(), + symbols: Symbols::new(), + dynamic_symbols: Symbols::new(), + version_base: None, + versions: Versions::new(), + version_files: VersionFiles::new(), + hash_bucket_count: 0, + gnu_hash_bloom_shift: 0, + gnu_hash_bloom_count: 0, + gnu_hash_bucket_count: 0, + marker: PhantomData, + } + } + + /// Read the ELF file from file data. + pub fn read>(data: R) -> Result { + match FileKind::parse(data)? { + FileKind::Elf32 => Self::read32(data), + FileKind::Elf64 => Self::read64(data), + #[allow(unreachable_patterns)] + _ => Err(Error::new("Not an ELF file")), + } + } + + /// Read a 32-bit ELF file from file data. + pub fn read32>(data: R) -> Result { + Self::read_file::, R>(data) + } + + /// Read a 64-bit ELF file from file data. + pub fn read64>(data: R) -> Result { + Self::read_file::, R>(data) + } + + fn read_file(data: R) -> Result + where + Elf: FileHeader, + R: ReadRef<'data>, + { + let header = Elf::parse(data)?; + let endian = header.endian()?; + let is_mips64el = header.is_mips64el(endian); + let section_strings_index = header.section_strings_index(endian, data)?; + let segments = header.program_headers(endian, data)?; + let sections = header.sections(endian, data)?; + let symbols = sections.symbols(endian, data, elf::SHT_SYMTAB)?; + let dynamic_symbols = sections.symbols(endian, data, elf::SHT_DYNSYM)?; + + let mut builder = Builder { + endian, + is_64: header.is_type_64(), + load_align: 0, + header: Header { + os_abi: header.e_ident().os_abi, + abi_version: header.e_ident().abi_version, + e_type: header.e_type(endian), + e_machine: header.e_machine(endian), + e_entry: header.e_entry(endian).into(), + e_flags: header.e_flags(endian), + e_phoff: header.e_phoff(endian).into(), + }, + segments: Segments::new(), + sections: Sections::new(), + symbols: Symbols::new(), + 
dynamic_symbols: Symbols::new(), + version_base: None, + versions: Versions::new(), + version_files: VersionFiles::new(), + hash_bucket_count: 0, + gnu_hash_bloom_shift: 0, + gnu_hash_bloom_count: 0, + gnu_hash_bucket_count: 0, + marker: PhantomData, + }; + + for segment in segments { + if segment.p_type(endian) == elf::PT_LOAD { + let p_align = segment.p_align(endian).into(); + if builder.load_align == 0 { + builder.load_align = p_align; + } else if builder.load_align != p_align { + builder.load_align = 1; + } + } + + let id = builder.segments.next_id(); + builder.segments.push(Segment { + id, + delete: false, + p_type: segment.p_type(endian), + p_flags: segment.p_flags(endian), + p_offset: segment.p_offset(endian).into(), + p_vaddr: segment.p_vaddr(endian).into(), + p_paddr: segment.p_paddr(endian).into(), + p_filesz: segment.p_filesz(endian).into(), + p_memsz: segment.p_memsz(endian).into(), + p_align: segment.p_align(endian).into(), + sections: Vec::new(), + marker: PhantomData, + }); + } + if builder.load_align == 0 { + builder.load_align = 1; + } + + for (index, section) in sections.enumerate().skip(1) { + let id = SectionId(index.0 - 1); + let relocations = if let Some((rels, link)) = section.rel(endian, data)? { + Self::read_relocations( + index, + endian, + is_mips64el, + section, + rels, + link, + &symbols, + &dynamic_symbols, + )? + } else if let Some((rels, link)) = section.rela(endian, data)? { + Self::read_relocations( + index, + endian, + is_mips64el, + section, + rels, + link, + &symbols, + &dynamic_symbols, + )? + } else { + SectionData::Data(Bytes::default()) + }; + if let Some(hash) = section.hash_header(endian, data)? { + builder.hash_bucket_count = hash.bucket_count.get(endian); + } + if let Some(hash) = section.gnu_hash_header(endian, data)? 
{ + builder.gnu_hash_bloom_shift = hash.bloom_shift.get(endian); + builder.gnu_hash_bloom_count = hash.bloom_count.get(endian); + builder.gnu_hash_bucket_count = hash.bucket_count.get(endian); + } + let data = match section.sh_type(endian) { + elf::SHT_NOBITS => SectionData::UninitializedData(section.sh_size(endian).into()), + elf::SHT_PROGBITS + | elf::SHT_INIT_ARRAY + | elf::SHT_FINI_ARRAY + | elf::SHT_PREINIT_ARRAY => SectionData::Data(section.data(endian, data)?.into()), + elf::SHT_REL | elf::SHT_RELA => relocations, + elf::SHT_SYMTAB => { + if index == symbols.section() { + SectionData::Symbol + } else { + return Err(Error(format!( + "Unsupported SHT_SYMTAB section at index {}", + index + ))); + } + } + elf::SHT_SYMTAB_SHNDX => { + if index == symbols.shndx_section() { + SectionData::SymbolSectionIndex + } else { + return Err(Error(format!( + "Unsupported SHT_SYMTAB_SHNDX section at index {}", + index + ))); + } + } + elf::SHT_DYNSYM => { + if index == dynamic_symbols.section() { + SectionData::DynamicSymbol + } else { + return Err(Error(format!( + "Unsupported SHT_DYNSYM section at index {}", + index + ))); + } + } + elf::SHT_STRTAB => { + if index == symbols.string_section() { + SectionData::String + } else if index == dynamic_symbols.string_section() { + SectionData::DynamicString + } else if index == section_strings_index { + SectionData::SectionString + } else { + return Err(Error(format!( + "Unsupported SHT_STRTAB section at index {}", + index + ))); + } + } + elf::SHT_NOTE => SectionData::Note(section.data(endian, data)?.into()), + elf::SHT_DYNAMIC => { + let (dyns, link) = section.dynamic(endian, data)?.unwrap(); + let dynamic_strings = sections.strings(endian, data, link)?; + Self::read_dynamics::(endian, dyns, dynamic_strings)? + } + elf::SHT_GNU_ATTRIBUTES => { + let attributes = section.attributes(endian, data)?; + Self::read_attributes(index, attributes, sections.len(), symbols.len())? 
+ } + elf::SHT_HASH => SectionData::Hash, + elf::SHT_GNU_HASH => SectionData::GnuHash, + elf::SHT_GNU_VERSYM => SectionData::GnuVersym, + elf::SHT_GNU_VERDEF => SectionData::GnuVerdef, + elf::SHT_GNU_VERNEED => SectionData::GnuVerneed, + other => match (builder.header.e_machine, other) { + (elf::EM_ARM, elf::SHT_ARM_ATTRIBUTES) + | (elf::EM_AARCH64, elf::SHT_AARCH64_ATTRIBUTES) + | (elf::EM_CSKY, elf::SHT_CSKY_ATTRIBUTES) + | (elf::EM_RISCV, elf::SHT_RISCV_ATTRIBUTES) => { + let attributes = section.attributes(endian, data)?; + Self::read_attributes(index, attributes, sections.len(), symbols.len())? + } + // Some section types that we can't parse but that are safe to copy. + // Lots of types missing, add as needed. We can't default to copying + // everything because some types are not safe to copy. + (elf::EM_ARM, elf::SHT_ARM_EXIDX) + | (elf::EM_IA_64, elf::SHT_IA_64_UNWIND) + | (elf::EM_MIPS, elf::SHT_MIPS_REGINFO) + | (elf::EM_MIPS, elf::SHT_MIPS_DWARF) + | (elf::EM_X86_64, elf::SHT_X86_64_UNWIND) => { + SectionData::Data(section.data(endian, data)?.into()) + } + _ => return Err(Error(format!("Unsupported section type {:x}", other))), + }, + }; + let sh_flags = section.sh_flags(endian).into(); + let sh_link = section.sh_link(endian); + let sh_link_section = if sh_link == 0 { + None + } else { + if sh_link as usize >= sections.len() { + return Err(Error(format!( + "Invalid sh_link {} in section at index {}", + sh_link, index + ))); + } + Some(SectionId(sh_link as usize - 1)) + }; + let sh_info = section.sh_info(endian); + let sh_info_section = if sh_info == 0 || sh_flags & u64::from(elf::SHF_INFO_LINK) == 0 { + None + } else { + if sh_info as usize >= sections.len() { + return Err(Error(format!( + "Invalid sh_info link {} in section at index {}", + sh_info, index + ))); + } + Some(SectionId(sh_info as usize - 1)) + }; + let sh_flags = section.sh_flags(endian).into(); + let sh_addr = section.sh_addr(endian).into(); + if sh_flags & u64::from(elf::SHF_ALLOC) != 0 { 
+ for segment in &mut builder.segments { + if segment.contains_address(sh_addr) { + segment.sections.push(id); + } + } + } + builder.sections.push(Section { + id, + delete: false, + name: sections.section_name(endian, section)?.into(), + sh_type: section.sh_type(endian), + sh_flags, + sh_addr, + sh_offset: section.sh_offset(endian).into(), + sh_size: section.sh_size(endian).into(), + sh_link_section, + sh_info, + sh_info_section, + sh_addralign: section.sh_addralign(endian).into(), + sh_entsize: section.sh_entsize(endian).into(), + data, + }); + } + + Self::read_symbols( + endian, + &symbols, + &mut builder.symbols, + builder.sections.len(), + )?; + Self::read_symbols( + endian, + &dynamic_symbols, + &mut builder.dynamic_symbols, + builder.sections.len(), + )?; + builder.read_gnu_versions(endian, data, §ions, &dynamic_symbols)?; + + Ok(builder) + } + + #[allow(clippy::too_many_arguments)] + fn read_relocations( + index: read::SectionIndex, + endian: Elf::Endian, + is_mips64el: bool, + section: &'data Elf::SectionHeader, + rels: &'data [Rel], + link: read::SectionIndex, + symbols: &read::elf::SymbolTable<'data, Elf, R>, + dynamic_symbols: &read::elf::SymbolTable<'data, Elf, R>, + ) -> Result> + where + Elf: FileHeader, + Rel: Copy + Into, + R: ReadRef<'data>, + { + if link == dynamic_symbols.section() { + Self::read_relocations_impl::( + index, + endian, + is_mips64el, + rels, + dynamic_symbols.len(), + ) + .map(SectionData::DynamicRelocation) + } else if link.0 == 0 || section.sh_flags(endian).into() & u64::from(elf::SHF_ALLOC) != 0 { + // If there's no link, then none of the relocations may reference symbols. + // Assume that these are dynamic relocations, but don't use the dynamic + // symbol table when parsing. + // + // Additionally, sometimes there is an allocated section that links to + // the static symbol table. 
We don't currently support this case in general, + // but if none of the relocation entries reference a symbol then it is + // safe to treat it as a dynamic relocation section. + // + // For both of these cases, if there is a reference to a symbol then + // an error will be returned when parsing the relocations. + Self::read_relocations_impl::(index, endian, is_mips64el, rels, 0) + .map(SectionData::DynamicRelocation) + } else if link == symbols.section() { + Self::read_relocations_impl::( + index, + endian, + is_mips64el, + rels, + symbols.len(), + ) + .map(SectionData::Relocation) + } else { + return Err(Error(format!( + "Invalid sh_link {} in relocation section at index {}", + link.0, index, + ))); + } + } + + fn read_relocations_impl( + index: read::SectionIndex, + endian: Elf::Endian, + is_mips64el: bool, + rels: &'data [Rel], + symbols_len: usize, + ) -> Result>> + where + Elf: FileHeader, + Rel: Copy + Into, + { + let mut relocations = Vec::new(); + for rel in rels { + let rel = (*rel).into(); + let symbol = if let Some(symbol) = rel.symbol(endian, is_mips64el) { + if symbol.0 >= symbols_len { + return Err(Error(format!( + "Invalid symbol index {} in relocation section at index {}", + symbol, index, + ))); + } + Some(SymbolId(symbol.0 - 1)) + } else { + None + }; + relocations.push(Relocation { + r_offset: rel.r_offset(endian).into(), + symbol, + r_type: rel.r_type(endian, is_mips64el), + r_addend: rel.r_addend(endian).into(), + }); + } + Ok(relocations) + } + + fn read_dynamics( + endian: Elf::Endian, + dyns: &'data [Elf::Dyn], + strings: read::StringTable<'data, R>, + ) -> Result> + where + Elf: FileHeader, + R: ReadRef<'data>, + { + let mut dynamics = Vec::with_capacity(dyns.len()); + for d in dyns { + let tag = d.d_tag(endian).into().try_into().map_err(|_| { + Error(format!( + "Unsupported dynamic tag 0x{:x}", + d.d_tag(endian).into() + )) + })?; + if tag == elf::DT_NULL { + break; + } + let val = d.d_val(endian).into(); + dynamics.push(if 
d.is_string(endian) { + let val = + strings + .get(val.try_into().map_err(|_| { + Error(format!("Unsupported dynamic string 0x{:x}", val)) + })?) + .map_err(|_| Error(format!("Invalid dynamic string 0x{:x}", val)))?; + Dynamic::String { + tag, + val: val.into(), + } + } else { + match tag { + elf::DT_SYMTAB + | elf::DT_STRTAB + | elf::DT_STRSZ + | elf::DT_HASH + | elf::DT_GNU_HASH + | elf::DT_VERSYM + | elf::DT_VERDEF + | elf::DT_VERDEFNUM + | elf::DT_VERNEED + | elf::DT_VERNEEDNUM => Dynamic::Auto { tag }, + _ => Dynamic::Integer { tag, val }, + } + }); + } + Ok(SectionData::Dynamic(dynamics)) + } + + fn read_symbols( + endian: Elf::Endian, + symbols: &read::elf::SymbolTable<'data, Elf, R>, + builder_symbols: &mut Symbols<'data, DYNAMIC>, + sections_len: usize, + ) -> Result<()> + where + Elf: FileHeader, + R: ReadRef<'data>, + { + for (index, symbol) in symbols.enumerate().skip(1) { + let id = SymbolId(index.0 - 1); + let section = + if let Some(section_index) = symbols.symbol_section(endian, symbol, index)? { + let section_id = section_index.0.wrapping_sub(1); + if section_id >= sections_len { + return Err(Error::new("Invalid symbol section index")); + } + Some(SectionId(section_id)) + } else { + None + }; + builder_symbols.push(Symbol { + id, + delete: false, + name: symbols.symbol_name(endian, symbol)?.into(), + section, + st_info: symbol.st_info(), + st_other: symbol.st_other(), + st_shndx: symbol.st_shndx(endian), + st_value: symbol.st_value(endian).into(), + st_size: symbol.st_size(endian).into(), + version: VersionId::local(), + version_hidden: false, + }); + } + Ok(()) + } + + fn read_attributes( + index: read::SectionIndex, + attributes: read::elf::AttributesSection<'data, Elf>, + sections_len: usize, + symbols_len: usize, + ) -> Result> + where + Elf: FileHeader, + { + let mut builder_attributes = AttributesSection::new(); + let mut subsections = attributes.subsections()?; + while let Some(subsection) = subsections.next()? 
{ + let mut builder_subsection = AttributesSubsection::new(subsection.vendor().into()); + let mut subsubsections = subsection.subsubsections(); + while let Some(subsubsection) = subsubsections.next()? { + let tag = match subsubsection.tag() { + elf::Tag_File => AttributeTag::File, + elf::Tag_Section => { + let mut tag_sections = Vec::new(); + let mut indices = subsubsection.indices(); + while let Some(index) = indices.next()? { + let index = index as usize; + if index >= sections_len { + return Err(Error(format!( + "Invalid section index {} in attribute", + index + ))); + } + tag_sections.push(SectionId(index - 1)); + } + AttributeTag::Section(tag_sections) + } + elf::Tag_Symbol => { + let mut tag_symbols = Vec::new(); + let mut indices = subsubsection.indices(); + while let Some(index) = indices.next()? { + let index = index as usize; + if index >= symbols_len { + return Err(Error(format!( + "Invalid symbol index {} in attribute", + index + ))); + } + tag_symbols.push(SymbolId(index - 1)); + } + AttributeTag::Symbol(tag_symbols) + } + tag => { + return Err(Error(format!( + "Unsupported attribute tag 0x{:x} in section at index {}", + tag, index, + ))) + } + }; + let data = subsubsection.attributes_data().into(); + builder_subsection + .subsubsections + .push(AttributesSubsubsection { tag, data }); + } + builder_attributes.subsections.push(builder_subsection); + } + Ok(SectionData::Attributes(builder_attributes)) + } + + fn read_gnu_versions( + &mut self, + endian: Elf::Endian, + data: R, + sections: &read::elf::SectionTable<'data, Elf, R>, + dynamic_symbols: &read::elf::SymbolTable<'data, Elf, R>, + ) -> Result<()> + where + Elf: FileHeader, + R: ReadRef<'data>, + { + let strings = dynamic_symbols.strings(); + let mut ids = HashMap::new(); + ids.insert(0, VersionId::local()); + ids.insert(1, VersionId::global()); + + if let Some((mut verdefs, link)) = sections.gnu_verdef(endian, data)? 
{ + if link != dynamic_symbols.string_section() { + return Err(Error::new("Invalid SHT_GNU_VERDEF section")); + } + while let Some((verdef, mut verdauxs)) = verdefs.next()? { + let flags = verdef.vd_flags.get(endian); + if flags & elf::VER_FLG_BASE != 0 { + if flags != elf::VER_FLG_BASE + || verdef.vd_ndx.get(endian) != 1 + || verdef.vd_cnt.get(endian) != 1 + { + return Err(Error::new("Unsupported VER_FLG_BASE in SHT_GNU_VERDEF")); + } + if self.version_base.is_some() { + return Err(Error::new("Duplicate VER_FLG_BASE in SHT_GNU_VERDEF")); + } + let verdaux = verdauxs.next()?.ok_or_else(|| { + Error::new("Missing name for VER_FLG_BASE in SHT_GNU_VERDEF") + })?; + self.version_base = Some(verdaux.name(endian, strings)?.into()); + continue; + } + + let index = verdef.vd_ndx.get(endian) & elf::VERSYM_VERSION; + let id = self.versions.next_id(); + if ids.insert(index, id).is_some() { + return Err(Error(format!("Duplicate SHT_GNU_VERDEF index {}", index))); + } + + let mut names = Vec::new(); + while let Some(verdaux) = verdauxs.next()? { + names.push(verdaux.name(endian, strings)?.into()); + } + + let data = VersionData::Def(VersionDef { flags, names }); + self.versions.push(Version { + id, + delete: false, + data, + }); + } + } + + if let Some((mut verneeds, link)) = sections.gnu_verneed(endian, data)? { + if link != dynamic_symbols.string_section() { + return Err(Error::new("Invalid SHT_GNU_VERNEED section")); + } + while let Some((verneed, mut vernauxs)) = verneeds.next()? { + let file = VersionFileId(self.version_files.len()); + self.version_files.push(VersionFile { + id: file, + delete: false, + name: verneed.file(endian, strings)?.into(), + }); + while let Some(vernaux) = vernauxs.next()? 
{ + let index = vernaux.vna_other.get(endian) & elf::VERSYM_VERSION; + let id = self.versions.next_id(); + if ids.insert(index, id).is_some() { + return Err(Error(format!("Duplicate SHT_GNU_VERNEED index {}", index))); + } + + let data = VersionData::Need(VersionNeed { + flags: vernaux.vna_flags.get(endian), + name: vernaux.name(endian, strings)?.into(), + file, + }); + self.versions.push(Version { + id, + delete: false, + data, + }); + } + } + } + + if let Some((versyms, link)) = sections.gnu_versym(endian, data)? { + if versyms.len() != dynamic_symbols.len() || link != dynamic_symbols.section() { + return Err(Error::new("Invalid SHT_GNU_VERSYM section")); + } + for (id, versym) in versyms.iter().skip(1).enumerate() { + let index = versym.0.get(endian); + let symbol = self.dynamic_symbols.get_mut(SymbolId(id)); + symbol.version = *ids + .get(&(index & elf::VERSYM_VERSION)) + .ok_or_else(|| Error(format!("Invalid SHT_GNU_VERSYM index {:x}", index)))?; + symbol.version_hidden = index & elf::VERSYM_HIDDEN != 0; + } + } + Ok(()) + } + + /// Write the ELF file to the buffer. + pub fn write(mut self, buffer: &mut dyn write::WritableBuffer) -> Result<()> { + struct SectionOut { + id: SectionId, + name: Option, + offset: usize, + attributes: Vec, + } + + struct SymbolOut { + id: SymbolId, + name: Option, + } + + struct DynamicSymbolOut { + id: DynamicSymbolId, + name: Option, + hash: Option, + gnu_hash: Option, + } + + #[derive(Default, Clone)] + struct VersionFileOut { + versions: Vec, + } + + // TODO: require the caller to do this? + self.delete_orphans(); + self.delete_unused_versions(); + + let mut writer = write::elf::Writer::new(self.endian, self.is_64, buffer); + + // Find metadata sections, and assign section indices. 
+ let mut shstrtab_id = None; + let mut symtab_id = None; + let mut symtab_shndx_id = None; + let mut strtab_id = None; + let mut dynsym_id = None; + let mut dynstr_id = None; + let mut hash_id = None; + let mut gnu_hash_id = None; + let mut gnu_versym_id = None; + let mut gnu_verdef_id = None; + let mut gnu_verneed_id = None; + let mut out_sections = Vec::with_capacity(self.sections.len()); + let mut out_sections_index = vec![None; self.sections.len()]; + if !self.sections.is_empty() { + writer.reserve_null_section_index(); + } + for section in &self.sections { + let index = match §ion.data { + SectionData::Data(_) + | SectionData::UninitializedData(_) + | SectionData::Relocation(_) + | SectionData::DynamicRelocation(_) + | SectionData::Note(_) + | SectionData::Dynamic(_) + | SectionData::Attributes(_) => writer.reserve_section_index(), + SectionData::SectionString => { + if shstrtab_id.is_some() { + return Err(Error::new("Multiple .shstrtab sections")); + } + shstrtab_id = Some(section.id); + writer.reserve_shstrtab_section_index_with_name(§ion.name) + } + SectionData::Symbol => { + if symtab_id.is_some() { + return Err(Error::new("Multiple .symtab sections")); + } + symtab_id = Some(section.id); + writer.reserve_symtab_section_index_with_name(§ion.name) + } + SectionData::SymbolSectionIndex => { + if symtab_shndx_id.is_some() { + return Err(Error::new("Multiple .symtab_shndx sections")); + } + symtab_shndx_id = Some(section.id); + writer.reserve_symtab_shndx_section_index_with_name(§ion.name) + } + SectionData::String => { + if strtab_id.is_some() { + return Err(Error::new("Multiple .strtab sections")); + } + strtab_id = Some(section.id); + writer.reserve_strtab_section_index_with_name(§ion.name) + } + SectionData::DynamicSymbol => { + if dynsym_id.is_some() { + return Err(Error::new("Multiple .dynsym sections")); + } + dynsym_id = Some(section.id); + writer.reserve_dynsym_section_index_with_name(§ion.name) + } + SectionData::DynamicString => { + if 
dynstr_id.is_some() { + return Err(Error::new("Multiple .dynstr sections")); + } + dynstr_id = Some(section.id); + writer.reserve_dynstr_section_index_with_name(§ion.name) + } + SectionData::Hash => { + if hash_id.is_some() { + return Err(Error::new("Multiple .hash sections")); + } + hash_id = Some(section.id); + writer.reserve_hash_section_index_with_name(§ion.name) + } + SectionData::GnuHash => { + if gnu_hash_id.is_some() { + return Err(Error::new("Multiple .gnu.hash sections")); + } + gnu_hash_id = Some(section.id); + writer.reserve_gnu_hash_section_index_with_name(§ion.name) + } + SectionData::GnuVersym => { + if gnu_versym_id.is_some() { + return Err(Error::new("Multiple .gnu.version sections")); + } + gnu_versym_id = Some(section.id); + writer.reserve_gnu_versym_section_index_with_name(§ion.name) + } + SectionData::GnuVerdef => { + if gnu_verdef_id.is_some() { + return Err(Error::new("Multiple .gnu.version_d sections")); + } + gnu_verdef_id = Some(section.id); + writer.reserve_gnu_verdef_section_index_with_name(§ion.name) + } + SectionData::GnuVerneed => { + if gnu_verneed_id.is_some() { + return Err(Error::new("Multiple .gnu.version_r sections")); + } + gnu_verneed_id = Some(section.id); + writer.reserve_gnu_verneed_section_index_with_name(§ion.name) + } + }; + out_sections_index[section.id.0] = Some(index); + + let name = if section.name.is_empty() { + None + } else { + Some(writer.add_section_name(§ion.name)) + }; + out_sections.push(SectionOut { + id: section.id, + name, + offset: 0, + attributes: Vec::new(), + }); + } + + // Assign dynamic strings. + for section in &self.sections { + if let SectionData::Dynamic(dynamics) = §ion.data { + for dynamic in dynamics { + if let Dynamic::String { val, .. } = dynamic { + writer.add_dynamic_string(val); + } + } + } + } + + // Assign dynamic symbol indices. + let mut out_dynsyms = Vec::with_capacity(self.dynamic_symbols.len()); + // Local symbols must come before global. 
+ let local_symbols = self + .dynamic_symbols + .into_iter() + .filter(|symbol| symbol.st_bind() == elf::STB_LOCAL); + let global_symbols = self + .dynamic_symbols + .into_iter() + .filter(|symbol| symbol.st_bind() != elf::STB_LOCAL); + for symbol in local_symbols.chain(global_symbols) { + let mut name = None; + let mut hash = None; + let mut gnu_hash = None; + if !symbol.name.is_empty() { + name = Some(writer.add_dynamic_string(&symbol.name)); + if hash_id.is_some() { + hash = Some(elf::hash(&symbol.name)); + } + if gnu_hash_id.is_some() && symbol.section.is_some() { + gnu_hash = Some(elf::gnu_hash(&symbol.name)); + } + } + out_dynsyms.push(DynamicSymbolOut { + id: symbol.id, + name, + hash, + gnu_hash, + }); + } + let num_local_dynamic = out_dynsyms + .iter() + .take_while(|sym| self.dynamic_symbols.get(sym.id).st_bind() == elf::STB_LOCAL) + .count(); + // We must sort for GNU hash before allocating symbol indices. + let mut gnu_hash_symbol_count = 0; + if gnu_hash_id.is_some() { + if self.gnu_hash_bucket_count == 0 { + return Err(Error::new(".gnu.hash bucket count is zero")); + } + // TODO: recalculate bucket_count? + out_dynsyms[num_local_dynamic..].sort_by_key(|sym| match sym.gnu_hash { + None => (0, 0), + Some(hash) => (1, hash % self.gnu_hash_bucket_count), + }); + gnu_hash_symbol_count = out_dynsyms + .iter() + .skip(num_local_dynamic) + .skip_while(|sym| sym.gnu_hash.is_none()) + .count() as u32; + } + let mut out_dynsyms_index = vec![None; self.dynamic_symbols.len()]; + if dynsym_id.is_some() { + writer.reserve_null_dynamic_symbol_index(); + } + for out_dynsym in &mut out_dynsyms { + out_dynsyms_index[out_dynsym.id.0] = Some(writer.reserve_dynamic_symbol_index()); + } + + // Hash parameters. + let hash_index_base = 1; // Null symbol. + let hash_chain_count = hash_index_base + out_dynsyms.len() as u32; + + // GNU hash parameters. 
+ let gnu_hash_index_base = if gnu_hash_symbol_count == 0 { + 0 + } else { + out_dynsyms.len() as u32 - gnu_hash_symbol_count + }; + let gnu_hash_symbol_base = gnu_hash_index_base + 1; // Null symbol. + + // Assign symbol indices. + let mut out_syms = Vec::with_capacity(self.symbols.len()); + // Local symbols must come before global. + let local_symbols = self + .symbols + .into_iter() + .filter(|symbol| symbol.st_bind() == elf::STB_LOCAL); + let global_symbols = self + .symbols + .into_iter() + .filter(|symbol| symbol.st_bind() != elf::STB_LOCAL); + for symbol in local_symbols.chain(global_symbols) { + let name = if symbol.name.is_empty() { + None + } else { + Some(writer.add_string(&symbol.name)) + }; + + out_syms.push(SymbolOut { + id: symbol.id, + name, + }); + } + let num_local = out_syms + .iter() + .take_while(|sym| self.symbols.get(sym.id).st_bind() == elf::STB_LOCAL) + .count(); + let mut out_syms_index = vec![None; self.symbols.len()]; + if symtab_id.is_some() { + writer.reserve_null_symbol_index(); + } + for out_sym in out_syms.iter_mut() { + out_syms_index[out_sym.id.0] = Some(writer.reserve_symbol_index(None)); + } + + // Count the versions and add version strings. 
+ let mut verdef_count = 0; + let mut verdaux_count = 0; + let mut verneed_count = 0; + let mut vernaux_count = 0; + let mut out_version_files = vec![VersionFileOut::default(); self.version_files.len()]; + if let Some(version_base) = &self.version_base { + verdef_count += 1; + verdaux_count += 1; + writer.add_dynamic_string(version_base); + } + for version in &self.versions { + match &version.data { + VersionData::Def(def) => { + verdef_count += 1; + verdaux_count += def.names.len(); + for name in &def.names { + writer.add_dynamic_string(name); + } + } + VersionData::Need(need) => { + vernaux_count += 1; + writer.add_dynamic_string(&need.name); + out_version_files[need.file.0].versions.push(version.id); + } + } + } + for file in &self.version_files { + verneed_count += 1; + writer.add_dynamic_string(&file.name); + } + + // Build the attributes sections. + for out_section in &mut out_sections { + let SectionData::Attributes(attributes) = &self.sections.get(out_section.id).data + else { + continue; + }; + if attributes.subsections.is_empty() { + continue; + } + let mut writer = writer.attributes_writer(); + for subsection in &attributes.subsections { + writer.start_subsection(&subsection.vendor); + for subsubsection in &subsection.subsubsections { + writer.start_subsubsection(subsubsection.tag.tag()); + match &subsubsection.tag { + AttributeTag::File => {} + AttributeTag::Section(sections) => { + for id in sections { + if let Some(index) = out_sections_index[id.0] { + writer.write_subsubsection_index(index.0); + } + } + writer.write_subsubsection_index(0); + } + AttributeTag::Symbol(symbols) => { + for id in symbols { + if let Some(index) = out_syms_index[id.0] { + writer.write_subsubsection_index(index.0); + } + } + writer.write_subsubsection_index(0); + } + } + writer.write_subsubsection_attributes(&subsubsection.data); + writer.end_subsubsection(); + } + writer.end_subsection(); + } + out_section.attributes = writer.data(); + } + + // TODO: support section headers 
in strtab + if shstrtab_id.is_none() && !out_sections.is_empty() { + return Err(Error::new(".shstrtab section is needed but not present")); + } + if symtab_id.is_none() && !out_syms.is_empty() { + return Err(Error::new(".symtab section is needed but not present")); + } + if symtab_shndx_id.is_none() && writer.symtab_shndx_needed() { + return Err(Error::new( + ".symtab.shndx section is needed but not present", + )); + } else if symtab_shndx_id.is_some() { + writer.require_symtab_shndx(); + } + if strtab_id.is_none() && writer.strtab_needed() { + return Err(Error::new(".strtab section is needed but not present")); + } else if strtab_id.is_some() { + writer.require_strtab(); + } + if dynsym_id.is_none() && !out_dynsyms.is_empty() { + return Err(Error::new(".dynsym section is needed but not present")); + } + if dynstr_id.is_none() && writer.dynstr_needed() { + return Err(Error::new(".dynstr section is needed but not present")); + } else if dynstr_id.is_some() { + writer.require_dynstr(); + } + if gnu_verdef_id.is_none() && verdef_count > 0 { + return Err(Error::new( + ".gnu.version_d section is needed but not present", + )); + } + if gnu_verneed_id.is_none() && verneed_count > 0 { + return Err(Error::new( + ".gnu.version_r section is needed but not present", + )); + } + + // Start reserving file ranges. + writer.reserve_file_header(); + + let mut dynsym_addr = None; + let mut dynstr_addr = None; + let mut hash_addr = None; + let mut gnu_hash_addr = None; + let mut versym_addr = None; + let mut verdef_addr = None; + let mut verneed_addr = None; + + if !self.segments.is_empty() { + // TODO: support program headers in other locations. 
+ if self.header.e_phoff != writer.reserved_len() as u64 { + return Err(Error(format!( + "Unsupported e_phoff value 0x{:x}", + self.header.e_phoff + ))); + } + writer.reserve_program_headers(self.segments.count() as u32); + } + + let mut alloc_sections = Vec::new(); + if !self.segments.is_empty() { + // Reserve alloc sections at original offsets. + alloc_sections = out_sections + .iter() + .enumerate() + .filter_map(|(index, out_section)| { + let section = self.sections.get(out_section.id); + if section.is_alloc() { + Some(index) + } else { + None + } + }) + .collect(); + // The data for alloc sections may need to be written in a different order + // from their section headers. + alloc_sections.sort_by_key(|index| { + let section = &self.sections.get(out_sections[*index].id); + // Empty sections need to come before other sections at the same offset. + (section.sh_offset, section.sh_size) + }); + for index in &alloc_sections { + let out_section = &mut out_sections[*index]; + let section = &self.sections.get(out_section.id); + + if section.sh_type == elf::SHT_NOBITS { + // sh_offset is meaningless for SHT_NOBITS, so preserve the input + // value without checking it. + out_section.offset = section.sh_offset as usize; + continue; + } + + if section.sh_offset < writer.reserved_len() as u64 { + return Err(Error(format!( + "Unsupported sh_offset value 0x{:x} for section '{}', expected at least 0x{:x}", + section.sh_offset, + section.name, + writer.reserved_len(), + ))); + } + // The input sh_offset needs to be preserved so that offsets in program + // headers are correct. 
+ writer.reserve_until(section.sh_offset as usize); + out_section.offset = match §ion.data { + SectionData::Data(data) => { + writer.reserve(data.len(), section.sh_addralign as usize) + } + SectionData::DynamicRelocation(relocations) => writer + .reserve_relocations(relocations.len(), section.sh_type == elf::SHT_RELA), + SectionData::Note(data) => { + writer.reserve(data.len(), section.sh_addralign as usize) + } + SectionData::Dynamic(dynamics) => writer.reserve_dynamics(1 + dynamics.len()), + SectionData::DynamicSymbol => { + dynsym_addr = Some(section.sh_addr); + writer.reserve_dynsym() + } + SectionData::DynamicString => { + dynstr_addr = Some(section.sh_addr); + writer.reserve_dynstr() + } + SectionData::Hash => { + hash_addr = Some(section.sh_addr); + writer.reserve_hash(self.hash_bucket_count, hash_chain_count) + } + SectionData::GnuHash => { + gnu_hash_addr = Some(section.sh_addr); + writer.reserve_gnu_hash( + self.gnu_hash_bloom_count, + self.gnu_hash_bucket_count, + gnu_hash_symbol_count, + ) + } + SectionData::GnuVersym => { + versym_addr = Some(section.sh_addr); + writer.reserve_gnu_versym() + } + SectionData::GnuVerdef => { + verdef_addr = Some(section.sh_addr); + writer.reserve_gnu_verdef(verdef_count, verdaux_count) + } + SectionData::GnuVerneed => { + verneed_addr = Some(section.sh_addr); + writer.reserve_gnu_verneed(verneed_count, vernaux_count) + } + _ => { + return Err(Error(format!( + "Unsupported alloc section type {:x} for section '{}'", + section.sh_type, section.name, + ))); + } + }; + if out_section.offset as u64 != section.sh_offset { + return Err(Error(format!( + "Unaligned sh_offset value 0x{:x} for section '{}', expected 0x{:x}", + section.sh_offset, section.name, out_section.offset, + ))); + } + } + } + + // Reserve non-alloc sections at any offset. 
+ for out_section in &mut out_sections { + let section = self.sections.get(out_section.id); + if !self.segments.is_empty() && section.is_alloc() { + continue; + } + out_section.offset = match §ion.data { + SectionData::Data(data) => { + writer.reserve(data.len(), section.sh_addralign as usize) + } + SectionData::UninitializedData(_) => writer.reserved_len(), + SectionData::Note(data) => { + writer.reserve(data.len(), section.sh_addralign as usize) + } + SectionData::Attributes(_) => { + writer.reserve(out_section.attributes.len(), section.sh_addralign as usize) + } + // These are handled elsewhere. + SectionData::Relocation(_) + | SectionData::SectionString + | SectionData::Symbol + | SectionData::SymbolSectionIndex + | SectionData::String => { + continue; + } + _ => { + return Err(Error(format!( + "Unsupported non-alloc section type {:x}", + section.sh_type + ))); + } + }; + } + + writer.reserve_symtab(); + writer.reserve_symtab_shndx(); + writer.reserve_strtab(); + + // Reserve non-alloc relocations. + for out_section in &mut out_sections { + let section = self.sections.get(out_section.id); + if !self.segments.is_empty() && section.is_alloc() { + continue; + } + let SectionData::Relocation(relocations) = §ion.data else { + continue; + }; + out_section.offset = + writer.reserve_relocations(relocations.len(), section.sh_type == elf::SHT_RELA); + } + + writer.reserve_shstrtab(); + writer.reserve_section_headers(); + + // Start writing. 
+ writer.write_file_header(&write::elf::FileHeader { + os_abi: self.header.os_abi, + abi_version: self.header.abi_version, + e_type: self.header.e_type, + e_machine: self.header.e_machine, + e_entry: self.header.e_entry, + e_flags: self.header.e_flags, + })?; + + if !self.segments.is_empty() { + writer.write_align_program_headers(); + for segment in &self.segments { + writer.write_program_header(&write::elf::ProgramHeader { + p_type: segment.p_type, + p_flags: segment.p_flags, + p_offset: segment.p_offset, + p_vaddr: segment.p_vaddr, + p_paddr: segment.p_paddr, + p_filesz: segment.p_filesz, + p_memsz: segment.p_memsz, + p_align: segment.p_align, + }); + } + } + + // Write alloc sections. + if !self.segments.is_empty() { + for index in &alloc_sections { + let out_section = &mut out_sections[*index]; + let section = self.sections.get(out_section.id); + + if section.sh_type == elf::SHT_NOBITS { + continue; + } + + writer.pad_until(out_section.offset); + match §ion.data { + SectionData::Data(data) => { + writer.write(data); + } + SectionData::DynamicRelocation(relocations) => { + for rel in relocations { + let r_sym = if let Some(symbol) = rel.symbol { + out_dynsyms_index[symbol.0].unwrap().0 + } else { + 0 + }; + writer.write_relocation( + section.sh_type == elf::SHT_RELA, + &write::elf::Rel { + r_offset: rel.r_offset, + r_sym, + r_type: rel.r_type, + r_addend: rel.r_addend, + }, + ); + } + } + SectionData::Note(data) => { + writer.write(data); + } + SectionData::Dynamic(dynamics) => { + for d in dynamics { + match *d { + Dynamic::Auto { tag } => { + // TODO: support more values + let val = match tag { + elf::DT_SYMTAB => dynsym_addr.ok_or(Error::new( + "Missing .dynsym section for DT_SYMTAB", + ))?, + elf::DT_STRTAB => dynstr_addr.ok_or(Error::new( + "Missing .dynstr section for DT_STRTAB", + ))?, + elf::DT_STRSZ => writer.dynstr_len() as u64, + elf::DT_HASH => hash_addr.ok_or(Error::new( + "Missing .hash section for DT_HASH", + ))?, + elf::DT_GNU_HASH => 
gnu_hash_addr.ok_or(Error::new( + "Missing .gnu.hash section for DT_GNU_HASH", + ))?, + elf::DT_VERSYM => versym_addr.ok_or(Error::new( + "Missing .gnu.version section for DT_VERSYM", + ))?, + elf::DT_VERDEF => verdef_addr.ok_or(Error::new( + "Missing .gnu.version_d section for DT_VERDEF", + ))?, + elf::DT_VERDEFNUM => verdef_count as u64, + elf::DT_VERNEED => verneed_addr.ok_or(Error::new( + "Missing .gnu.version_r section for DT_VERNEED", + ))?, + elf::DT_VERNEEDNUM => verneed_count as u64, + _ => { + return Err(Error(format!( + "Cannot generate value for dynamic tag 0x{:x}", + tag + ))) + } + }; + writer.write_dynamic(tag, val); + } + Dynamic::Integer { tag, val } => { + writer.write_dynamic(tag, val); + } + Dynamic::String { tag, ref val } => { + let val = writer.get_dynamic_string(val); + writer.write_dynamic_string(tag, val); + } + } + } + writer.write_dynamic(elf::DT_NULL, 0); + } + SectionData::DynamicSymbol => { + writer.write_null_dynamic_symbol(); + for out_dynsym in &out_dynsyms { + let symbol = self.dynamic_symbols.get(out_dynsym.id); + let section = + symbol.section.map(|id| out_sections_index[id.0].unwrap()); + writer.write_dynamic_symbol(&write::elf::Sym { + name: out_dynsym.name, + section, + st_info: symbol.st_info, + st_other: symbol.st_other, + st_shndx: symbol.st_shndx, + st_value: symbol.st_value, + st_size: symbol.st_size, + }); + } + } + SectionData::DynamicString => { + writer.write_dynstr(); + } + SectionData::Hash => { + if self.hash_bucket_count == 0 { + return Err(Error::new(".hash bucket count is zero")); + } + writer.write_hash(self.hash_bucket_count, hash_chain_count, |index| { + out_dynsyms + .get(index.checked_sub(hash_index_base)? as usize)? 
+ .hash + }); + } + SectionData::GnuHash => { + if self.gnu_hash_bucket_count == 0 { + return Err(Error::new(".gnu.hash bucket count is zero")); + } + writer.write_gnu_hash( + gnu_hash_symbol_base, + self.gnu_hash_bloom_shift, + self.gnu_hash_bloom_count, + self.gnu_hash_bucket_count, + gnu_hash_symbol_count, + |index| { + out_dynsyms[(gnu_hash_index_base + index) as usize] + .gnu_hash + .unwrap() + }, + ); + } + SectionData::GnuVersym => { + writer.write_null_gnu_versym(); + for out_dynsym in &out_dynsyms { + let symbol = self.dynamic_symbols.get(out_dynsym.id); + let mut index = symbol.version.0 as u16; + if symbol.version_hidden { + index |= elf::VERSYM_HIDDEN; + } + writer.write_gnu_versym(index); + } + } + SectionData::GnuVerdef => { + writer.write_align_gnu_verdef(); + if let Some(version_base) = &self.version_base { + writer.write_gnu_verdef(&write::elf::Verdef { + version: elf::VER_DEF_CURRENT, + flags: elf::VER_FLG_BASE, + index: 1, + aux_count: 1, + name: writer.get_dynamic_string(version_base), + }); + } + for version in &self.versions { + if let VersionData::Def(def) = &version.data { + let mut names = def.names.iter(); + let name = names.next().ok_or_else(|| { + Error(format!("Missing SHT_GNU_VERDEF name {}", version.id.0)) + })?; + writer.write_gnu_verdef(&write::elf::Verdef { + version: elf::VER_DEF_CURRENT, + flags: def.flags, + index: version.id.0 as u16, + aux_count: def.names.len() as u16, + name: writer.get_dynamic_string(name), + }); + for name in names { + writer.write_gnu_verdaux(writer.get_dynamic_string(name)); + } + } + } + } + SectionData::GnuVerneed => { + writer.write_align_gnu_verneed(); + for file in &self.version_files { + let out_file = &out_version_files[file.id.0]; + if out_file.versions.is_empty() { + continue; + } + writer.write_gnu_verneed(&write::elf::Verneed { + version: elf::VER_NEED_CURRENT, + aux_count: out_file.versions.len() as u16, + file: writer.get_dynamic_string(&file.name), + }); + for id in &out_file.versions { + 
let version = self.versions.get(*id); + // This will always match. + if let VersionData::Need(need) = &version.data { + debug_assert_eq!(*id, version.id); + writer.write_gnu_vernaux(&write::elf::Vernaux { + flags: need.flags, + index: version.id.0 as u16, + name: writer.get_dynamic_string(&need.name), + }); + } + } + } + } + _ => { + return Err(Error(format!( + "Unsupported alloc section type {:x}", + section.sh_type + ))); + } + } + } + } + + // Write non-alloc sections. + for out_section in &mut out_sections { + let section = self.sections.get(out_section.id); + if !self.segments.is_empty() && section.is_alloc() { + continue; + } + match §ion.data { + SectionData::Data(data) => { + writer.write_align(section.sh_addralign as usize); + debug_assert_eq!(out_section.offset, writer.len()); + writer.write(data); + } + SectionData::UninitializedData(_) => { + // Nothing to do. + } + SectionData::Note(data) => { + writer.write_align(section.sh_addralign as usize); + debug_assert_eq!(out_section.offset, writer.len()); + writer.write(data); + } + SectionData::Attributes(_) => { + writer.write_align(section.sh_addralign as usize); + debug_assert_eq!(out_section.offset, writer.len()); + writer.write(&out_section.attributes); + } + // These are handled elsewhere. 
+ SectionData::Relocation(_) + | SectionData::SectionString + | SectionData::Symbol + | SectionData::SymbolSectionIndex + | SectionData::String => {} + _ => { + return Err(Error(format!( + "Unsupported non-alloc section type {:x}", + section.sh_type + ))); + } + } + } + + writer.write_null_symbol(); + for out_sym in &out_syms { + let symbol = self.symbols.get(out_sym.id); + let section = symbol.section.map(|id| out_sections_index[id.0].unwrap()); + writer.write_symbol(&write::elf::Sym { + name: out_sym.name, + section, + st_info: symbol.st_info, + st_other: symbol.st_other, + st_shndx: symbol.st_shndx, + st_value: symbol.st_value, + st_size: symbol.st_size, + }); + } + writer.write_symtab_shndx(); + writer.write_strtab(); + + // Write non-alloc relocations. + for section in &self.sections { + if !self.segments.is_empty() && section.is_alloc() { + continue; + } + let SectionData::Relocation(relocations) = §ion.data else { + continue; + }; + writer.write_align_relocation(); + for rel in relocations { + let r_sym = if let Some(id) = rel.symbol { + out_syms_index[id.0].unwrap().0 + } else { + 0 + }; + writer.write_relocation( + section.sh_type == elf::SHT_RELA, + &write::elf::Rel { + r_offset: rel.r_offset, + r_sym, + r_type: rel.r_type, + r_addend: rel.r_addend, + }, + ); + } + } + + writer.write_shstrtab(); + + writer.write_null_section_header(); + for out_section in &out_sections { + let section = self.sections.get(out_section.id); + match §ion.data { + SectionData::Data(_) + | SectionData::UninitializedData(_) + | SectionData::Relocation(_) + | SectionData::DynamicRelocation(_) + | SectionData::Note(_) + | SectionData::Dynamic(_) + | SectionData::Attributes(_) => { + let sh_size = match §ion.data { + SectionData::Data(data) => data.len() as u64, + SectionData::UninitializedData(len) => *len, + SectionData::Relocation(relocations) => { + (relocations.len() + * self.class().rel_size(section.sh_type == elf::SHT_RELA)) + as u64 + } + 
SectionData::DynamicRelocation(relocations) => { + (relocations.len() + * self.class().rel_size(section.sh_type == elf::SHT_RELA)) + as u64 + } + SectionData::Note(data) => data.len() as u64, + SectionData::Dynamic(dynamics) => { + ((1 + dynamics.len()) * self.class().dyn_size()) as u64 + } + SectionData::Attributes(_) => out_section.attributes.len() as u64, + _ => { + return Err(Error(format!( + "Unimplemented size for section type {:x}", + section.sh_type + ))) + } + }; + let sh_link = if let Some(id) = section.sh_link_section { + if let Some(index) = out_sections_index[id.0] { + index.0 + } else { + return Err(Error(format!( + "Invalid sh_link from section '{}' to deleted section '{}'", + section.name, + self.sections.get(id).name, + ))); + } + } else { + 0 + }; + let sh_info = if let Some(id) = section.sh_info_section { + if let Some(index) = out_sections_index[id.0] { + index.0 + } else { + return Err(Error(format!( + "Invalid sh_info link from section '{}' to deleted section '{}'", + section.name, + self.sections.get(id).name, + ))); + } + } else { + section.sh_info + }; + writer.write_section_header(&write::elf::SectionHeader { + name: out_section.name, + sh_type: section.sh_type, + sh_flags: section.sh_flags, + sh_addr: section.sh_addr, + sh_offset: out_section.offset as u64, + sh_size, + sh_link, + sh_info, + sh_addralign: section.sh_addralign, + sh_entsize: section.sh_entsize, + }); + } + SectionData::SectionString => { + writer.write_shstrtab_section_header(); + } + SectionData::Symbol => { + writer.write_symtab_section_header(1 + num_local as u32); + } + SectionData::SymbolSectionIndex => { + writer.write_symtab_shndx_section_header(); + } + SectionData::String => { + writer.write_strtab_section_header(); + } + SectionData::DynamicString => { + writer.write_dynstr_section_header(section.sh_addr); + } + SectionData::DynamicSymbol => { + writer + .write_dynsym_section_header(section.sh_addr, 1 + num_local_dynamic as u32); + } + SectionData::Hash => { + 
writer.write_hash_section_header(section.sh_addr); + } + SectionData::GnuHash => { + writer.write_gnu_hash_section_header(section.sh_addr); + } + SectionData::GnuVersym => { + writer.write_gnu_versym_section_header(section.sh_addr); + } + SectionData::GnuVerdef => { + writer.write_gnu_verdef_section_header(section.sh_addr); + } + SectionData::GnuVerneed => { + writer.write_gnu_verneed_section_header(section.sh_addr); + } + } + } + debug_assert_eq!(writer.reserved_len(), writer.len()); + Ok(()) + } + + /// Delete segments, symbols, relocations, and dynamics that refer + /// to deleted items. + /// + /// This calls `delete_orphan_segments`, `delete_orphan_symbols`, + /// `delete_orphan_relocations`, and `delete_orphan_dynamics`. + pub fn delete_orphans(&mut self) { + self.delete_orphan_segments(); + self.delete_orphan_symbols(); + self.delete_orphan_relocations(); + self.delete_orphan_dynamics(); + } + + /// Set the delete flag for segments that only refer to deleted sections. + pub fn delete_orphan_segments(&mut self) { + let sections = &self.sections; + for segment in &mut self.segments { + // We only delete segments that have become empty due to section deletions. + if segment.sections.is_empty() { + continue; + } + segment.sections.retain(|id| !sections.get(*id).delete); + segment.delete = segment.sections.is_empty(); + } + } + + /// Set the delete flag for symbols that refer to deleted sections. + pub fn delete_orphan_symbols(&mut self) { + for symbol in &mut self.symbols { + if let Some(section) = symbol.section { + if self.sections.get_mut(section).delete { + symbol.delete = true; + } + } + } + for symbol in &mut self.dynamic_symbols { + if let Some(section) = symbol.section { + if self.sections.get_mut(section).delete { + symbol.delete = true; + } + } + } + } + + /// Delete relocations that refer to deleted symbols. 
+ pub fn delete_orphan_relocations(&mut self) { + let symbols = &self.symbols; + let dynamic_symbols = &self.dynamic_symbols; + for section in &mut self.sections { + match &mut section.data { + SectionData::Relocation(relocations) => { + relocations.retain(|relocation| match relocation.symbol { + None => true, + Some(id) => !symbols.get(id).delete, + }); + } + SectionData::DynamicRelocation(relocations) => { + relocations.retain(|relocation| match relocation.symbol { + None => true, + Some(id) => !dynamic_symbols.get(id).delete, + }); + } + _ => {} + } + } + } + + /// Delete dynamic entries that refer to deleted sections. + pub fn delete_orphan_dynamics(&mut self) { + let mut have_dynsym = false; + let mut have_dynstr = false; + let mut have_hash = false; + let mut have_gnu_hash = false; + let mut have_versym = false; + let mut have_verdef = false; + let mut have_verneed = false; + for section in &self.sections { + match §ion.data { + SectionData::DynamicSymbol => have_dynsym = true, + SectionData::DynamicString => have_dynstr = true, + SectionData::Hash => have_hash = true, + SectionData::GnuHash => have_gnu_hash = true, + SectionData::GnuVersym => have_versym = true, + SectionData::GnuVerdef => have_verdef = true, + SectionData::GnuVerneed => have_verneed = true, + _ => {} + } + } + for section in &mut self.sections { + if let SectionData::Dynamic(dynamics) = &mut section.data { + dynamics.retain(|dynamic| match dynamic { + Dynamic::Auto { + tag: elf::DT_SYMTAB, + } => have_dynsym, + Dynamic::Auto { + tag: elf::DT_STRTAB, + } + | Dynamic::Auto { tag: elf::DT_STRSZ } => have_dynstr, + Dynamic::Auto { tag: elf::DT_HASH } => have_hash, + Dynamic::Auto { + tag: elf::DT_GNU_HASH, + } => have_gnu_hash, + Dynamic::Auto { + tag: elf::DT_VERSYM, + } => have_versym, + Dynamic::Auto { + tag: elf::DT_VERNEED, + } + | Dynamic::Auto { + tag: elf::DT_VERNEEDNUM, + } => have_verneed, + Dynamic::Auto { + tag: elf::DT_VERDEF, + } + | Dynamic::Auto { + tag: elf::DT_VERDEFNUM, + } 
=> have_verdef, + _ => true, + }); + } + } + } + + /// Delete unused GNU version entries. + pub fn delete_unused_versions(&mut self) { + let mut version_used = vec![false; self.versions.len() + VERSION_ID_BASE]; + for symbol in &self.dynamic_symbols { + version_used[symbol.version.0] = true; + } + let mut version_file_used = vec![false; self.version_files.len()]; + for version in &mut self.versions { + if !version_used[version.id.0] { + version.delete = true; + continue; + } + if let VersionData::Need(need) = &version.data { + version_file_used[need.file.0] = true; + } + } + for file in &mut self.version_files { + if !version_file_used[file.id.0] { + file.delete = true; + } + } + } + + /// Return the ELF file class that will be written. + /// + /// This can be useful for calculating sizes. + pub fn class(&self) -> write::elf::Class { + write::elf::Class { is_64: self.is_64 } + } + + /// Calculate the size of the file header. + pub fn file_header_size(&self) -> usize { + self.class().file_header_size() + } + + /// Calculate the size of the program headers. + pub fn program_headers_size(&self) -> usize { + self.segments.count() * self.class().program_header_size() + } + + /// Calculate the size of the dynamic symbol table. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`]. + pub fn dynamic_symbol_size(&self) -> usize { + (1 + self.dynamic_symbols.count()) * self.class().sym_size() + } + + /// Calculate the size of the dynamic string table. + /// + /// This adds all of the currently used dynamic strings to a string table, + /// calculates the size of the string table, and discards the string table. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`] and [`Self::delete_unused_versions`]. 
+ pub fn dynamic_string_size(&self) -> usize { + let mut dynstr = write::string::StringTable::default(); + for section in &self.sections { + if let SectionData::Dynamic(dynamics) = §ion.data { + for dynamic in dynamics { + if let Dynamic::String { val, .. } = dynamic { + dynstr.add(val); + } + } + } + } + for symbol in &self.dynamic_symbols { + dynstr.add(&symbol.name); + } + if let Some(version_base) = &self.version_base { + dynstr.add(version_base); + } + for version in &self.versions { + match &version.data { + VersionData::Def(def) => { + for name in &def.names { + dynstr.add(name); + } + } + VersionData::Need(need) => { + dynstr.add(&need.name); + } + } + } + for file in &self.version_files { + dynstr.add(&file.name); + } + dynstr.size(1) + } + + /// Calculate the size of the hash table. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`]. + pub fn hash_size(&self) -> usize { + let chain_count = 1 + self.dynamic_symbols.count(); + self.class() + .hash_size(self.hash_bucket_count, chain_count as u32) + } + + /// Calculate the size of the GNU hash table. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`]. + pub fn gnu_hash_size(&self) -> usize { + let symbol_count = self.dynamic_symbols.count_defined(); + self.class().gnu_hash_size( + self.gnu_hash_bloom_count, + self.gnu_hash_bucket_count, + symbol_count as u32, + ) + } + + /// Calculate the size of the GNU symbol version section. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`] and [`Self::delete_unused_versions`]. + pub fn gnu_versym_size(&self) -> usize { + let symbol_count = 1 + self.dynamic_symbols.count(); + self.class().gnu_versym_size(symbol_count) + } + + /// Calculate the size of the GNU version definition section. 
+ /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`] and [`Self::delete_unused_versions`]. + pub fn gnu_verdef_size(&self) -> usize { + let mut verdef_count = 0; + let mut verdaux_count = 0; + if self.version_base.is_some() { + verdef_count += 1; + verdaux_count += 1; + } + for version in &self.versions { + if let VersionData::Def(def) = &version.data { + verdef_count += 1; + verdaux_count += def.names.len(); + } + } + self.class().gnu_verdef_size(verdef_count, verdaux_count) + } + + /// Calculate the size of the GNU version dependency section. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`] and [`Self::delete_unused_versions`]. + pub fn gnu_verneed_size(&self) -> usize { + let verneed_count = self.version_files.count(); + let mut vernaux_count = 0; + for version in &self.versions { + if let VersionData::Need(_) = &version.data { + vernaux_count += 1; + } + } + self.class().gnu_verneed_size(verneed_count, vernaux_count) + } + + /// Calculate the memory size of a section. + /// + /// Returns 0 for sections that are deleted or aren't allocated. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`] and [`Self::delete_unused_versions`]. 
+ pub fn section_size(&self, section: &Section<'_>) -> usize { + if section.delete || !section.is_alloc() { + return 0; + } + match §ion.data { + SectionData::Data(data) => data.len(), + SectionData::UninitializedData(len) => *len as usize, + SectionData::Relocation(relocations) => { + relocations.len() * self.class().rel_size(section.sh_type == elf::SHT_RELA) + } + SectionData::DynamicRelocation(relocations) => { + relocations.len() * self.class().rel_size(section.sh_type == elf::SHT_RELA) + } + SectionData::Note(data) => data.len(), + SectionData::Dynamic(dynamics) => (1 + dynamics.len()) * self.class().dyn_size(), + SectionData::DynamicString => self.dynamic_string_size(), + SectionData::DynamicSymbol => self.dynamic_symbol_size(), + SectionData::Hash => self.hash_size(), + SectionData::GnuHash => self.gnu_hash_size(), + SectionData::GnuVersym => self.gnu_versym_size(), + SectionData::GnuVerdef => self.gnu_verdef_size(), + SectionData::GnuVerneed => self.gnu_verneed_size(), + // None of these should be allocated. + SectionData::SectionString + | SectionData::Symbol + | SectionData::SymbolSectionIndex + | SectionData::String + | SectionData::Attributes(_) => 0, + } + } + + /// Set the `sh_size` field for every allocated section. + /// + /// This is useful to call prior to doing memory layout. + /// + /// To get an accurate result, you may need to first call + /// [`Self::delete_orphan_symbols`] and [`Self::delete_unused_versions`]. + pub fn set_section_sizes(&mut self) { + for id in (0..self.sections.len()).map(SectionId) { + let section = self.sections.get(id); + if section.delete || !section.is_alloc() { + continue; + } + self.sections.get_mut(id).sh_size = self.section_size(section) as u64; + } + } + + /// Find the section containing the dynamic table. + /// + /// This uses the `PT_DYNAMIC` program header to find the dynamic section. 
+ pub fn dynamic_section(&self) -> Option { + let segment = self + .segments + .iter() + .find(|segment| segment.p_type == elf::PT_DYNAMIC)?; + // TODO: handle multiple sections in the segment? + segment.sections.iter().copied().next() + } + + /// Find the dynamic table entries. + /// + /// This uses the `PT_DYNAMIC` program header to find the dynamic section, + pub fn dynamic_data(&self) -> Option<&[Dynamic<'data>]> { + let section = self.dynamic_section()?; + match &self.sections.get(section).data { + SectionData::Dynamic(dynamics) => Some(dynamics), + _ => None, + } + } + + /// Find the dynamic table entries. + /// + /// This uses the `PT_DYNAMIC` program header to find the dynamic section, + pub fn dynamic_data_mut(&mut self) -> Option<&mut Vec>> { + let section = self.dynamic_section()?; + match &mut self.sections.get_mut(section).data { + SectionData::Dynamic(dynamics) => Some(dynamics), + _ => None, + } + } + + /// Find the section containing the interpreter path. + /// + /// This uses the `PT_INTERP` program header to find the interp section. + pub fn interp_section(&self) -> Option { + let segment = self + .segments + .iter() + .find(|segment| segment.p_type == elf::PT_INTERP)?; + // TODO: handle multiple sections in the segment? + segment.sections.iter().copied().next() + } + + /// Find the interpreter path. + /// + /// This uses the `PT_INTERP` program header to find the interp section. + pub fn interp_data(&self) -> Option<&[u8]> { + let section = self.interp_section()?; + match &self.sections.get(section).data { + SectionData::Data(data) => Some(data), + _ => None, + } + } + + /// Find the interpreter path. + /// + /// This uses the `PT_INTERP` program header to find the interp section. + pub fn interp_data_mut(&mut self) -> Option<&mut Bytes<'data>> { + let section = self.interp_section()?; + match &mut self.sections.get_mut(section).data { + SectionData::Data(data) => Some(data), + _ => None, + } + } +} + +/// ELF file header. 
+/// +/// This corresponds to fields in [`elf::FileHeader32`] or [`elf::FileHeader64`]. +/// This only contains the ELF file header fields that can be modified. +/// The other fields are automatically calculated. +#[derive(Debug, Default)] +pub struct Header { + /// The OS ABI field in the file header. + /// + /// One of the `ELFOSABI*` constants. + pub os_abi: u8, + /// The ABI version field in the file header. + /// + /// The meaning of this field depends on the `os_abi` value. + pub abi_version: u8, + /// The object file type in the file header. + /// + /// One of the `ET_*` constants. + pub e_type: u16, + /// The architecture in the file header. + /// + /// One of the `EM_*` constants. + pub e_machine: u16, + /// Entry point virtual address in the file header. + pub e_entry: u64, + /// The processor-specific flags in the file header. + /// + /// A combination of the `EF_*` constants. + pub e_flags: u32, + /// The file offset of the program header table. + /// + /// Writing will fail if the program header table cannot be placed at this offset. + pub e_phoff: u64, +} + +/// An ID for referring to a segment in [`Segments`]. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct SegmentId(usize); + +impl fmt::Debug for SegmentId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SegmentId({})", self.0) + } +} + +impl Id for SegmentId { + fn index(&self) -> usize { + self.0 + } +} + +impl IdPrivate for SegmentId { + fn new(id: usize) -> Self { + SegmentId(id) + } +} + +/// A segment in [`Segments`]. +/// +/// This corresponds to [`elf::ProgramHeader32`] or [`elf::ProgramHeader64`]. +#[derive(Debug)] +pub struct Segment<'data> { + id: SegmentId, + /// Ignore this segment when writing the ELF file. + pub delete: bool, + /// The `p_type` field in the ELF program header. + /// + /// One of the `PT_*` constants. + pub p_type: u32, + /// The `p_flags` field in the ELF program header. + /// + /// A combination of the `PF_*` constants. 
+ pub p_flags: u32, + /// The `p_offset` field in the ELF program header. + /// + /// This is the file offset of the data in the segment. This should + /// correspond to the file offset of the sections that are placed in + /// this segment. Currently there is no support for section data + /// that is not contained in sections. + pub p_offset: u64, + /// The `p_vaddr` field in the ELF program header. + pub p_vaddr: u64, + /// The `p_paddr` field in the ELF program header. + pub p_paddr: u64, + /// The `p_filesz` field in the ELF program header. + pub p_filesz: u64, + /// The `p_memsz` field in the ELF program header. + pub p_memsz: u64, + /// The `p_align` field in the ELF program header. + pub p_align: u64, + /// The sections contained in this segment. + pub sections: Vec, + // Might need to add reference to data if no sections. + marker: PhantomData<&'data ()>, +} + +impl<'data> Item for Segment<'data> { + type Id = SegmentId; + + fn is_deleted(&self) -> bool { + self.delete + } +} + +impl<'data> Segment<'data> { + /// The ID used for referring to this segment. + pub fn id(&self) -> SegmentId { + self.id + } + + /// Returns true if the segment type is `PT_LOAD`. + pub fn is_load(&self) -> bool { + self.p_type == elf::PT_LOAD + } + + /// Returns true if the segment contains the given file offset. + pub fn contains_offset(&self, offset: u64) -> bool { + offset >= self.p_offset && offset - self.p_offset < self.p_filesz + } + + /// Return the address corresponding to the given file offset. + /// + /// This will return a meaningless value if `contains_offset` is false. + pub fn address_from_offset(&self, offset: u64) -> u64 { + self.p_vaddr + .wrapping_add(offset.wrapping_sub(self.p_offset)) + } + + /// Returns true if the segment contains the given address. + pub fn contains_address(&self, address: u64) -> bool { + address >= self.p_vaddr && address - self.p_vaddr < self.p_memsz + } + + /// Remove all sections from the segment, and set its size to zero. 
+ pub fn remove_sections(&mut self) { + self.p_filesz = 0; + self.p_memsz = 0; + self.sections.clear(); + } + + /// Add a section to the segment. + /// + /// If this is a [`elf::PT_LOAD`] segment, then the file offset and address of the + /// section is changed to be at the end of the segment. + /// + /// The segment's file and address ranges are extended to include the section. + /// This uses the `sh_size` field of the section, not the size of the section data. + /// + /// The section's id is added to the segment's list of sections. + pub fn append_section(&mut self, section: &mut Section<'_>) { + debug_assert_eq!(self.p_filesz, self.p_memsz); + if self.p_type == elf::PT_LOAD { + let align = section.sh_addralign; + let offset = (self.p_offset + self.p_filesz + (align - 1)) & !(align - 1); + let addr = (self.p_paddr + self.p_memsz + (align - 1)) & !(align - 1); + section.sh_offset = offset; + section.sh_addr = addr; + } + self.append_section_range(section); + self.sections.push(section.id); + } + + /// Extend this segment's file and address ranges to include the given section. + /// + /// If the segment's `p_memsz` is zero, then this signifies that the segment + /// has no file or address range yet. In this case, the segment's file and address + /// ranges are set equal to the section. Otherwise, the segment's file and address + /// ranges are extended to include the section. + /// + /// This uses the `sh_size` field of the section, not the size of the section data. 
+ pub fn append_section_range(&mut self, section: &Section<'_>) { + let section_filesize = if section.sh_type == elf::SHT_NOBITS { + 0 + } else { + section.sh_size + }; + if self.p_memsz == 0 { + self.p_offset = section.sh_offset; + self.p_filesz = section_filesize; + self.p_vaddr = section.sh_addr; + self.p_paddr = section.sh_addr; + self.p_memsz = section.sh_size; + } else { + if self.p_offset > section.sh_offset { + self.p_offset = section.sh_offset; + } + let filesz = section.sh_offset + section_filesize - self.p_offset; + if self.p_filesz < filesz { + self.p_filesz = filesz; + } + if self.p_vaddr > section.sh_addr { + self.p_vaddr = section.sh_addr; + self.p_paddr = section.sh_addr; + } + let memsz = section.sh_addr + section.sh_size - self.p_vaddr; + if self.p_memsz < memsz { + self.p_memsz = memsz; + } + } + } + + /// Recalculate the file and address ranges of the segment. + /// + /// Resets the segment's file and address ranges to zero, and then + /// calls `append_section_range` for each section in the segment. + pub fn recalculate_ranges(&mut self, sections: &Sections<'data>) { + self.p_offset = 0; + self.p_filesz = 0; + self.p_vaddr = 0; + self.p_paddr = 0; + self.p_memsz = 0; + let ids = core::mem::take(&mut self.sections); + for id in &ids { + let section = sections.get(*id); + self.append_section_range(section); + } + self.sections = ids; + } +} + +/// A segment table. +pub type Segments<'data> = Table>; + +impl<'data> Segments<'data> { + /// Add a new segment to the table. + pub fn add(&mut self) -> &mut Segment<'data> { + let id = self.next_id(); + self.push(Segment { + id, + delete: false, + p_type: 0, + p_flags: 0, + p_offset: 0, + p_vaddr: 0, + p_paddr: 0, + p_filesz: 0, + p_memsz: 0, + p_align: 0, + sections: Vec::new(), + marker: PhantomData, + }); + self.get_mut(id) + } + + /// Find a `PT_LOAD` segment containing the given offset. 
+ pub fn find_load_segment_from_offset(&self, offset: u64) -> Option<&Segment<'data>> { + self.iter() + .find(|segment| segment.is_load() && segment.contains_offset(offset)) + } + + /// Add a new `PT_LOAD` segment to the table. + /// + /// The file offset and address will be derived from the current maximum for any segment. + /// The address will be chosen so that `p_paddr % align == p_offset % align`. + /// You may wish to use [`Builder::load_align`] for the alignment. + pub fn add_load_segment(&mut self, flags: u32, align: u64) -> &mut Segment<'data> { + let mut max_offset = 0; + let mut max_addr = 0; + for segment in &*self { + let offset = segment.p_offset + segment.p_filesz; + if max_offset < offset { + max_offset = offset; + } + let addr = segment.p_vaddr + segment.p_memsz; + if max_addr < addr { + max_addr = addr; + } + } + // No alignment is required for the segment file offset because sections + // will add their alignment to the file offset when they are added. + let offset = max_offset; + // The address must be chosen so that addr % align == offset % align. + let addr = ((max_addr + (align - 1)) & !(align - 1)) + (offset & (align - 1)); + + let segment = self.add(); + segment.p_type = elf::PT_LOAD; + segment.p_flags = flags; + segment.p_offset = offset; + segment.p_vaddr = addr; + segment.p_paddr = addr; + segment.p_align = align; + segment + } + + /// Add a copy of a segment to the table. + /// + /// This will copy the segment type, flags and alignment. + /// + /// Additionally, if the segment type is `PT_LOAD`, then the file offset and address + /// will be set as in `add_load_segment`. 
+ pub fn copy(&mut self, id: SegmentId) -> &mut Segment<'data> { + let segment = self.get(id); + let p_type = segment.p_type; + let p_flags = segment.p_flags; + let p_align = segment.p_align; + if p_type == elf::PT_LOAD { + self.add_load_segment(p_flags, p_align) + } else { + let segment = self.add(); + segment.p_type = p_type; + segment.p_flags = p_flags; + segment.p_align = p_align; + segment + } + } +} + +/// An ID for referring to a section in [`Sections`]. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct SectionId(usize); + +impl fmt::Debug for SectionId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SectionId({})", self.0) + } +} + +impl Id for SectionId { + fn index(&self) -> usize { + self.0 + } +} + +impl IdPrivate for SectionId { + fn new(id: usize) -> Self { + SectionId(id) + } +} + +/// A section in [`Sections`]. +/// +/// This corresponds to [`elf::SectionHeader32`] or [`elf::SectionHeader64`]. +#[derive(Debug)] +pub struct Section<'data> { + id: SectionId, + /// Ignore this section when writing the ELF file. + pub delete: bool, + /// The name of the section. + /// + /// This is automatically added to the section header string table, + /// and the resulting string table offset is used to set the `sh_name` + /// field in the ELF section header. + pub name: ByteString<'data>, + /// The `sh_type` field in the ELF section header. + /// + /// One of the `SHT_*` constants. + pub sh_type: u32, + /// The `sh_flags` field in the ELF section header. + /// + /// A combination of the `SHF_*` constants. + pub sh_flags: u64, + /// The `sh_addr` field in the ELF section header. + pub sh_addr: u64, + /// The `sh_offset` field in the ELF section header. + /// + /// This is the file offset of the data in the section. + /// Writing will fail if the data cannot be placed at this offset. + /// + /// This is only used for sections that have `SHF_ALLOC` set. 
+ /// For other sections, the section data is written at the next available + /// offset. + pub sh_offset: u64, + /// The `sh_size` field in the ELF section header. + /// + /// This size is not used when writing. The size of the `data` field is + /// used instead. + pub sh_size: u64, + /// The ID of the section linked to by the `sh_link` field in the ELF section header. + pub sh_link_section: Option, + /// The `sh_info` field in the ELF section header. + /// + /// Only used if `sh_info_section` is `None`. + pub sh_info: u32, + /// The ID of the section linked to by the `sh_info` field in the ELF section header. + pub sh_info_section: Option, + /// The `sh_addralign` field in the ELF section header. + pub sh_addralign: u64, + /// The `sh_entsize` field in the ELF section header. + pub sh_entsize: u64, + /// The section data. + pub data: SectionData<'data>, +} + +impl<'data> Item for Section<'data> { + type Id = SectionId; + + fn is_deleted(&self) -> bool { + self.delete + } +} + +impl<'data> Section<'data> { + /// The ID used for referring to this section. + pub fn id(&self) -> SectionId { + self.id + } + + /// Returns true if the section flags include `SHF_ALLOC`. + pub fn is_alloc(&self) -> bool { + self.sh_flags & u64::from(elf::SHF_ALLOC) != 0 + } + + /// Return the segment permission flags that are equivalent to the section flags. + pub fn p_flags(&self) -> u32 { + let mut p_flags = elf::PF_R; + if self.sh_flags & u64::from(elf::SHF_WRITE) != 0 { + p_flags |= elf::PF_W; + } + if self.sh_flags & u64::from(elf::SHF_EXECINSTR) != 0 { + p_flags |= elf::PF_X; + } + p_flags + } +} + +/// The data for a [`Section`]. +#[derive(Debug, Clone)] +pub enum SectionData<'data> { + /// The section contains the given raw data bytes. + Data(Bytes<'data>), + /// The section contains uninitialised data bytes of the given length. + UninitializedData(u64), + /// The section contains relocations. + Relocation(Vec), + /// The section contains dynamic relocations. 
+ DynamicRelocation(Vec), + /// The section contains notes. + // TODO: parse notes + Note(Bytes<'data>), + /// The section contains dynamic entries. + Dynamic(Vec>), + /// The section contains attributes. + /// + /// This may be GNU attributes or other vendor-specific attributes. + Attributes(AttributesSection<'data>), + /// The section contains the strings for the section headers. + SectionString, + /// The section contains the symbol table. + Symbol, + /// The section contains the extended section index for the symbol table. + SymbolSectionIndex, + /// The section contains the strings for symbol table. + String, + /// The section contains the dynamic symbol table. + DynamicSymbol, + /// The section contains the dynamic string table. + DynamicString, + /// The section contains the hash table. + Hash, + /// The section contains the GNU hash table. + GnuHash, + /// The section contains the GNU symbol versions. + GnuVersym, + /// The section contains the GNU version definitions. + GnuVerdef, + /// The section contains the GNU version dependencies. + GnuVerneed, +} + +/// A section table. +pub type Sections<'data> = Table>; + +impl<'data> Sections<'data> { + /// Add a new section to the table. + pub fn add(&mut self) -> &mut Section<'data> { + let id = self.next_id(); + self.push(Section { + id, + delete: false, + name: ByteString::default(), + sh_type: 0, + sh_flags: 0, + sh_addr: 0, + sh_offset: 0, + sh_size: 0, + sh_link_section: None, + sh_info: 0, + sh_info_section: None, + sh_addralign: 0, + sh_entsize: 0, + data: SectionData::Data(Bytes::default()), + }) + } + + /// Add a copy of a section to the table. + /// + /// This will set the file offset of the copy to zero. + /// [`Segment::append_section`] can be used to assign a valid file offset and a new address. 
+ pub fn copy(&mut self, id: SectionId) -> &mut Section<'data> { + let section = self.get(id); + let id = self.next_id(); + let name = section.name.clone(); + let sh_type = section.sh_type; + let sh_flags = section.sh_flags; + let sh_addr = section.sh_addr; + let sh_size = section.sh_size; + let sh_link_section = section.sh_link_section; + let sh_info = section.sh_info; + let sh_info_section = section.sh_info_section; + let sh_addralign = section.sh_addralign; + let sh_entsize = section.sh_entsize; + let data = section.data.clone(); + self.push(Section { + id, + delete: false, + name, + sh_type, + sh_flags, + sh_addr, + sh_offset: 0, + sh_size, + sh_link_section, + sh_info, + sh_info_section, + sh_addralign, + sh_entsize, + data, + }) + } +} + +/// An ID for referring to a symbol in [`Symbols`]. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct SymbolId(usize); + +impl fmt::Debug for SymbolId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl Id for SymbolId { + fn index(&self) -> usize { + self.0 + } +} + +impl IdPrivate for SymbolId { + fn new(id: usize) -> Self { + SymbolId(id) + } +} + +/// A symbol in [`Symbols`]. +/// +/// This corresponds to [`elf::Sym32`] or [`elf::Sym64`]. +#[derive(Debug)] +pub struct Symbol<'data, const DYNAMIC: bool = false> { + id: SymbolId, + /// Ignore this symbol when writing the ELF file. + pub delete: bool, + /// The name of the symbol. + pub name: ByteString<'data>, + /// The section referenced by the symbol. + /// + /// Used to set the `st_shndx` field in the ELF symbol. + pub section: Option, + /// The `st_info` field in the ELF symbol. + pub st_info: u8, + /// The `st_other` field in the ELF symbol. + pub st_other: u8, + /// The `st_shndx` field in the ELF symbol. + /// + /// Only used if `Self::section` is `None`. + pub st_shndx: u16, + /// The `st_value` field in the ELF symbol. + pub st_value: u64, + /// The `st_size` field in the ELF symbol. 
+ pub st_size: u64, + /// GNU version for dynamic symbols. + pub version: VersionId, + /// Set the [`elf::VERSYM_HIDDEN`] flag for this symbol. + pub version_hidden: bool, +} + +impl<'data, const DYNAMIC: bool> Item for Symbol<'data, DYNAMIC> { + type Id = SymbolId; + + fn is_deleted(&self) -> bool { + self.delete + } +} + +impl<'data, const DYNAMIC: bool> Symbol<'data, DYNAMIC> { + /// The ID used for referring to this symbol. + pub fn id(&self) -> SymbolId { + self.id + } + + /// Get the `st_bind` component of the `st_info` field. + #[inline] + pub fn st_bind(&self) -> u8 { + self.st_info >> 4 + } + + /// Get the `st_type` component of the `st_info` field. + #[inline] + pub fn st_type(&self) -> u8 { + self.st_info & 0xf + } + + /// Set the `st_info` field given the `st_bind` and `st_type` components. + #[inline] + pub fn set_st_info(&mut self, st_bind: u8, st_type: u8) { + self.st_info = (st_bind << 4) + (st_type & 0xf); + } +} + +/// A symbol table. +pub type Symbols<'data, const DYNAMIC: bool = false> = Table>; + +impl<'data, const DYNAMIC: bool> Symbols<'data, DYNAMIC> { + /// Number of defined symbols. + pub fn count_defined(&self) -> usize { + self.into_iter() + .filter(|symbol| symbol.st_shndx != elf::SHN_UNDEF) + .count() + } + + /// Add a new symbol to the table. + pub fn add(&mut self) -> &mut Symbol<'data, DYNAMIC> { + let id = self.next_id(); + self.push(Symbol { + id, + delete: false, + name: ByteString::default(), + section: None, + st_info: 0, + st_other: 0, + st_shndx: 0, + st_value: 0, + st_size: 0, + version: VersionId::local(), + version_hidden: false, + }) + } +} + +/// A relocation stored in a [`Section`]. +/// +/// This corresponds to [`elf::Rel32`], [`elf::Rela32`], [`elf::Rel64`] or [`elf::Rela64`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Relocation { + /// The `r_offset` field in the ELF relocation. + pub r_offset: u64, + /// The symbol referenced by the ELF relocation. 
+ pub symbol: Option>, + /// The `r_type` field in the ELF relocation. + pub r_type: u32, + /// The `r_addend` field in the ELF relocation. + /// + /// Only used if the section type is `SHT_RELA`. + pub r_addend: i64, +} + +/// A dynamic symbol ID. +pub type DynamicSymbolId = SymbolId; + +/// A dynamic symbol. +pub type DynamicSymbol<'data> = Symbol<'data, true>; + +/// A dynamic symbol table. +pub type DynamicSymbols<'data> = Symbols<'data, true>; + +/// A dynamic relocation. +pub type DynamicRelocation = Relocation; + +/// An entry in the dynamic section. +/// +/// This corresponds to [`elf::Dyn32`] or [`elf::Dyn64`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Dynamic<'data> { + /// The value is an automatically generated integer. + /// + /// Writing will fail if the value cannot be automatically generated. + Auto { + /// The `d_tag` field in the dynamic entry. + /// + /// One of the `DT_*` values. + tag: u32, + }, + /// The value is an integer. + Integer { + /// The `d_tag` field in the dynamic entry. + /// + /// One of the `DT_*` values. + tag: u32, + /// The `d_val` field in the dynamic entry. + val: u64, + }, + /// The value is a string. + String { + /// The `d_tag` field in the dynamic entry. + /// + /// One of the `DT_*` values. + tag: u32, + /// The string value. + /// + /// This will be stored in the dynamic string section. + val: ByteString<'data>, + }, +} + +impl<'data> Dynamic<'data> { + /// The `d_tag` field in the dynamic entry. + /// + /// One of the `DT_*` values. + pub fn tag(&self) -> u32 { + match self { + Dynamic::Auto { tag } => *tag, + Dynamic::Integer { tag, .. } => *tag, + Dynamic::String { tag, .. } => *tag, + } + } +} + +/// An ID for referring to a filename in [`VersionFiles`]. 
+#[derive(Clone, Copy, PartialEq, Eq)] +pub struct VersionFileId(usize); + +impl fmt::Debug for VersionFileId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "VersionFileId({})", self.0) + } +} + +impl Id for VersionFileId { + fn index(&self) -> usize { + self.0 + } +} + +impl IdPrivate for VersionFileId { + fn new(id: usize) -> Self { + VersionFileId(id) + } +} + +/// A filename used for GNU versioning. +/// +/// Stored in [`VersionFiles`]. +#[derive(Debug)] +pub struct VersionFile<'data> { + id: VersionFileId, + /// Ignore this file when writing the ELF file. + pub delete: bool, + /// The filename. + pub name: ByteString<'data>, +} + +impl<'data> Item for VersionFile<'data> { + type Id = VersionFileId; + + fn is_deleted(&self) -> bool { + self.delete + } +} + +impl<'data> VersionFile<'data> { + /// The ID used for referring to this filename. + pub fn id(&self) -> VersionFileId { + self.id + } +} + +/// A table of filenames used for GNU versioning. +pub type VersionFiles<'data> = Table>; + +impl<'data> VersionFiles<'data> { + /// Add a new filename to the table. + pub fn add(&mut self, name: ByteString<'data>) -> VersionFileId { + let id = self.next_id(); + self.push(VersionFile { + id, + name, + delete: false, + }); + id + } +} + +const VERSION_ID_BASE: usize = 2; + +/// An ID for referring to a version in [`Versions`]. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct VersionId(usize); + +impl fmt::Debug for VersionId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "VersionId({})", self.0) + } +} + +impl Id for VersionId { + fn index(&self) -> usize { + self.0 - VERSION_ID_BASE + } +} + +impl IdPrivate for VersionId { + fn new(id: usize) -> Self { + VersionId(VERSION_ID_BASE + id) + } +} + +impl VersionId { + /// Return `True` if this is a special version that does not exist in the version table. 
+ pub fn is_special(&self) -> bool { + self.0 < VERSION_ID_BASE + } + + /// Return the ID for a version index of [`elf::VER_NDX_LOCAL`]. + pub fn local() -> Self { + VersionId(elf::VER_NDX_LOCAL as usize) + } + + /// Return the ID for a version index of [`elf::VER_NDX_GLOBAL`]. + pub fn global() -> Self { + VersionId(elf::VER_NDX_GLOBAL as usize) + } +} + +/// A version for a symbol. +#[derive(Debug)] +pub struct Version<'data> { + id: VersionId, + /// The data for this version. + pub data: VersionData<'data>, + /// Ignore this version when writing the ELF file. + pub delete: bool, +} + +impl<'data> Item for Version<'data> { + type Id = VersionId; + + fn is_deleted(&self) -> bool { + self.delete + } +} + +impl<'data> Version<'data> { + /// The ID used for referring to this version. + pub fn id(&self) -> VersionId { + self.id + } +} + +/// The data for a version for a symbol. +#[derive(Debug)] +pub enum VersionData<'data> { + /// The version for a defined symbol. + Def(VersionDef<'data>), + /// The version for an undefined symbol. + Need(VersionNeed<'data>), +} + +/// A GNU version definition. +#[derive(Debug)] +pub struct VersionDef<'data> { + /// The names for the version. + /// + /// This usually has two elements. The first element is the name of this + /// version, and the second element is the name of the previous version + /// in the tree of versions. + pub names: Vec>, + /// The version flags. + /// + /// A combination of the `VER_FLG_*` constants. + pub flags: u16, +} + +/// A GNU version dependency. +#[derive(Debug)] +pub struct VersionNeed<'data> { + /// The filename of the library providing this version. + pub file: VersionFileId, + /// The name of the version. + pub name: ByteString<'data>, + /// The version flags. + /// + /// A combination of the `VER_FLG_*` constants. + pub flags: u16, +} + +/// A table of versions that are referenced by symbols. +pub type Versions<'data> = Table>; + +impl<'data> Versions<'data> { + /// Add a version. 
+ pub fn add(&mut self, data: VersionData<'data>) -> VersionId { + let id = self.next_id(); + self.push(Version { + id, + data, + delete: false, + }); + id + } +} + +/// The contents of an attributes section. +#[derive(Debug, Default, Clone)] +pub struct AttributesSection<'data> { + /// The subsections. + pub subsections: Vec>, +} + +impl<'data> AttributesSection<'data> { + /// Create a new attributes section. + pub fn new() -> Self { + Self::default() + } +} + +/// A subsection of an attributes section. +#[derive(Debug, Clone)] +pub struct AttributesSubsection<'data> { + /// The vendor namespace for these attributes. + pub vendor: ByteString<'data>, + /// The sub-subsections. + pub subsubsections: Vec>, +} + +impl<'data> AttributesSubsection<'data> { + /// Create a new subsection. + pub fn new(vendor: ByteString<'data>) -> Self { + AttributesSubsection { + vendor, + subsubsections: Vec::new(), + } + } +} + +/// A sub-subsection in an attributes section. +#[derive(Debug, Clone)] +pub struct AttributesSubsubsection<'data> { + /// The sub-subsection tag. + pub tag: AttributeTag, + /// The data containing the attributes. + pub data: Bytes<'data>, +} + +/// The tag for a sub-subsection in an attributes section. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AttributeTag { + /// The attributes apply to the whole file. + /// + /// Correspeonds to [`elf::Tag_File`]. + File, + /// The attributes apply to the given sections. + /// + /// Correspeonds to [`elf::Tag_Section`]. + Section(Vec), + /// The attributes apply to the given symbols. + /// + /// Correspeonds to [`elf::Tag_Symbol`]. + Symbol(Vec), +} + +impl AttributeTag { + /// Return the corresponding `elf::Tag_*` value for this tag. 
+ pub fn tag(&self) -> u8 { + match self { + AttributeTag::File => elf::Tag_File, + AttributeTag::Section(_) => elf::Tag_Section, + AttributeTag::Symbol(_) => elf::Tag_Symbol, + } + } +} diff --git a/third_party/rust/object/src/build/error.rs b/third_party/rust/object/src/build/error.rs new file mode 100644 index 000000000000..364aa2f9d9fa --- /dev/null +++ b/third_party/rust/object/src/build/error.rs @@ -0,0 +1,41 @@ +use alloc::string::String; +use core::{fmt, result}; +#[cfg(feature = "std")] +use std::error; + +use crate::{read, write}; + +/// The error type used within the build module. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Error(pub(super) String); + +impl Error { + pub(super) fn new(message: impl Into) -> Self { + Error(message.into()) + } +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +#[cfg(feature = "std")] +impl error::Error for Error {} + +impl From for Error { + fn from(error: read::Error) -> Error { + Error(format!("{}", error)) + } +} + +impl From for Error { + fn from(error: write::Error) -> Error { + Error(error.0) + } +} + +/// The result type used within the build module. +pub type Result = result::Result; diff --git a/third_party/rust/object/src/build/mod.rs b/third_party/rust/object/src/build/mod.rs new file mode 100644 index 000000000000..5945f86be383 --- /dev/null +++ b/third_party/rust/object/src/build/mod.rs @@ -0,0 +1,18 @@ +//! Interface for building object files. +//! +//! This module provides common types and traits used in the builders. +//! +//! The submodules define the builders for each file format. 
+ +mod error; +pub use error::{Error, Result}; + +mod bytes; +pub use bytes::{ByteString, Bytes}; + +mod table; +use table::IdPrivate; +pub use table::{Id, Item, Table}; + +#[cfg(feature = "elf")] +pub mod elf; diff --git a/third_party/rust/object/src/build/table.rs b/third_party/rust/object/src/build/table.rs new file mode 100644 index 000000000000..63f276a5cf48 --- /dev/null +++ b/third_party/rust/object/src/build/table.rs @@ -0,0 +1,128 @@ +use alloc::vec::Vec; + +/// An item in a [`Table`]. +pub trait Item { + /// The type of identifier for the item. + type Id: Id; + + /// Return `True` if the item is deleted. + fn is_deleted(&self) -> bool; +} + +/// An identifier for referring to an item in a [`Table`]. +pub trait Id: IdPrivate { + /// Return the index of the item in the table. + fn index(&self) -> usize; +} + +mod id_private { + pub trait IdPrivate { + fn new(id: usize) -> Self; + } +} +pub(super) use id_private::IdPrivate; + +/// A table of items. +/// +/// Each item has a unique identifier. +/// Items can be deleted without changing the identifiers of other items. +#[derive(Debug)] +pub struct Table(Vec); + +impl Table { + pub(super) fn new() -> Self { + Table(Vec::new()) + } +} + +impl Table { + pub(super) fn next_id(&self) -> T::Id { + T::Id::new(self.0.len()) + } + + pub(super) fn push(&mut self, item: T) -> &mut T { + self.0.push(item); + self.0.last_mut().unwrap() + } + + /// Number of items, including deleted items. + pub(super) fn len(&self) -> usize { + self.0.len() + } + + /// Return `True` if there are no non-deleted items. + pub fn is_empty(&self) -> bool { + self.into_iter().next().is_none() + } + + /// Number of non-deleted items. + pub fn count(&self) -> usize { + self.into_iter().count() + } + + /// Return a reference to an item. + pub fn get(&self, id: T::Id) -> &T { + self.0.get(id.index()).unwrap() + } + + /// Return a mutable reference to a segment. 
+ pub fn get_mut(&mut self, id: T::Id) -> &mut T { + self.0.get_mut(id.index()).unwrap() + } + + /// Return an iterator for the segments. + pub fn iter(&self) -> TableIter<'_, T> { + self.into_iter() + } + + /// Return a mutable iterator for the segments. + pub fn iter_mut(&mut self) -> TableIterMut<'_, T> { + self.into_iter() + } +} + +impl<'a, T: Item> IntoIterator for &'a Table { + type Item = &'a T; + type IntoIter = TableIter<'a, T>; + fn into_iter(self) -> TableIter<'a, T> { + TableIter { + iter: self.0.iter(), + } + } +} + +impl<'a, T: Item> IntoIterator for &'a mut Table { + type Item = &'a mut T; + type IntoIter = TableIterMut<'a, T>; + fn into_iter(self) -> TableIterMut<'a, T> { + TableIterMut { + iter: self.0.iter_mut(), + } + } +} + +/// An iterator for non-deleted items in a [`Table`]. +#[derive(Debug)] +pub struct TableIter<'a, T> { + iter: core::slice::Iter<'a, T>, +} + +impl<'a, T: Item> Iterator for TableIter<'a, T> { + type Item = &'a T; + fn next(&mut self) -> Option<&'a T> { + self.iter.find(|item| !item.is_deleted()) + } +} + +/// An iterator for non-deleted items in a [`Table`]. +#[derive(Debug)] +pub struct TableIterMut<'a, T> { + iter: core::slice::IterMut<'a, T>, +} + +impl<'a, T: Item> Iterator for TableIterMut<'a, T> { + type Item = &'a mut T; + fn next(&mut self) -> Option<&'a mut T> { + self.iter.find(|item| !item.is_deleted()) + } +} diff --git a/third_party/rust/object/src/common.rs b/third_party/rust/object/src/common.rs index 0e6af091ce08..c9db8c332a48 100644 --- a/third_party/rust/object/src/common.rs +++ b/third_party/rust/object/src/common.rs @@ -26,12 +26,24 @@ pub enum Architecture { Riscv64, S390x, Sbf, + Sharc, + Sparc, + Sparc32Plus, Sparc64, Wasm32, Wasm64, Xtensa, } +/// A CPU sub-architecture. +#[allow(missing_docs)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum SubArchitecture { + Arm64E, + Arm64EC, +} + impl Architecture { /// The size of an address value for this architecture. 
/// @@ -59,6 +71,9 @@ impl Architecture { Architecture::Riscv64 => Some(AddressSize::U64), Architecture::S390x => Some(AddressSize::U64), Architecture::Sbf => Some(AddressSize::U64), + Architecture::Sharc => Some(AddressSize::U32), + Architecture::Sparc => Some(AddressSize::U32), + Architecture::Sparc32Plus => Some(AddressSize::U32), Architecture::Sparc64 => Some(AddressSize::U64), Architecture::Wasm32 => Some(AddressSize::U32), Architecture::Wasm64 => Some(AddressSize::U64), @@ -102,6 +117,21 @@ pub enum BinaryFormat { Xcoff, } +impl BinaryFormat { + /// The target's native binary format for relocatable object files. + /// + /// Defaults to `Elf` for unknown platforms. + pub fn native_object() -> BinaryFormat { + if cfg!(target_os = "windows") { + BinaryFormat::Coff + } else if cfg!(target_os = "macos") { + BinaryFormat::MachO + } else { + BinaryFormat::Elf + } + } +} + /// The kind of a section. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] @@ -177,6 +207,11 @@ pub enum SectionKind { /// /// Example Mach-O sections: `__DWARF/__debug_info` Debug, + /// Debug strings. + /// + /// This is the same as either `Debug` or `OtherString`, depending on the file format. + /// This value is only used in the API for writing files. It is never returned when reading files. + DebugString, /// Information for the linker. /// /// Example COFF sections: `.drectve` @@ -244,8 +279,6 @@ pub enum ComdatKind { pub enum SymbolKind { /// The symbol kind is unknown. Unknown, - /// The symbol is a null placeholder. - Null, /// The symbol is for executable code. Text, /// The symbol is for a data object. @@ -291,6 +324,8 @@ pub enum SymbolScope { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum RelocationKind { + /// The operation is unknown. + Unknown, /// S + A Absolute, /// S + A - P @@ -311,19 +346,6 @@ pub enum RelocationKind { SectionOffset, /// The index of the section containing the symbol. 
SectionIndex, - /// Some other ELF relocation. The value is dependent on the architecture. - Elf(u32), - /// Some other Mach-O relocation. The value is dependent on the architecture. - MachO { - /// The relocation type. - value: u8, - /// Whether the relocation is relative to the place. - relative: bool, - }, - /// Some other COFF relocation. The value is dependent on the architecture. - Coff(u16), - /// Some other XCOFF relocation. - Xcoff(u8), } /// Information about how the result of the relocation operation is encoded in the place. @@ -333,6 +355,8 @@ pub enum RelocationKind { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum RelocationEncoding { + /// The relocation encoding is unknown. + Unknown, /// Generic encoding. Generic, @@ -367,6 +391,30 @@ pub enum RelocationEncoding { /// /// The `RelocationKind` must be PC relative. LoongArchBranch, + + /// SHARC+ 48-bit Type A instruction + /// + /// Represents these possible variants, each with a corresponding + /// `R_SHARC_*` constant: + /// + /// * 24-bit absolute address + /// * 32-bit absolute address + /// * 6-bit relative address + /// * 24-bit relative address + /// * 6-bit absolute address in the immediate value field + /// * 16-bit absolute address in the immediate value field + SharcTypeA, + + /// SHARC+ 32-bit Type B instruction + /// + /// Represents these possible variants, each with a corresponding + /// `R_SHARC_*` constant: + /// + /// * 6-bit absolute address in the immediate value field + /// * 7-bit absolute address in the immediate value field + /// * 16-bit absolute address + /// * 6-bit relative address + SharcTypeB, } /// File flags that are specific to each file format. @@ -499,3 +547,44 @@ pub enum SymbolFlags { containing_csect: Option, }, } + +/// Relocation fields that are specific to each file format and architecture. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum RelocationFlags { + /// Format independent representation. 
+ Generic { + /// The operation used to calculate the result of the relocation. + kind: RelocationKind, + /// Information about how the result of the relocation operation is encoded in the place. + encoding: RelocationEncoding, + /// The size in bits of the place of relocation. + size: u8, + }, + /// ELF relocation fields. + Elf { + /// `r_type` field in the ELF relocation. + r_type: u32, + }, + /// Mach-O relocation fields. + MachO { + /// `r_type` field in the Mach-O relocation. + r_type: u8, + /// `r_pcrel` field in the Mach-O relocation. + r_pcrel: bool, + /// `r_length` field in the Mach-O relocation. + r_length: u8, + }, + /// COFF relocation fields. + Coff { + /// `typ` field in the COFF relocation. + typ: u16, + }, + /// XCOFF relocation fields. + Xcoff { + /// `r_rtype` field in the XCOFF relocation. + r_rtype: u8, + /// `r_rsize` field in the XCOFF relocation. + r_rsize: u8, + }, +} diff --git a/third_party/rust/object/src/elf.rs b/third_party/rust/object/src/elf.rs index da68ec2d4060..c9c53bf11288 100644 --- a/third_party/rust/object/src/elf.rs +++ b/third_party/rust/object/src/elf.rs @@ -1921,6 +1921,7 @@ pub const GNU_PROPERTY_HIUSER: u32 = 0xffffffff; /// AArch64 specific GNU properties. pub const GNU_PROPERTY_AARCH64_FEATURE_1_AND: u32 = 0xc0000000; +pub const GNU_PROPERTY_AARCH64_FEATURE_PAUTH: u32 = 0xc0000001; pub const GNU_PROPERTY_AARCH64_FEATURE_1_BTI: u32 = 1 << 0; pub const GNU_PROPERTY_AARCH64_FEATURE_1_PAC: u32 = 1 << 1; @@ -2206,6 +2207,114 @@ pub const R_386_IRELATIVE: u32 = 42; /// Load from 32 bit GOT entry, relaxable. 
pub const R_386_GOT32X: u32 = 43; +// ADI SHARC specific definitions + +// SHARC values for `Rel*::r_type` + +/// 24-bit absolute address in bits 23:0 of a 48-bit instr +/// +/// Targets: +/// +/// * Type 25a (PC_DIRECT) +pub const R_SHARC_ADDR24_V3: u32 = 0x0b; + +/// 32-bit absolute address in bits 31:0 of a 48-bit instr +/// +/// Targets: +/// +/// * Type 14a +/// * Type 14d +/// * Type 15a +/// * Type 16a +/// * Type 17a +/// * Type 18a +/// * Type 19a +pub const R_SHARC_ADDR32_V3: u32 = 0x0c; + +/// 32-bit absolute address in bits 31:0 of a 32-bit data location +/// +/// Represented with `RelocationEncoding::Generic` +pub const R_SHARC_ADDR_VAR_V3: u32 = 0x0d; + +/// 6-bit PC-relative address in bits 32:27 of a 48-bit instr +/// +/// Targets: +/// +/// * Type 9a +/// * Type 10a +pub const R_SHARC_PCRSHORT_V3: u32 = 0x0e; + +/// 24-bit PC-relative address in bits 23:0 of a 48-bit instr +/// +/// Targets: +/// +/// * Type 8a +/// * Type 12a (truncated to 23 bits after relocation) +/// * Type 13a (truncated to 23 bits after relocation) +/// * Type 25a (PC Relative) +pub const R_SHARC_PCRLONG_V3: u32 = 0x0f; + +/// 6-bit absolute address in bits 32:27 of a 48-bit instr +/// +/// Targets: +/// +/// * Type 4a +/// * Type 4b +/// * Type 4d +pub const R_SHARC_DATA6_V3: u32 = 0x10; + +/// 16-bit absolute address in bits 39:24 of a 48-bit instr +/// +/// Targets: +/// +/// * Type 12a +pub const R_SHARC_DATA16_V3: u32 = 0x11; + +/// 6-bit absolute address into bits 16:11 of a 32-bit instr +/// +/// Targets: +/// +/// * Type 4b +pub const R_SHARC_DATA6_VISA_V3: u32 = 0x12; + +/// 7-bit absolute address into bits 6:0 of a 32-bit instr +pub const R_SHARC_DATA7_VISA_V3: u32 = 0x13; + +/// 16-bit absolute address into bits 15:0 of a 32-bit instr +pub const R_SHARC_DATA16_VISA_V3: u32 = 0x14; + +/// 6-bit PC-relative address into bits 16:11 of a Type B +/// +/// Targets: +/// +/// * Type 9b +pub const R_SHARC_PCR6_VISA_V3: u32 = 0x17; + +/// 16-bit absolute address into bits 
15:0 of a 16-bit location. +/// +/// Represented with `RelocationEncoding::Generic` +pub const R_SHARC_ADDR_VAR16_V3: u32 = 0x19; + +pub const R_SHARC_CALC_PUSH_ADDR: u32 = 0xe0; +pub const R_SHARC_CALC_PUSH_ADDEND: u32 = 0xe1; +pub const R_SHARC_CALC_ADD: u32 = 0xe2; +pub const R_SHARC_CALC_SUB: u32 = 0xe3; +pub const R_SHARC_CALC_MUL: u32 = 0xe4; +pub const R_SHARC_CALC_DIV: u32 = 0xe5; +pub const R_SHARC_CALC_MOD: u32 = 0xe6; +pub const R_SHARC_CALC_LSHIFT: u32 = 0xe7; +pub const R_SHARC_CALC_RSHIFT: u32 = 0xe8; +pub const R_SHARC_CALC_AND: u32 = 0xe9; +pub const R_SHARC_CALC_OR: u32 = 0xea; +pub const R_SHARC_CALC_XOR: u32 = 0xeb; +pub const R_SHARC_CALC_PUSH_LEN: u32 = 0xec; +pub const R_SHARC_CALC_NOT: u32 = 0xf6; + +// SHARC values for `SectionHeader*::sh_type`. + +/// .adi.attributes +pub const SHT_SHARC_ADI_ATTRIBUTES: u32 = SHT_LOPROC + 0x2; + // SUN SPARC specific definitions. // SPARC values for `st_type` component of `Sym*::st_info`. @@ -3717,6 +3826,10 @@ pub const SHT_ARM_PREEMPTMAP: u32 = SHT_LOPROC + 2; /// ARM attributes section. pub const SHT_ARM_ATTRIBUTES: u32 = SHT_LOPROC + 3; +// AArch64 values for `SectionHeader*::sh_type`. +/// AArch64 attributes section. +pub const SHT_AARCH64_ATTRIBUTES: u32 = SHT_LOPROC + 3; + // AArch64 values for `Rel*::r_type`. /// No relocation. @@ -4087,14 +4200,14 @@ pub const R_ARM_PC13: u32 = 4; pub const R_ARM_ABS16: u32 = 5; /// Direct 12 bit pub const R_ARM_ABS12: u32 = 6; -/// Direct & 0x7C (LDR, STR). +/// Direct & 0x7C (`LDR`, `STR`). pub const R_ARM_THM_ABS5: u32 = 7; /// Direct 8 bit pub const R_ARM_ABS8: u32 = 8; pub const R_ARM_SBREL32: u32 = 9; -/// PC relative 24 bit (Thumb32 BL). +/// PC relative 24 bit (Thumb32 `BL`). pub const R_ARM_THM_PC22: u32 = 10; -/// PC relative & 0x3FC (Thumb16 LDR, ADD, ADR). +/// PC relative & 0x3FC (Thumb16 `LDR`, `ADD`, `ADR`). pub const R_ARM_THM_PC8: u32 = 11; pub const R_ARM_AMP_VCALL9: u32 = 12; /// Obsolete static relocation. 
@@ -4129,11 +4242,11 @@ pub const R_ARM_GOTPC: u32 = 25; pub const R_ARM_GOT32: u32 = 26; /// Deprecated, 32 bit PLT address. pub const R_ARM_PLT32: u32 = 27; -/// PC relative 24 bit (BL, BLX). +/// PC relative 24 bit (`BL`, `BLX`). pub const R_ARM_CALL: u32 = 28; -/// PC relative 24 bit (B, BL). +/// PC relative 24 bit (`B`, `BL`). pub const R_ARM_JUMP24: u32 = 29; -/// PC relative 24 bit (Thumb32 B.W). +/// PC relative 24 bit (Thumb32 `B.W`). pub const R_ARM_THM_JUMP24: u32 = 30; /// Adjust by program base. pub const R_ARM_BASE_ABS: u32 = 31; @@ -4156,99 +4269,99 @@ pub const R_ARM_V4BX: u32 = 40; pub const R_ARM_TARGET2: u32 = 41; /// 32 bit PC relative. pub const R_ARM_PREL31: u32 = 42; -/// Direct 16-bit (MOVW). +/// Direct 16-bit (`MOVW`). pub const R_ARM_MOVW_ABS_NC: u32 = 43; -/// Direct high 16-bit (MOVT). +/// Direct high 16-bit (`MOVT`). pub const R_ARM_MOVT_ABS: u32 = 44; -/// PC relative 16-bit (MOVW). +/// PC relative 16-bit (`MOVW`). pub const R_ARM_MOVW_PREL_NC: u32 = 45; /// PC relative (MOVT). pub const R_ARM_MOVT_PREL: u32 = 46; -/// Direct 16 bit (Thumb32 MOVW). +/// Direct 16 bit (Thumb32 `MOVW`). pub const R_ARM_THM_MOVW_ABS_NC: u32 = 47; -/// Direct high 16 bit (Thumb32 MOVT). +/// Direct high 16 bit (Thumb32 `MOVT`). pub const R_ARM_THM_MOVT_ABS: u32 = 48; -/// PC relative 16 bit (Thumb32 MOVW). +/// PC relative 16 bit (Thumb32 `MOVW`). pub const R_ARM_THM_MOVW_PREL_NC: u32 = 49; -/// PC relative high 16 bit (Thumb32 MOVT). +/// PC relative high 16 bit (Thumb32 `MOVT`). pub const R_ARM_THM_MOVT_PREL: u32 = 50; -/// PC relative 20 bit (Thumb32 B.W). +/// PC relative 20 bit (Thumb32 `B.W`). pub const R_ARM_THM_JUMP19: u32 = 51; -/// PC relative X & 0x7E (Thumb16 CBZ, CBNZ). +/// PC relative X & 0x7E (Thumb16 `CBZ`, `CBNZ`). pub const R_ARM_THM_JUMP6: u32 = 52; -/// PC relative 12 bit (Thumb32 ADR.W). +/// PC relative 12 bit (Thumb32 `ADR.W`). pub const R_ARM_THM_ALU_PREL_11_0: u32 = 53; -/// PC relative 12 bit (Thumb32 LDR{D,SB,H,SH}). 
+/// PC relative 12 bit (Thumb32 `LDR{D,SB,H,SH}`). pub const R_ARM_THM_PC12: u32 = 54; /// Direct 32-bit. pub const R_ARM_ABS32_NOI: u32 = 55; /// PC relative 32-bit. pub const R_ARM_REL32_NOI: u32 = 56; -/// PC relative (ADD, SUB). +/// PC relative (`ADD`, `SUB`). pub const R_ARM_ALU_PC_G0_NC: u32 = 57; -/// PC relative (ADD, SUB). +/// PC relative (`ADD`, `SUB`). pub const R_ARM_ALU_PC_G0: u32 = 58; -/// PC relative (ADD, SUB). +/// PC relative (`ADD`, `SUB`). pub const R_ARM_ALU_PC_G1_NC: u32 = 59; -/// PC relative (ADD, SUB). +/// PC relative (`ADD`, `SUB`). pub const R_ARM_ALU_PC_G1: u32 = 60; -/// PC relative (ADD, SUB). +/// PC relative (`ADD`, `SUB`). pub const R_ARM_ALU_PC_G2: u32 = 61; -/// PC relative (LDR,STR,LDRB,STRB). +/// PC relative (`LDR`,`STR`,`LDRB`,`STRB`). pub const R_ARM_LDR_PC_G1: u32 = 62; -/// PC relative (LDR,STR,LDRB,STRB). +/// PC relative (`LDR`,`STR`,`LDRB`,`STRB`). pub const R_ARM_LDR_PC_G2: u32 = 63; -/// PC relative (STR{D,H}, LDR{D,SB,H,SH}). +/// PC relative (`STR{D,H}`, `LDR{D,SB,H,SH}`). pub const R_ARM_LDRS_PC_G0: u32 = 64; -/// PC relative (STR{D,H}, LDR{D,SB,H,SH}). +/// PC relative (`STR{D,H}`, `LDR{D,SB,H,SH}`). pub const R_ARM_LDRS_PC_G1: u32 = 65; -/// PC relative (STR{D,H}, LDR{D,SB,H,SH}). +/// PC relative (`STR{D,H}`, `LDR{D,SB,H,SH}`). pub const R_ARM_LDRS_PC_G2: u32 = 66; -/// PC relative (LDC, STC). +/// PC relative (`LDC`, `STC`). pub const R_ARM_LDC_PC_G0: u32 = 67; -/// PC relative (LDC, STC). +/// PC relative (`LDC`, `STC`). pub const R_ARM_LDC_PC_G1: u32 = 68; -/// PC relative (LDC, STC). +/// PC relative (`LDC`, `STC`). pub const R_ARM_LDC_PC_G2: u32 = 69; -/// Program base relative (ADD,SUB). +/// Program base relative (`ADD`,`SUB`). pub const R_ARM_ALU_SB_G0_NC: u32 = 70; -/// Program base relative (ADD,SUB). +/// Program base relative (`ADD`,`SUB`). pub const R_ARM_ALU_SB_G0: u32 = 71; -/// Program base relative (ADD,SUB). +/// Program base relative (`ADD`,`SUB`). 
pub const R_ARM_ALU_SB_G1_NC: u32 = 72; -/// Program base relative (ADD,SUB). +/// Program base relative (`ADD`,`SUB`). pub const R_ARM_ALU_SB_G1: u32 = 73; -/// Program base relative (ADD,SUB). +/// Program base relative (`ADD`,`SUB`). pub const R_ARM_ALU_SB_G2: u32 = 74; -/// Program base relative (LDR, STR, LDRB, STRB). +/// Program base relative (`LDR`, `STR`, `LDRB`, `STRB`). pub const R_ARM_LDR_SB_G0: u32 = 75; -/// Program base relative (LDR, STR, LDRB, STRB). +/// Program base relative (`LDR`, `STR`, `LDRB`, `STRB`). pub const R_ARM_LDR_SB_G1: u32 = 76; -/// Program base relative (LDR, STR, LDRB, STRB). +/// Program base relative (`LDR`, `STR`, `LDRB`, `STRB`). pub const R_ARM_LDR_SB_G2: u32 = 77; -/// Program base relative (LDR, STR, LDRB, STRB). +/// Program base relative (`LDR`, `STR`, `LDRB`, `STRB`). pub const R_ARM_LDRS_SB_G0: u32 = 78; -/// Program base relative (LDR, STR, LDRB, STRB). +/// Program base relative (`LDR`, `STR`, `LDRB`, `STRB`). pub const R_ARM_LDRS_SB_G1: u32 = 79; -/// Program base relative (LDR, STR, LDRB, STRB). +/// Program base relative (`LDR`, `STR`, `LDRB`, `STRB`). pub const R_ARM_LDRS_SB_G2: u32 = 80; -/// Program base relative (LDC,STC). +/// Program base relative (`LDC`,`STC`). pub const R_ARM_LDC_SB_G0: u32 = 81; -/// Program base relative (LDC,STC). +/// Program base relative (`LDC`,`STC`). pub const R_ARM_LDC_SB_G1: u32 = 82; -/// Program base relative (LDC,STC). +/// Program base relative (`LDC`,`STC`). pub const R_ARM_LDC_SB_G2: u32 = 83; -/// Program base relative 16 bit (MOVW). +/// Program base relative 16 bit (`MOVW`). pub const R_ARM_MOVW_BREL_NC: u32 = 84; -/// Program base relative high 16 bit (MOVT). +/// Program base relative high 16 bit (`MOVT`). pub const R_ARM_MOVT_BREL: u32 = 85; -/// Program base relative 16 bit (MOVW). +/// Program base relative 16 bit (`MOVW`). pub const R_ARM_MOVW_BREL: u32 = 86; -/// Program base relative 16 bit (Thumb32 MOVW). +/// Program base relative 16 bit (Thumb32 `MOVW`). 
pub const R_ARM_THM_MOVW_BREL_NC: u32 = 87; -/// Program base relative high 16 bit (Thumb32 MOVT). +/// Program base relative high 16 bit (Thumb32 `MOVT`). pub const R_ARM_THM_MOVT_BREL: u32 = 88; -/// Program base relative 16 bit (Thumb32 MOVW). +/// Program base relative 16 bit (Thumb32 `MOVW`). pub const R_ARM_THM_MOVW_BREL: u32 = 89; pub const R_ARM_TLS_GOTDESC: u32 = 90; pub const R_ARM_TLS_CALL: u32 = 91; @@ -4260,16 +4373,16 @@ pub const R_ARM_PLT32_ABS: u32 = 94; pub const R_ARM_GOT_ABS: u32 = 95; /// PC relative GOT entry. pub const R_ARM_GOT_PREL: u32 = 96; -/// GOT entry relative to GOT origin (LDR). +/// GOT entry relative to GOT origin (`LDR`). pub const R_ARM_GOT_BREL12: u32 = 97; -/// 12 bit, GOT entry relative to GOT origin (LDR, STR). +/// 12 bit, GOT entry relative to GOT origin (`LDR`, `STR`). pub const R_ARM_GOTOFF12: u32 = 98; pub const R_ARM_GOTRELAX: u32 = 99; pub const R_ARM_GNU_VTENTRY: u32 = 100; pub const R_ARM_GNU_VTINHERIT: u32 = 101; -/// PC relative & 0xFFE (Thumb16 B). +/// PC relative & 0xFFE (Thumb16 `B`). pub const R_ARM_THM_PC11: u32 = 102; -/// PC relative & 0x1FE (Thumb16 B/B). +/// PC relative & 0x1FE (Thumb16 `B`/`B`). pub const R_ARM_THM_PC9: u32 = 103; /// PC-rel 32 bit for global dynamic thread local data pub const R_ARM_TLS_GD32: u32 = 104; @@ -4281,18 +4394,18 @@ pub const R_ARM_TLS_LDO32: u32 = 106; pub const R_ARM_TLS_IE32: u32 = 107; /// 32 bit offset relative to static TLS block pub const R_ARM_TLS_LE32: u32 = 108; -/// 12 bit relative to TLS block (LDR, STR). +/// 12 bit relative to TLS block (`LDR`, `STR`). pub const R_ARM_TLS_LDO12: u32 = 109; -/// 12 bit relative to static TLS block (LDR, STR). +/// 12 bit relative to static TLS block (`LDR`, `STR`). pub const R_ARM_TLS_LE12: u32 = 110; -/// 12 bit GOT entry relative to GOT origin (LDR). +/// 12 bit GOT entry relative to GOT origin (`LDR`). pub const R_ARM_TLS_IE12GP: u32 = 111; /// Obsolete. 
pub const R_ARM_ME_TOO: u32 = 128; pub const R_ARM_THM_TLS_DESCSEQ: u32 = 129; pub const R_ARM_THM_TLS_DESCSEQ16: u32 = 129; pub const R_ARM_THM_TLS_DESCSEQ32: u32 = 130; -/// GOT entry relative to GOT origin, 12 bit (Thumb32 LDR). +/// GOT entry relative to GOT origin, 12 bit (Thumb32 `LDR`). pub const R_ARM_THM_GOT_BREL12: u32 = 131; pub const R_ARM_IRELATIVE: u32 = 160; pub const R_ARM_RXPC25: u32 = 249; @@ -5694,6 +5807,10 @@ pub const EF_RISCV_FLOAT_ABI_QUAD: u32 = 0x0006; pub const EF_RISCV_RVE: u32 = 0x0008; pub const EF_RISCV_TSO: u32 = 0x0010; +// RISC-V values for `SectionHeader*::sh_type`. +/// RISC-V attributes section. +pub const SHT_RISCV_ATTRIBUTES: u32 = SHT_LOPROC + 3; + // RISC-V values `Rel*::r_type`. pub const R_RISCV_NONE: u32 = 0; pub const R_RISCV_32: u32 = 1; @@ -5749,6 +5866,14 @@ pub const R_RISCV_SET8: u32 = 54; pub const R_RISCV_SET16: u32 = 55; pub const R_RISCV_SET32: u32 = 56; pub const R_RISCV_32_PCREL: u32 = 57; +pub const R_RISCV_IRELATIVE: u32 = 58; +pub const R_RISCV_PLT32: u32 = 59; +pub const R_RISCV_SET_ULEB128: u32 = 60; +pub const R_RISCV_SUB_ULEB128: u32 = 61; +pub const R_RISCV_TLSDESC_HI20: u32 = 62; +pub const R_RISCV_TLSDESC_LOAD_LO12: u32 = 63; +pub const R_RISCV_TLSDESC_ADD_LO12: u32 = 64; +pub const R_RISCV_TLSDESC_CALL: u32 = 65; // BPF values `Rel*::r_type`. 
/// No reloc @@ -6052,8 +6177,30 @@ pub const R_LARCH_TLS_GD_HI20: u32 = 98; /// 32-bit PC relative pub const R_LARCH_32_PCREL: u32 = 99; /// Paired with a normal relocation at the same address to indicate the -/// insturction can be relaxed +/// instruction can be relaxed pub const R_LARCH_RELAX: u32 = 100; +/// Reserved +pub const R_LARCH_DELETE: u32 = 101; +/// Delete some bytes to ensure the instruction at PC + A aligned to +/// `A.next_power_of_two()`-byte boundary +pub const R_LARCH_ALIGN: u32 = 102; +/// 22-bit PC-relative offset with two trailing zeros +pub const R_LARCH_PCREL20_S2: u32 = 103; +/// Reserved +pub const R_LARCH_CFA: u32 = 104; +/// 6-bit in-place addition +pub const R_LARCH_ADD6: u32 = 105; +/// 6-bit in-place subtraction +pub const R_LARCH_SUB6: u32 = 106; +/// LEB128 in-place addition +pub const R_LARCH_ADD_ULEB128: u32 = 107; +/// LEB128 in-place subtraction +pub const R_LARCH_SUB_ULEB128: u32 = 108; +/// 64-bit PC relative +pub const R_LARCH_64_PCREL: u32 = 109; +/// 18..=37 bits of `S + A - PC` into the `pcaddu18i` instruction at `PC`, +/// and 2..=17 bits of `S + A - PC` into the `jirl` instruction at `PC + 4` +pub const R_LARCH_CALL36: u32 = 110; // Xtensa values Rel*::r_type`. pub const R_XTENSA_NONE: u32 = 0; diff --git a/third_party/rust/object/src/lib.rs b/third_party/rust/object/src/lib.rs index e17802c4faa7..91b472654c90 100644 --- a/third_party/rust/object/src/lib.rs +++ b/third_party/rust/object/src/lib.rs @@ -8,60 +8,50 @@ //! //! Raw structs are defined for: [ELF](elf), [Mach-O](macho), [PE/COFF](pe), //! [XCOFF](xcoff), [archive]. -//! Types and traits for zerocopy support are defined in [pod] and [endian]. +//! Types and traits for zerocopy support are defined in the [`pod`] and [`endian`] modules. //! //! ## Unified read API //! -//! The [read::Object] trait defines the unified interface. This trait is implemented -//! by [read::File], which allows reading any file format, as well as implementations -//! 
for each file format: [ELF](read::elf::ElfFile), [Mach-O](read::macho::MachOFile), -//! [COFF](read::coff::CoffFile), [PE](read::pe::PeFile), [Wasm](read::wasm::WasmFile), -//! [XCOFF](read::xcoff::XcoffFile). +//! The [`read`] module provides a unified read API using the [`read::Object`] trait. +//! There is an implementation of this trait for [`read::File`], which allows reading any +//! file format, as well as implementations for each file format. //! //! ## Low level read API //! -//! In addition to the unified read API, the various `read` modules define helpers that -//! operate on the raw structs. These also provide traits that abstract over the differences -//! between 32-bit and 64-bit versions of the file format. +//! The [`read#modules`] submodules define helpers that operate on the raw structs. +//! These can be used instead of the unified API, or in conjunction with it to access +//! details that are not available via the unified API. //! //! ## Unified write API //! -//! [write::Object] allows building a COFF/ELF/Mach-O/XCOFF relocatable object file and -//! then writing it out. +//! The [`mod@write`] module provides a unified write API for relocatable object files +//! using [`write::Object`]. This does not support writing executable files. //! -//! ## Low level executable writers +//! ## Low level write API //! -//! [write::elf::Writer] and [write::pe::Writer] allow writing executable files. +//! The [`mod@write#modules`] submodules define helpers for writing the raw structs. //! -//! ## Example for unified read API -//! ```no_run -//! # #[cfg(feature = "read")] -//! use object::{Object, ObjectSection}; -//! use std::error::Error; -//! use std::fs; +//! ## Build API //! -//! /// Reads a file and displays the content of the ".boot" section. -//! fn main() -> Result<(), Box> { -//! # #[cfg(all(feature = "read", feature = "std"))] { -//! let bin_data = fs::read("./multiboot2-binary.elf")?; -//! let obj_file = object::File::parse(&*bin_data)?; -//! 
if let Some(section) = obj_file.section_by_name(".boot") { -//! println!("{:#x?}", section.data()?); -//! } else { -//! eprintln!("section not available"); -//! } -//! # } -//! Ok(()) -//! } -//! ``` +//! The [`mod@build`] submodules define helpers for building object files, either from +//! scratch or by modifying existing files. +//! +//! ## Shared definitions +//! +//! The crate provides a number of definitions that are used by both the read and write +//! APIs. These are defined at the top level module, but none of these are the main entry +//! points of the crate. #![deny(missing_docs)] #![deny(missing_debug_implementations)] #![no_std] #![warn(rust_2018_idioms)] // Style. +#![allow(clippy::collapsible_else_if)] #![allow(clippy::collapsible_if)] #![allow(clippy::comparison_chain)] +#![allow(clippy::field_reassign_with_default)] +#![allow(clippy::manual_flatten)] #![allow(clippy::match_like_matches_macro)] #![allow(clippy::single_match)] #![allow(clippy::type_complexity)] @@ -69,8 +59,6 @@ #![allow(clippy::should_implement_trait)] // Unit errors are converted to other types by callers. #![allow(clippy::result_unit_err)] -// Worse readability sometimes. 
-#![allow(clippy::collapsible_else_if)] #[cfg(feature = "cargo-all")] compile_error!("'--all-features' is not supported; use '--features all' instead"); @@ -104,6 +92,9 @@ pub use read::*; #[cfg(feature = "write_core")] pub mod write; +#[cfg(feature = "build_core")] +pub mod build; + #[cfg(feature = "archive")] pub mod archive; #[cfg(feature = "elf")] diff --git a/third_party/rust/object/src/macho.rs b/third_party/rust/object/src/macho.rs index 3cd38e0eefcf..88919d62efc2 100644 --- a/third_party/rust/object/src/macho.rs +++ b/third_party/rust/object/src/macho.rs @@ -305,7 +305,6 @@ pub struct DyldCacheHeader { pub images_count: U32, // offset: 0x1c /// base address of dyld when cache was built pub dyld_base_address: U64, // offset: 0x20 - /// reserved1: [u8; 32], // offset: 0x28 /// file offset of where local symbols are stored pub local_symbols_offset: U64, // offset: 0x48 @@ -313,23 +312,14 @@ pub struct DyldCacheHeader { pub local_symbols_size: U64, // offset: 0x50 /// unique value for each shared cache file pub uuid: [u8; 16], // offset: 0x58 - /// reserved2: [u8; 32], // offset: 0x68 - /// reserved3: [u8; 32], // offset: 0x88 - /// reserved4: [u8; 32], // offset: 0xa8 - /// reserved5: [u8; 32], // offset: 0xc8 - /// reserved6: [u8; 32], // offset: 0xe8 - /// reserved7: [u8; 32], // offset: 0x108 - /// reserved8: [u8; 32], // offset: 0x128 - /// reserved9: [u8; 32], // offset: 0x148 - /// reserved10: [u8; 32], // offset: 0x168 /// file offset to first dyld_subcache_info pub subcaches_offset: U32, // offset: 0x188 @@ -337,7 +327,6 @@ pub struct DyldCacheHeader { pub subcaches_count: U32, // offset: 0x18c /// the UUID of the .symbols subcache pub symbols_subcache_uuid: [u8; 16], // offset: 0x190 - /// reserved11: [u8; 32], // offset: 0x1a0 /// file offset to first dyld_cache_image_info /// Use this instead of images_offset if mapping_offset is at least 0x1c4. 
@@ -351,15 +340,10 @@ pub struct DyldCacheHeader { #[derive(Debug, Clone, Copy)] #[repr(C)] pub struct DyldCacheMappingInfo { - /// pub address: U64, - /// pub size: U64, - /// pub file_offset: U64, - /// pub max_prot: U32, - /// pub init_prot: U32, } @@ -367,27 +351,36 @@ pub struct DyldCacheMappingInfo { #[derive(Debug, Clone, Copy)] #[repr(C)] pub struct DyldCacheImageInfo { - /// pub address: U64, - /// pub mod_time: U64, - /// pub inode: U64, - /// pub path_file_offset: U32, - /// pub pad: U32, } -/// Corresponds to a struct whose source code has not been published as of Nov 2021. -/// Added in the dyld cache version which shipped with macOS 12 / iOS 15. +/// Added in dyld-940, which shipped with macOS 12 / iOS 15. +/// Originally called `dyld_subcache_entry`, renamed to `dyld_subcache_entry_v1` +/// in dyld-1042.1. #[derive(Debug, Clone, Copy)] #[repr(C)] -pub struct DyldSubCacheInfo { +pub struct DyldSubCacheEntryV1 { /// The UUID of this subcache. pub uuid: [u8; 16], - /// The size of this subcache plus all previous subcaches. - pub cumulative_size: U64, + /// The offset of this subcache from the main cache base address. + pub cache_vm_offset: U64, +} + +/// Added in dyld-1042.1, which shipped with macOS 13 / iOS 16. +/// Called `dyld_subcache_entry` as of dyld-1042.1. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldSubCacheEntryV2 { + /// The UUID of this subcache. + pub uuid: [u8; 16], + /// The offset of this subcache from the main cache base address. + pub cache_vm_offset: U64, + /// The file name suffix of the subCache file, e.g. ".25.data" or ".03.development". + pub file_suffix: [u8; 32], } // Definitions from "/usr/include/mach-o/loader.h". 
@@ -1978,6 +1971,8 @@ pub const PLATFORM_IOSSIMULATOR: u32 = 7; pub const PLATFORM_TVOSSIMULATOR: u32 = 8; pub const PLATFORM_WATCHOSSIMULATOR: u32 = 9; pub const PLATFORM_DRIVERKIT: u32 = 10; +pub const PLATFORM_XROS: u32 = 11; +pub const PLATFORM_XROSSIMULATOR: u32 = 12; /* Known values for the tool field above. */ pub const TOOL_CLANG: u32 = 1; @@ -3251,7 +3246,8 @@ unsafe_impl_endian_pod!( DyldCacheHeader, DyldCacheMappingInfo, DyldCacheImageInfo, - DyldSubCacheInfo, + DyldSubCacheEntryV1, + DyldSubCacheEntryV2, MachHeader32, MachHeader64, LoadCommand, diff --git a/third_party/rust/object/src/pe.rs b/third_party/rust/object/src/pe.rs index f274d2270b93..64ccf0696d89 100644 --- a/third_party/rust/object/src/pe.rs +++ b/third_party/rust/object/src/pe.rs @@ -359,6 +359,8 @@ pub const IMAGE_FILE_MACHINE_AMD64: u16 = 0x8664; pub const IMAGE_FILE_MACHINE_M32R: u16 = 0x9041; /// ARM64 Little-Endian pub const IMAGE_FILE_MACHINE_ARM64: u16 = 0xAA64; +/// ARM64EC ("Emulation Compatible") +pub const IMAGE_FILE_MACHINE_ARM64EC: u16 = 0xA641; pub const IMAGE_FILE_MACHINE_CEE: u16 = 0xC0EE; /// RISCV32 pub const IMAGE_FILE_MACHINE_RISCV32: u16 = 0x5032; diff --git a/third_party/rust/object/src/pod.rs b/third_party/rust/object/src/pod.rs index 8ee78164f56d..2907e1e03150 100644 --- a/third_party/rust/object/src/pod.rs +++ b/third_party/rust/object/src/pod.rs @@ -21,9 +21,11 @@ type Result = result::Result; /// - have no padding pub unsafe trait Pod: Copy + 'static {} -/// Cast a byte slice to a `Pod` type. +/// Cast the head of a byte slice to a `Pod` type. /// -/// Returns the type and the tail of the slice. +/// Returns the type and the tail of the byte slice. +/// +/// Returns an error if the byte slice is too short or the alignment is invalid. 
#[inline] pub fn from_bytes(data: &[u8]) -> Result<(&T, &[u8])> { let size = mem::size_of::(); @@ -39,9 +41,11 @@ pub fn from_bytes(data: &[u8]) -> Result<(&T, &[u8])> { Ok((val, tail)) } -/// Cast a mutable byte slice to a `Pod` type. +/// Cast the head of a mutable byte slice to a `Pod` type. /// -/// Returns the type and the tail of the slice. +/// Returns the type and the tail of the byte slice. +/// +/// Returns an error if the byte slice is too short or the alignment is invalid. #[inline] pub fn from_bytes_mut(data: &mut [u8]) -> Result<(&mut T, &mut [u8])> { let size = mem::size_of::(); @@ -60,9 +64,11 @@ pub fn from_bytes_mut(data: &mut [u8]) -> Result<(&mut T, &mut [u8])> { Ok((val, tail)) } -/// Cast a byte slice to a slice of a `Pod` type. +/// Cast the head of a byte slice to a slice of a `Pod` type. /// /// Returns the type slice and the tail of the byte slice. +/// +/// Returns an error if the byte slice is too short or the alignment is invalid. #[inline] pub fn slice_from_bytes(data: &[u8], count: usize) -> Result<(&[T], &[u8])> { let size = count.checked_mul(mem::size_of::()).ok_or(())?; @@ -78,9 +84,11 @@ pub fn slice_from_bytes(data: &[u8], count: usize) -> Result<(&[T], &[u8 Ok((slice, tail)) } -/// Cast a mutable byte slice to a slice of a `Pod` type. +/// Cast the head of a mutable byte slice to a slice of a `Pod` type. /// /// Returns the type slice and the tail of the byte slice. +/// +/// Returns an error if the byte slice is too short or the alignment is invalid. #[inline] pub fn slice_from_bytes_mut( data: &mut [u8], @@ -102,6 +110,38 @@ pub fn slice_from_bytes_mut( Ok((slice, tail)) } +/// Cast all of a byte slice to a slice of a `Pod` type. +/// +/// Returns the type slice. +/// +/// Returns an error if the size of the byte slice is not an exact multiple +/// of the type size, or the alignment is invalid. 
+#[inline] +pub fn slice_from_all_bytes(data: &[u8]) -> Result<&[T]> { + let count = data.len() / mem::size_of::(); + let (slice, tail) = slice_from_bytes(data, count)?; + if !tail.is_empty() { + return Err(()); + } + Ok(slice) +} + +/// Cast all of a mutable byte slice to a slice of a `Pod` type. +/// +/// Returns the type slice. +/// +/// Returns an error if the size of the byte slice is not an exact multiple +/// of the type size, or the alignment is invalid. +#[inline] +pub fn slice_from_all_bytes_mut(data: &mut [u8]) -> Result<&mut [T]> { + let count = data.len() / mem::size_of::(); + let (slice, tail) = slice_from_bytes_mut(data, count)?; + if !tail.is_empty() { + return Err(()); + } + Ok(slice) +} + /// Cast a `Pod` type to a byte slice. #[inline] pub fn bytes_of(val: &T) -> &[u8] { @@ -156,6 +196,8 @@ macro_rules! unsafe_impl_pod { unsafe_impl_pod!(u8, u16, u32, u64); +unsafe impl Pod for [T; N] {} + #[cfg(test)] mod tests { use super::*; @@ -223,7 +265,7 @@ mod tests { assert_eq!(tail, tail_mut); let (y, tail) = slice_from_bytes::(&bytes[2..], 2).unwrap(); - let (y_mut, tail_mut) = slice_from_bytes::(&mut bytes_mut[2..], 2).unwrap(); + let (y_mut, tail_mut) = slice_from_bytes_mut::(&mut bytes_mut[2..], 2).unwrap(); assert_eq!(y, &x[1..3]); assert_eq!(y, y_mut); assert_eq!(tail, &bytes[6..]); diff --git a/third_party/rust/object/src/read/any.rs b/third_party/rust/object/src/read/any.rs index 2e147c672706..6ce20bc2b238 100644 --- a/third_party/rust/object/src/read/any.rs +++ b/third_party/rust/object/src/read/any.rs @@ -2,6 +2,8 @@ use alloc::fmt; use alloc::vec::Vec; use core::marker::PhantomData; +#[allow(unused_imports)] // Unused for Wasm +use crate::endian::Endianness; #[cfg(feature = "coff")] use crate::read::coff; #[cfg(feature = "elf")] @@ -17,12 +19,10 @@ use crate::read::xcoff; use crate::read::{ self, Architecture, BinaryFormat, CodeView, ComdatKind, CompressedData, CompressedFileRange, Error, Export, FileFlags, FileKind, Import, Object, ObjectComdat, 
ObjectKind, ObjectMap, - ObjectSection, ObjectSegment, ObjectSymbol, ObjectSymbolTable, ReadRef, Relocation, Result, - SectionFlags, SectionIndex, SectionKind, SegmentFlags, SymbolFlags, SymbolIndex, SymbolKind, - SymbolMap, SymbolMapName, SymbolScope, SymbolSection, + ObjectSection, ObjectSegment, ObjectSymbol, ObjectSymbolTable, ReadRef, Relocation, + RelocationMap, Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, SubArchitecture, + SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapName, SymbolScope, SymbolSection, }; -#[allow(unused_imports)] -use crate::{AddressSize, Endian, Endianness}; /// Evaluate an expression on the contents of a file format enum. /// @@ -204,9 +204,9 @@ macro_rules! next_inner { }; } -/// An object file. +/// An object file that can be any supported file format. /// -/// Most functionality is provided by the `Object` trait implementation. +/// Most functionality is provided by the [`Object`] trait implementation. #[derive(Debug)] #[non_exhaustive] #[allow(missing_docs)] @@ -268,14 +268,14 @@ impl<'data, R: ReadRef<'data>> File<'data, R> { /// Parse a Mach-O image from the dyld shared cache. #[cfg(feature = "macho")] - pub fn parse_dyld_cache_image<'cache, E: Endian>( + pub fn parse_dyld_cache_image<'cache, E: crate::Endian>( image: &macho::DyldCacheImage<'data, 'cache, E, R>, ) -> Result { Ok(match image.cache.architecture().address_size() { - Some(AddressSize::U64) => { + Some(read::AddressSize::U64) => { File::MachO64(macho::MachOFile64::parse_dyld_cache_image(image)?) } - Some(AddressSize::U32) => { + Some(read::AddressSize::U32) => { File::MachO32(macho::MachOFile32::parse_dyld_cache_image(image)?) 
} _ => return Err(Error("Unsupported file format")), @@ -303,26 +303,29 @@ impl<'data, R: ReadRef<'data>> File<'data, R> { impl<'data, R: ReadRef<'data>> read::private::Sealed for File<'data, R> {} -impl<'data, 'file, R> Object<'data, 'file> for File<'data, R> +impl<'data, R> Object<'data> for File<'data, R> where - 'data: 'file, - R: 'file + ReadRef<'data>, + R: ReadRef<'data>, { - type Segment = Segment<'data, 'file, R>; - type SegmentIterator = SegmentIterator<'data, 'file, R>; - type Section = Section<'data, 'file, R>; - type SectionIterator = SectionIterator<'data, 'file, R>; - type Comdat = Comdat<'data, 'file, R>; - type ComdatIterator = ComdatIterator<'data, 'file, R>; - type Symbol = Symbol<'data, 'file, R>; - type SymbolIterator = SymbolIterator<'data, 'file, R>; - type SymbolTable = SymbolTable<'data, 'file, R>; - type DynamicRelocationIterator = DynamicRelocationIterator<'data, 'file, R>; + type Segment<'file> = Segment<'data, 'file, R> where Self: 'file, 'data: 'file; + type SegmentIterator<'file> = SegmentIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type Section<'file> = Section<'data, 'file, R> where Self: 'file, 'data: 'file; + type SectionIterator<'file> = SectionIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type Comdat<'file> = Comdat<'data, 'file, R> where Self: 'file, 'data: 'file; + type ComdatIterator<'file> = ComdatIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type Symbol<'file> = Symbol<'data, 'file, R> where Self: 'file, 'data: 'file; + type SymbolIterator<'file> = SymbolIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type SymbolTable<'file> = SymbolTable<'data, 'file, R> where Self: 'file, 'data: 'file; + type DynamicRelocationIterator<'file> = DynamicRelocationIterator<'data, 'file, R> where Self: 'file, 'data: 'file; fn architecture(&self) -> Architecture { with_inner!(self, File, |x| x.architecture()) } + fn sub_architecture(&self) -> Option { + with_inner!(self, File, |x| 
x.sub_architecture()) + } + fn is_little_endian(&self) -> bool { with_inner!(self, File, |x| x.is_little_endian()) } @@ -335,43 +338,46 @@ where with_inner!(self, File, |x| x.kind()) } - fn segments(&'file self) -> SegmentIterator<'data, 'file, R> { + fn segments(&self) -> SegmentIterator<'data, '_, R> { SegmentIterator { inner: map_inner!(self, File, SegmentIteratorInternal, |x| x.segments()), } } - fn section_by_name_bytes(&'file self, section_name: &[u8]) -> Option> { + fn section_by_name_bytes<'file>( + &'file self, + section_name: &[u8], + ) -> Option> { map_inner_option!(self, File, SectionInternal, |x| x .section_by_name_bytes(section_name)) .map(|inner| Section { inner }) } - fn section_by_index(&'file self, index: SectionIndex) -> Result> { + fn section_by_index(&self, index: SectionIndex) -> Result> { map_inner_option!(self, File, SectionInternal, |x| x.section_by_index(index)) .map(|inner| Section { inner }) } - fn sections(&'file self) -> SectionIterator<'data, 'file, R> { + fn sections(&self) -> SectionIterator<'data, '_, R> { SectionIterator { inner: map_inner!(self, File, SectionIteratorInternal, |x| x.sections()), } } - fn comdats(&'file self) -> ComdatIterator<'data, 'file, R> { + fn comdats(&self) -> ComdatIterator<'data, '_, R> { ComdatIterator { inner: map_inner!(self, File, ComdatIteratorInternal, |x| x.comdats()), } } - fn symbol_by_index(&'file self, index: SymbolIndex) -> Result> { + fn symbol_by_index(&self, index: SymbolIndex) -> Result> { map_inner_option!(self, File, SymbolInternal, |x| x .symbol_by_index(index) .map(|x| (x, PhantomData))) .map(|inner| Symbol { inner }) } - fn symbols(&'file self) -> SymbolIterator<'data, 'file, R> { + fn symbols(&self) -> SymbolIterator<'data, '_, R> { SymbolIterator { inner: map_inner!(self, File, SymbolIteratorInternal, |x| ( x.symbols(), @@ -380,14 +386,14 @@ where } } - fn symbol_table(&'file self) -> Option> { + fn symbol_table(&self) -> Option> { map_inner_option!(self, File, SymbolTableInternal, 
|x| x .symbol_table() .map(|x| (x, PhantomData))) .map(|inner| SymbolTable { inner }) } - fn dynamic_symbols(&'file self) -> SymbolIterator<'data, 'file, R> { + fn dynamic_symbols(&self) -> SymbolIterator<'data, '_, R> { SymbolIterator { inner: map_inner!(self, File, SymbolIteratorInternal, |x| ( x.dynamic_symbols(), @@ -396,7 +402,7 @@ where } } - fn dynamic_symbol_table(&'file self) -> Option> { + fn dynamic_symbol_table(&self) -> Option> { map_inner_option!(self, File, SymbolTableInternal, |x| x .dynamic_symbol_table() .map(|x| (x, PhantomData))) @@ -404,7 +410,7 @@ where } #[cfg(feature = "elf")] - fn dynamic_relocations(&'file self) -> Option> { + fn dynamic_relocations(&self) -> Option> { let inner = match self { File::Elf32(ref elf) => { DynamicRelocationIteratorInternal::Elf32(elf.dynamic_relocations()?) @@ -419,7 +425,7 @@ where } #[cfg(not(feature = "elf"))] - fn dynamic_relocations(&'file self) -> Option> { + fn dynamic_relocations(&self) -> Option> { None } @@ -481,7 +487,7 @@ where } } -/// An iterator over the segments of a `File`. +/// An iterator for the loadable segments in a [`File`]. #[derive(Debug)] pub struct SegmentIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: SegmentIteratorInternal<'data, 'file, R>, @@ -522,7 +528,9 @@ impl<'data, 'file, R: ReadRef<'data>> Iterator for SegmentIterator<'data, 'file, } } -/// A segment of a `File`. +/// A loadable segment in a [`File`]. +/// +/// Most functionality is provided by the [`ObjectSegment`] trait implementation. pub struct Segment<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: SegmentInternal<'data, 'file, R>, } @@ -612,7 +620,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSegment<'data> for Segment<'data, 'f } } -/// An iterator of the sections of a `File`. +/// An iterator for the sections in a [`File`]. 
#[derive(Debug)] pub struct SectionIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: SectionIteratorInternal<'data, 'file, R>, @@ -654,7 +662,9 @@ impl<'data, 'file, R: ReadRef<'data>> Iterator for SectionIterator<'data, 'file, } } -/// A Section of a File +/// A section in a [`File`]. +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. pub struct Section<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: SectionInternal<'data, 'file, R>, } @@ -748,11 +758,11 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for Section<'data, 'f with_inner!(self.inner, SectionInternal, |x| x.compressed_data()) } - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { with_inner!(self.inner, SectionInternal, |x| x.name_bytes()) } - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { with_inner!(self.inner, SectionInternal, |x| x.name()) } @@ -779,12 +789,16 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for Section<'data, 'f } } + fn relocation_map(&self) -> Result { + with_inner!(self.inner, SectionInternal, |x| x.relocation_map()) + } + fn flags(&self) -> SectionFlags { with_inner!(self.inner, SectionInternal, |x| x.flags()) } } -/// An iterator of the COMDAT section groups of a `File`. +/// An iterator for the COMDAT section groups in a [`File`]. #[derive(Debug)] pub struct ComdatIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: ComdatIteratorInternal<'data, 'file, R>, @@ -825,7 +839,9 @@ impl<'data, 'file, R: ReadRef<'data>> Iterator for ComdatIterator<'data, 'file, } } -/// A COMDAT section group of a `File`. +/// A COMDAT section group in a [`File`]. +/// +/// Most functionality is provided by the [`ObjectComdat`] trait implementation. 
pub struct Comdat<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: ComdatInternal<'data, 'file, R>, } @@ -878,11 +894,11 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for Comdat<'data, 'fil with_inner!(self.inner, ComdatInternal, |x| x.symbol()) } - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { with_inner!(self.inner, ComdatInternal, |x| x.name_bytes()) } - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { with_inner!(self.inner, ComdatInternal, |x| x.name()) } @@ -898,7 +914,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for Comdat<'data, 'fil } } -/// An iterator over COMDAT section entries. +/// An iterator for the sections in a [`Comdat`]. #[derive(Debug)] pub struct ComdatSectionIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: ComdatSectionIteratorInternal<'data, 'file, R>, @@ -938,7 +954,9 @@ impl<'data, 'file, R: ReadRef<'data>> Iterator for ComdatSectionIterator<'data, } } -/// A symbol table. +/// A symbol table in a [`File`]. +/// +/// Most functionality is provided by the [`ObjectSymbolTable`] trait implementation. #[derive(Debug)] pub struct SymbolTable<'data, 'file, R = &'data [u8]> where @@ -1022,7 +1040,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSymbolTable<'data> for SymbolTable<' } } -/// An iterator over symbol table entries. +/// An iterator for the symbols in a [`SymbolTable`]. #[derive(Debug)] pub struct SymbolIterator<'data, 'file, R = &'data [u8]> where @@ -1101,7 +1119,9 @@ impl<'data, 'file, R: ReadRef<'data>> Iterator for SymbolIterator<'data, 'file, } } -/// A symbol table entry. +/// A symbol in a [`SymbolTable`]. +/// +/// Most functionality is provided by the [`ObjectSymbol`] trait implementation.
pub struct Symbol<'data, 'file, R = &'data [u8]> where R: ReadRef<'data>, @@ -1236,7 +1256,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSymbol<'data> for Symbol<'data, 'fil } } -/// An iterator over dynamic relocation entries. +/// An iterator for the dynamic relocation entries in a [`File`]. #[derive(Debug)] pub struct DynamicRelocationIterator<'data, 'file, R = &'data [u8]> where @@ -1273,7 +1293,7 @@ impl<'data, 'file, R: ReadRef<'data>> Iterator for DynamicRelocationIterator<'da } } -/// An iterator over section relocation entries. +/// An iterator for the relocation entries in a [`Section`]. #[derive(Debug)] pub struct SectionRelocationIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { inner: SectionRelocationIteratorInternal<'data, 'file, R>, diff --git a/third_party/rust/object/src/read/archive.rs b/third_party/rust/object/src/read/archive.rs index f5aaa9b190cf..0e01b8a6ec6b 100644 --- a/third_party/rust/object/src/read/archive.rs +++ b/third_party/rust/object/src/read/archive.rs @@ -1,8 +1,30 @@ //! Support for archive files. +//! +//! ## Example +//! ```no_run +//! use object::{Object, ObjectSection}; +//! use std::error::Error; +//! use std::fs; +//! +//! /// Reads an archive and displays the name of each member. +//! fn main() -> Result<(), Box> { +//! # #[cfg(feature = "std")] { +//! let data = fs::read("path/to/binary")?; +//! let file = object::read::archive::ArchiveFile::parse(&*data)?; +//! for member in file.members() { +//! let member = member?; +//! println!("{}", String::from_utf8_lossy(member.name())); +//! } +//! # } +//! Ok(()) +//! } +//! ``` use core::convert::TryInto; +use core::slice; use crate::archive; +use crate::endian::{BigEndian as BE, LittleEndian as LE, U16Bytes, U32Bytes, U64Bytes}; use crate::read::{self, Bytes, Error, ReadError, ReadRef}; /// The kind of archive format. 
@@ -47,6 +69,7 @@ pub struct ArchiveFile<'data, R: ReadRef<'data> = &'data [u8]> { members: Members<'data>, symbols: (u64, u64), names: &'data [u8], + thin: bool, } impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { @@ -58,11 +81,15 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { .read_bytes(&mut tail, archive::MAGIC.len() as u64) .read_error("Invalid archive size")?; - if magic == archive::AIX_BIG_MAGIC { + let thin = if magic == archive::AIX_BIG_MAGIC { return Self::parse_aixbig(data); - } else if magic != archive::MAGIC { + } else if magic == archive::THIN_MAGIC { + true + } else if magic == archive::MAGIC { + false + } else { return Err(Error("Unsupported archive identifier")); - } + }; let mut members_offset = tail; let members_end_offset = len; @@ -76,6 +103,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { }, symbols: (0, 0), names: &[], + thin, }; // The first few members may be special, so parse them. @@ -93,7 +121,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { // BSD may use the extended name for the symbol table. This is handled // by `ArchiveMember::parse`. if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], thin)?; if member.name == b"/" { // GNU symbol table (unless we later determine this is COFF). file.kind = ArchiveKind::Gnu; @@ -101,7 +129,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { members_offset = tail; if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], thin)?; if member.name == b"/" { // COFF linker member. file.kind = ArchiveKind::Coff; @@ -109,13 +137,20 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { members_offset = tail; if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], thin)?; if member.name == b"//" { // COFF names table. 
file.names = member.data(data)?; members_offset = tail; } } + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, file.names, thin)?; + if member.name == b"//" { + // COFF EC Symbol Table. + members_offset = tail; + } + } } else if member.name == b"//" { // GNU names table. file.names = member.data(data)?; @@ -129,7 +164,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { members_offset = tail; if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], thin)?; if member.name == b"//" { // GNU names table. file.names = member.data(data)?; @@ -177,9 +212,11 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { members: Members::AixBig { index: &[] }, symbols: (0, 0), names: &[], + thin: false, }; // Read the span of symbol table. + // TODO: an archive may have both 32-bit and 64-bit symbol tables. let symtbl64 = parse_u64_digits(&file_header.gst64off, 10) .read_error("Invalid offset to 64-bit symbol table in AIX big archive")?; if symtbl64 > 0 { @@ -234,6 +271,11 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { self.kind } + /// Return true if the archive is a thin archive. + pub fn is_thin(&self) -> bool { + self.thin + } + /// Iterate over the members of the archive. /// /// This does not return special members. @@ -243,8 +285,37 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { data: self.data, members: self.members, names: self.names, + thin: self.thin, } } + + /// Return the member at the given offset. + pub fn member(&self, member: ArchiveOffset) -> read::Result> { + match self.members { + Members::Common { offset, end_offset } => { + if member.0 < offset || member.0 >= end_offset { + return Err(Error("Invalid archive member offset")); + } + let mut offset = member.0; + ArchiveMember::parse(self.data, &mut offset, self.names, self.thin) + } + Members::AixBig { .. 
} => { + let offset = member.0; + ArchiveMember::parse_aixbig(self.data, offset) + } + } + } + + /// Iterate over the symbols in the archive. + pub fn symbols(&self) -> read::Result>> { + if self.symbols == (0, 0) { + return Ok(None); + } + let (offset, size) = self.symbols; + ArchiveSymbolIterator::new(self.kind, self.data, offset, size) + .read_error("Invalid archive symbol table") + .map(Some) + } } /// An iterator over the members of an archive. @@ -253,6 +324,7 @@ pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> { data: R, members: Members<'data>, names: &'data [u8], + thin: bool, } impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { @@ -267,7 +339,7 @@ impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { if *offset >= *end_offset { return None; } - let member = ArchiveMember::parse(self.data, offset, self.names); + let member = ArchiveMember::parse(self.data, offset, self.names, self.thin); if member.is_err() { *offset = *end_offset; } @@ -302,6 +374,7 @@ enum MemberHeader<'data> { pub struct ArchiveMember<'data> { header: MemberHeader<'data>, name: &'data [u8], + // May be zero for thin members. offset: u64, size: u64, } @@ -314,6 +387,7 @@ impl<'data> ArchiveMember<'data> { data: R, offset: &mut u64, names: &'data [u8], + thin: bool, ) -> read::Result { let header = data .read::(offset) @@ -322,16 +396,10 @@ impl<'data> ArchiveMember<'data> { return Err(Error("Invalid archive terminator")); } - let mut file_offset = *offset; - let mut file_size = + let header_file_size = parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?; - *offset = offset - .checked_add(file_size) - .read_error("Archive member size is too large")?; - // Entries are padded to an even number of bytes. 
- if (file_size & 1) != 0 { - *offset = offset.saturating_add(1); - } + let mut file_offset = *offset; + let mut file_size = header_file_size; let name = if header.name[0] == b'/' && (header.name[1] as char).is_ascii_digit() { // Read file name from the names table. @@ -345,12 +413,34 @@ impl<'data> ArchiveMember<'data> { let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len()); &header.name[..name_len] } else { + // Name is terminated by slash or space. + // Slash allows embedding spaces in the name, so only look + // for space if there is no slash. let name_len = memchr::memchr(b'/', &header.name) .or_else(|| memchr::memchr(b' ', &header.name)) .unwrap_or(header.name.len()); &header.name[..name_len] }; + // Members in thin archives don't have data unless they are special members. + if thin && name != b"/" && name != b"//" && name != b"/SYM64/" { + return Ok(ArchiveMember { + header: MemberHeader::Common(header), + name, + offset: 0, + size: file_size, + }); + } + + // Skip the file data. + *offset = offset + .checked_add(header_file_size) + .read_error("Archive member size is too large")?; + // Entries are padded to an even number of bytes. + if (header_file_size & 1) != 0 { + *offset = offset.saturating_add(1); + } + Ok(ArchiveMember { header: MemberHeader::Common(header), name, @@ -473,19 +563,270 @@ impl<'data> ArchiveMember<'data> { } } + /// Return the size of the file data. + pub fn size(&self) -> u64 { + self.size + } + /// Return the offset and size of the file data. pub fn file_range(&self) -> (u64, u64) { (self.offset, self.size) } + /// Return true if the member is a thin member. + /// + /// Thin members have no file data. + pub fn is_thin(&self) -> bool { + self.offset == 0 + } + /// Return the file data. + /// + /// This is an empty slice for thin members. 
#[inline] pub fn data>(&self, data: R) -> read::Result<&'data [u8]> { + if self.is_thin() { + return Ok(&[]); + } data.read_bytes_at(self.offset, self.size) .read_error("Archive member size is too large") } } +/// An offset of a member in an archive. +#[derive(Debug, Clone, Copy)] +pub struct ArchiveOffset(pub u64); + +/// An iterator over the symbols in the archive symbol table. +#[derive(Debug, Clone)] +pub struct ArchiveSymbolIterator<'data>(SymbolIteratorInternal<'data>); + +#[derive(Debug, Clone)] +enum SymbolIteratorInternal<'data> { + /// There is no symbol table. + None, + /// A GNU symbol table. + /// + /// Contains: + /// - the number of symbols as a 32-bit big-endian integer + /// - the offsets of the member headers as 32-bit big-endian integers + /// - the symbol names as null-terminated strings + Gnu { + offsets: slice::Iter<'data, U32Bytes>, + names: Bytes<'data>, + }, + /// A GNU 64-bit symbol table + /// + /// Contains: + /// - the number of symbols as a 64-bit big-endian integer + /// - the offsets of the member headers as 64-bit big-endian integers + /// - the symbol names as null-terminated strings + Gnu64 { + offsets: slice::Iter<'data, U64Bytes>, + names: Bytes<'data>, + }, + /// A BSD symbol table. + /// + /// Contains: + /// - the size in bytes of the offsets array as a 32-bit little-endian integer + /// - the offsets array, for which each entry is a pair of 32-bit little-endian integers + /// for the offset of the member header and the offset of the symbol name + /// - the size in bytes of the symbol names as a 32-bit little-endian integer + /// - the symbol names as null-terminated strings + Bsd { + offsets: slice::Iter<'data, [U32Bytes; 2]>, + names: Bytes<'data>, + }, + /// A BSD 64-bit symbol table. 
+ /// + /// Contains: + /// - the size in bytes of the offsets array as a 64-bit little-endian integer + /// - the offsets array, for which each entry is a pair of 64-bit little-endian integers + /// for the offset of the member header and the offset of the symbol name + /// - the size in bytes of the symbol names as a 64-bit little-endian integer + /// - the symbol names as null-terminated strings + Bsd64 { + offsets: slice::Iter<'data, [U64Bytes; 2]>, + names: Bytes<'data>, + }, + /// A Windows COFF symbol table. + /// + /// Contains: + /// - the number of members as a 32-bit little-endian integer + /// - the offsets of the member headers as 32-bit little-endian integers + /// - the number of symbols as a 32-bit little-endian integer + /// - the member index for each symbol as a 16-bit little-endian integer + /// - the symbol names as null-terminated strings in lexical order + Coff { + members: &'data [U32Bytes], + indices: slice::Iter<'data, U16Bytes>, + names: Bytes<'data>, + }, +} + +impl<'data> ArchiveSymbolIterator<'data> { + fn new>( + kind: ArchiveKind, + data: R, + offset: u64, + size: u64, + ) -> Result { + let mut data = data.read_bytes_at(offset, size).map(Bytes)?; + match kind { + ArchiveKind::Unknown => Ok(ArchiveSymbolIterator(SymbolIteratorInternal::None)), + ArchiveKind::Gnu => { + let offsets_count = data.read::>()?.get(BE); + let offsets = data.read_slice::>(offsets_count as usize)?; + Ok(ArchiveSymbolIterator(SymbolIteratorInternal::Gnu { + offsets: offsets.iter(), + names: data, + })) + } + ArchiveKind::Gnu64 => { + let offsets_count = data.read::>()?.get(BE); + let offsets = data.read_slice::>(offsets_count as usize)?; + Ok(ArchiveSymbolIterator(SymbolIteratorInternal::Gnu64 { + offsets: offsets.iter(), + names: data, + })) + } + ArchiveKind::Bsd => { + let offsets_size = data.read::>()?.get(LE); + let offsets = data.read_slice::<[U32Bytes; 2]>(offsets_size as usize / 8)?; + let names_size = data.read::>()?.get(LE); + let names = 
data.read_bytes(names_size as usize)?; + Ok(ArchiveSymbolIterator(SymbolIteratorInternal::Bsd { + offsets: offsets.iter(), + names, + })) + } + ArchiveKind::Bsd64 => { + let offsets_size = data.read::>()?.get(LE); + let offsets = data.read_slice::<[U64Bytes; 2]>(offsets_size as usize / 16)?; + let names_size = data.read::>()?.get(LE); + let names = data.read_bytes(names_size as usize)?; + Ok(ArchiveSymbolIterator(SymbolIteratorInternal::Bsd64 { + offsets: offsets.iter(), + names, + })) + } + ArchiveKind::Coff => { + let members_count = data.read::>()?.get(LE); + let members = data.read_slice::>(members_count as usize)?; + let indices_count = data.read::>()?.get(LE); + let indices = data.read_slice::>(indices_count as usize)?; + Ok(ArchiveSymbolIterator(SymbolIteratorInternal::Coff { + members, + indices: indices.iter(), + names: data, + })) + } + // TODO: Implement AIX big archive symbol table. + ArchiveKind::AixBig => Ok(ArchiveSymbolIterator(SymbolIteratorInternal::None)), + } + } +} + +impl<'data> Iterator for ArchiveSymbolIterator<'data> { + type Item = read::Result>; + + fn next(&mut self) -> Option { + match &mut self.0 { + SymbolIteratorInternal::None => None, + SymbolIteratorInternal::Gnu { offsets, names } => { + let offset = offsets.next()?.get(BE); + Some( + names + .read_string() + .read_error("Missing archive symbol name") + .map(|name| ArchiveSymbol { + name, + offset: ArchiveOffset(offset.into()), + }), + ) + } + SymbolIteratorInternal::Gnu64 { offsets, names } => { + let offset = offsets.next()?.get(BE); + Some( + names + .read_string() + .read_error("Missing archive symbol name") + .map(|name| ArchiveSymbol { + name, + offset: ArchiveOffset(offset), + }), + ) + } + SymbolIteratorInternal::Bsd { offsets, names } => { + let entry = offsets.next()?; + Some( + names + .read_string_at(entry[0].get(LE) as usize) + .read_error("Invalid archive symbol name offset") + .map(|name| ArchiveSymbol { + name, + offset: ArchiveOffset(entry[1].get(LE).into()), + 
}), + ) + } + SymbolIteratorInternal::Bsd64 { offsets, names } => { + let entry = offsets.next()?; + Some( + names + .read_string_at(entry[0].get(LE) as usize) + .read_error("Invalid archive symbol name offset") + .map(|name| ArchiveSymbol { + name, + offset: ArchiveOffset(entry[1].get(LE)), + }), + ) + } + SymbolIteratorInternal::Coff { + members, + indices, + names, + } => { + let index = indices.next()?.get(LE).wrapping_sub(1); + let member = members + .get(index as usize) + .read_error("Invalid archive symbol member index"); + let name = names + .read_string() + .read_error("Missing archive symbol name"); + Some(member.and_then(|member| { + name.map(|name| ArchiveSymbol { + name, + offset: ArchiveOffset(member.get(LE).into()), + }) + })) + } + } + } +} + +/// A symbol in the archive symbol table. +/// +/// This is used to find the member containing the symbol. +#[derive(Debug, Clone, Copy)] +pub struct ArchiveSymbol<'data> { + name: &'data [u8], + offset: ArchiveOffset, +} + +impl<'data> ArchiveSymbol<'data> { + /// Return the symbol name. + #[inline] + pub fn name(&self) -> &'data [u8] { + self.name + } + + /// Return the offset of the header for the member containing the symbol. + #[inline] + pub fn offset(&self) -> ArchiveOffset { + self.offset + } +} + // Ignores bytes starting from the first space. fn parse_u64_digits(digits: &[u8], radix: u32) -> Option { if let [b' ', ..] = digits { @@ -505,18 +846,27 @@ fn parse_u64_digits(digits: &[u8], radix: u32) -> Option { Some(result) } +/// Digits are a decimal offset into the extended name table. +/// Name is terminated by "/\n" (for GNU) or a null byte (for COFF). 
fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()> { let offset = parse_u64_digits(digits, 10).ok_or(())?; let offset = offset.try_into().map_err(|_| ())?; let name_data = names.get(offset..).ok_or(())?; - let name = match memchr::memchr2(b'/', b'\0', name_data) { - Some(len) => &name_data[..len], - None => name_data, - }; - Ok(name) + let len = memchr::memchr2(b'\n', b'\0', name_data).ok_or(())?; + if name_data[len] == b'\n' { + if len < 1 || name_data[len - 1] != b'/' { + Err(()) + } else { + Ok(&name_data[..len - 1]) + } + } else { + Ok(&name_data[..len]) + } } -/// Modifies `data` to start after the extended name. +/// Digits are a decimal length of the extended name, which is contained +/// in `data` at `offset`. +/// Modifies `offset` and `size` to start after the extended name. fn parse_bsd_extended_name<'data, R: ReadRef<'data>>( digits: &[u8], data: R, @@ -647,6 +997,14 @@ mod tests { \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; let archive = ArchiveFile::parse(&data[..]).unwrap(); assert_eq!(archive.kind(), ArchiveKind::AixBig); + + let data = b"\ + !\n\ + / 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + assert!(archive.is_thin()); } #[test] @@ -681,6 +1039,42 @@ mod tests { assert!(members.next().is_none()); } + #[test] + fn thin_gnu_names() { + let data = b"\ + !\n\ + // 18 `\n\ + 0123456789/abcde/\n\ + s p a c e/ 0 0 0 644 4 `\n\ + 0123456789abcde/0 0 0 644 3 `\n\ + /0 0 0 0 644 4 `\n\ + "; + let data = &data[..]; + let archive = ArchiveFile::parse(data).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + let mut members = archive.members(); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"s p a c e"); + assert!(member.is_thin()); + assert_eq!(member.size(), 4); + assert_eq!(member.data(data).unwrap(), &[]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), 
b"0123456789abcde"); + assert!(member.is_thin()); + assert_eq!(member.size(), 3); + assert_eq!(member.data(data).unwrap(), &[]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789/abcde"); + assert!(member.is_thin()); + assert_eq!(member.size(), 4); + assert_eq!(member.data(data).unwrap(), &[]); + + assert!(members.next().is_none()); + } + #[test] fn bsd_names() { let data = b"\ diff --git a/third_party/rust/object/src/read/coff/comdat.rs b/third_party/rust/object/src/read/coff/comdat.rs index 22e061a236d7..464d23888a61 100644 --- a/third_party/rust/object/src/read/coff/comdat.rs +++ b/third_party/rust/object/src/read/coff/comdat.rs @@ -8,11 +8,11 @@ use crate::read::{ use super::{CoffFile, CoffHeader, ImageSymbol}; -/// An iterator over the COMDAT section groups of a `CoffBigFile`. +/// An iterator for the COMDAT section groups in a [`CoffBigFile`](super::CoffBigFile). pub type CoffBigComdatIterator<'data, 'file, R = &'data [u8]> = CoffComdatIterator<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// An iterator over the COMDAT section groups of a `CoffFile`. +/// An iterator for the COMDAT section groups in a [`CoffFile`]. 
#[derive(Debug)] pub struct CoffComdatIterator< 'data, @@ -20,8 +20,17 @@ pub struct CoffComdatIterator< R: ReadRef<'data> = &'data [u8], Coff: CoffHeader = pe::ImageFileHeader, > { - pub(super) file: &'file CoffFile<'data, R, Coff>, - pub(super) index: usize, + file: &'file CoffFile<'data, R, Coff>, + index: SymbolIndex, +} + +impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> CoffComdatIterator<'data, 'file, R, Coff> { + pub(crate) fn new(file: &'file CoffFile<'data, R, Coff>) -> Self { + CoffComdatIterator { + file, + index: SymbolIndex(0), + } + } } impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator @@ -33,7 +42,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator loop { let index = self.index; let symbol = self.file.common.symbols.symbol(index).ok()?; - self.index += 1 + symbol.number_of_aux_symbols() as usize; + self.index.0 += 1 + symbol.number_of_aux_symbols() as usize; if let Some(comdat) = CoffComdat::parse(self.file, symbol, index) { return Some(comdat); } @@ -41,11 +50,15 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator } } -/// A COMDAT section group of a `CoffBigFile`. +/// A COMDAT section group in a [`CoffBigFile`](super::CoffBigFile). +/// +/// Most functionality is provided by the [`ObjectComdat`] trait implementation. pub type CoffBigComdat<'data, 'file, R = &'data [u8]> = CoffComdat<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// A COMDAT section group of a `CoffFile`. +/// A COMDAT section group in a [`CoffFile`]. +/// +/// Most functionality is provided by the [`ObjectComdat`] trait implementation. #[derive(Debug)] pub struct CoffComdat< 'data, @@ -63,7 +76,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> CoffComdat<'data, 'file, fn parse( file: &'file CoffFile<'data, R, Coff>, section_symbol: &'data Coff::ImageSymbol, - index: usize, + index: SymbolIndex, ) -> Option> { // Must be a section symbol. 
if !section_symbol.has_aux_section() { @@ -82,7 +95,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> CoffComdat<'data, 'file, let mut symbol = section_symbol; let section_number = section_symbol.section_number(); loop { - symbol_index += 1 + symbol.number_of_aux_symbols() as usize; + symbol_index.0 += 1 + symbol.number_of_aux_symbols() as usize; symbol = file.common.symbols.symbol(symbol_index).ok()?; if section_number == symbol.section_number() { break; @@ -91,7 +104,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> CoffComdat<'data, 'file, Some(CoffComdat { file, - symbol_index: SymbolIndex(symbol_index), + symbol_index, symbol, selection, }) @@ -127,13 +140,13 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectComdat<'data> } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { // Find the name of first symbol referring to the section. self.symbol.name(self.file.common.symbols.strings()) } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { let bytes = self.name_bytes()?; str::from_utf8(bytes) .ok() @@ -145,16 +158,16 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectComdat<'data> CoffComdatSectionIterator { file: self.file, section_number: self.symbol.section_number(), - index: 0, + index: SymbolIndex(0), } } } -/// An iterator over the sections in a COMDAT section group of a `CoffBigFile`. +/// An iterator for the sections in a COMDAT section group in a [`CoffBigFile`](super::CoffBigFile). pub type CoffBigComdatSectionIterator<'data, 'file, R = &'data [u8]> = CoffComdatSectionIterator<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// An iterator over the sections in a COMDAT section group of a `CoffFile`. +/// An iterator for the sections in a COMDAT section group in a [`CoffFile`]. 
#[derive(Debug)] pub struct CoffComdatSectionIterator< 'data, @@ -164,7 +177,7 @@ pub struct CoffComdatSectionIterator< > { file: &'file CoffFile<'data, R, Coff>, section_number: i32, - index: usize, + index: SymbolIndex, } impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator @@ -178,7 +191,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator loop { let index = self.index; let symbol = self.file.common.symbols.symbol(index).ok()?; - self.index += 1 + symbol.number_of_aux_symbols() as usize; + self.index.0 += 1 + symbol.number_of_aux_symbols() as usize; // Must be a section symbol. if !symbol.has_aux_section() { diff --git a/third_party/rust/object/src/read/coff/file.rs b/third_party/rust/object/src/read/coff/file.rs index 4219f8f02781..525c11d36589 100644 --- a/third_party/rust/object/src/read/coff/file.rs +++ b/third_party/rust/object/src/read/coff/file.rs @@ -1,11 +1,13 @@ use alloc::vec::Vec; use core::fmt::Debug; +use crate::endian::LittleEndian as LE; +use crate::pe; +use crate::pod::Pod; use crate::read::{ self, Architecture, Export, FileFlags, Import, NoDynamicRelocationIterator, Object, ObjectKind, - ObjectSection, ReadError, ReadRef, Result, SectionIndex, SymbolIndex, + ObjectSection, ReadError, ReadRef, Result, SectionIndex, SubArchitecture, SymbolIndex, }; -use crate::{pe, LittleEndian as LE, Pod}; use super::{ CoffComdat, CoffComdatIterator, CoffSection, CoffSectionIterator, CoffSegment, @@ -22,9 +24,19 @@ pub(crate) struct CoffCommon<'data, R: ReadRef<'data>, Coff: CoffHeader = pe::Im } /// A COFF bigobj object file with 32-bit section numbers. +/// +/// This is a file that starts with [`pe::AnonObjectHeaderBigobj`], and corresponds +/// to [`crate::FileKind::CoffBig`]. +/// +/// Most functionality is provided by the [`Object`] trait implementation. pub type CoffBigFile<'data, R = &'data [u8]> = CoffFile<'data, R, pe::AnonObjectHeaderBigobj>; /// A COFF object file. 
+/// +/// This is a file that starts with [`pe::ImageFileHeader`], and corresponds +/// to [`crate::FileKind::Coff`]. +/// +/// Most functionality is provided by the [`Object`] trait implementation. #[derive(Debug)] pub struct CoffFile<'data, R: ReadRef<'data> = &'data [u8], Coff: CoffHeader = pe::ImageFileHeader> { @@ -51,6 +63,21 @@ impl<'data, R: ReadRef<'data>, Coff: CoffHeader> CoffFile<'data, R, Coff> { data, }) } + + /// Get the raw COFF file header. + pub fn coff_header(&self) -> &'data Coff { + self.header + } + + /// Get the COFF section table. + pub fn coff_section_table(&self) -> SectionTable<'data> { + self.common.sections + } + + /// Get the COFF symbol table. + pub fn coff_symbol_table(&self) -> &SymbolTable<'data, R, Coff> { + &self.common.symbols + } } impl<'data, R: ReadRef<'data>, Coff: CoffHeader> read::private::Sealed @@ -58,33 +85,39 @@ impl<'data, R: ReadRef<'data>, Coff: CoffHeader> read::private::Sealed { } -impl<'data, 'file, R, Coff> Object<'data, 'file> for CoffFile<'data, R, Coff> +impl<'data, R, Coff> Object<'data> for CoffFile<'data, R, Coff> where - 'data: 'file, - R: 'file + ReadRef<'data>, + R: ReadRef<'data>, Coff: CoffHeader, { - type Segment = CoffSegment<'data, 'file, R, Coff>; - type SegmentIterator = CoffSegmentIterator<'data, 'file, R, Coff>; - type Section = CoffSection<'data, 'file, R, Coff>; - type SectionIterator = CoffSectionIterator<'data, 'file, R, Coff>; - type Comdat = CoffComdat<'data, 'file, R, Coff>; - type ComdatIterator = CoffComdatIterator<'data, 'file, R, Coff>; - type Symbol = CoffSymbol<'data, 'file, R, Coff>; - type SymbolIterator = CoffSymbolIterator<'data, 'file, R, Coff>; - type SymbolTable = CoffSymbolTable<'data, 'file, R, Coff>; - type DynamicRelocationIterator = NoDynamicRelocationIterator; + type Segment<'file> = CoffSegment<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type SegmentIterator<'file> = CoffSegmentIterator<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type 
Section<'file> = CoffSection<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type SectionIterator<'file> = CoffSectionIterator<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type Comdat<'file> = CoffComdat<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type ComdatIterator<'file> = CoffComdatIterator<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type Symbol<'file> = CoffSymbol<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type SymbolIterator<'file> = CoffSymbolIterator<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type SymbolTable<'file> = CoffSymbolTable<'data, 'file, R, Coff> where Self: 'file, 'data: 'file; + type DynamicRelocationIterator<'file> = NoDynamicRelocationIterator where Self: 'file, 'data: 'file; fn architecture(&self) -> Architecture { match self.header.machine() { pe::IMAGE_FILE_MACHINE_ARMNT => Architecture::Arm, - pe::IMAGE_FILE_MACHINE_ARM64 => Architecture::Aarch64, + pe::IMAGE_FILE_MACHINE_ARM64 | pe::IMAGE_FILE_MACHINE_ARM64EC => Architecture::Aarch64, pe::IMAGE_FILE_MACHINE_I386 => Architecture::I386, pe::IMAGE_FILE_MACHINE_AMD64 => Architecture::X86_64, _ => Architecture::Unknown, } } + fn sub_architecture(&self) -> Option { + match self.header.machine() { + pe::IMAGE_FILE_MACHINE_ARM64EC => Some(SubArchitecture::Arm64EC), + _ => None, + } + } + #[inline] fn is_little_endian(&self) -> bool { true @@ -100,14 +133,14 @@ where ObjectKind::Relocatable } - fn segments(&'file self) -> CoffSegmentIterator<'data, 'file, R, Coff> { + fn segments(&self) -> CoffSegmentIterator<'data, '_, R, Coff> { CoffSegmentIterator { file: self, iter: self.common.sections.iter(), } } - fn section_by_name_bytes( + fn section_by_name_bytes<'file>( &'file self, section_name: &[u8], ) -> Option> { @@ -115,11 +148,8 @@ where .find(|section| section.name_bytes() == Ok(section_name)) } - fn section_by_index( - &'file self, - index: SectionIndex, - ) -> Result> { - let section = 
self.common.sections.section(index.0)?; + fn section_by_index(&self, index: SectionIndex) -> Result> { + let section = self.common.sections.section(index)?; Ok(CoffSection { file: self, index, @@ -127,25 +157,19 @@ where }) } - fn sections(&'file self) -> CoffSectionIterator<'data, 'file, R, Coff> { + fn sections(&self) -> CoffSectionIterator<'data, '_, R, Coff> { CoffSectionIterator { file: self, iter: self.common.sections.iter().enumerate(), } } - fn comdats(&'file self) -> CoffComdatIterator<'data, 'file, R, Coff> { - CoffComdatIterator { - file: self, - index: 0, - } + fn comdats(&self) -> CoffComdatIterator<'data, '_, R, Coff> { + CoffComdatIterator::new(self) } - fn symbol_by_index( - &'file self, - index: SymbolIndex, - ) -> Result> { - let symbol = self.common.symbols.symbol(index.0)?; + fn symbol_by_index(&self, index: SymbolIndex) -> Result> { + let symbol = self.common.symbols.symbol(index)?; Ok(CoffSymbol { file: &self.common, index, @@ -153,33 +177,26 @@ where }) } - fn symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R, Coff> { - CoffSymbolIterator { - file: &self.common, - index: 0, - } + fn symbols(&self) -> CoffSymbolIterator<'data, '_, R, Coff> { + CoffSymbolIterator::new(&self.common) } #[inline] - fn symbol_table(&'file self) -> Option> { + fn symbol_table(&self) -> Option> { Some(CoffSymbolTable { file: &self.common }) } - fn dynamic_symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R, Coff> { - CoffSymbolIterator { - file: &self.common, - // Hack: don't return any. - index: self.common.symbols.len(), - } + fn dynamic_symbols(&self) -> CoffSymbolIterator<'data, '_, R, Coff> { + CoffSymbolIterator::empty(&self.common) } #[inline] - fn dynamic_symbol_table(&'file self) -> Option> { + fn dynamic_symbol_table(&self) -> Option> { None } #[inline] - fn dynamic_relocations(&'file self) -> Option { + fn dynamic_relocations(&self) -> Option { None } @@ -215,7 +232,7 @@ where } } -/// Read the `class_id` field from an anon object header. 
+/// Read the `class_id` field from a [`pe::AnonObjectHeader`]. /// /// This can be used to determine the format of the header. pub fn anon_object_class_id<'data, R: ReadRef<'data>>(data: R) -> Result { @@ -225,13 +242,13 @@ pub fn anon_object_class_id<'data, R: ReadRef<'data>>(data: R) -> Result bool; diff --git a/third_party/rust/object/src/read/coff/import.rs b/third_party/rust/object/src/read/coff/import.rs index d635e7592ab0..91f9fd70726f 100644 --- a/third_party/rust/object/src/read/coff/import.rs +++ b/third_party/rust/object/src/read/coff/import.rs @@ -3,13 +3,19 @@ //! These are used by some Windows linkers as a more compact way to describe //! dynamically imported symbols. -use crate::read::{Architecture, Error, ReadError, ReadRef, Result}; -use crate::{pe, ByteString, Bytes, LittleEndian as LE}; +use crate::endian::LittleEndian as LE; +use crate::pe; +use crate::read::{ + Architecture, ByteString, Bytes, Error, ReadError, ReadRef, Result, SubArchitecture, +}; /// A Windows short form description of a symbol to import. /// /// Used in Windows import libraries to provide a mapping from /// a symbol name to a DLL export. This is not an object file. +/// +/// This is a file that starts with [`pe::ImportObjectHeader`], and corresponds +/// to [`crate::FileKind::CoffImport`]. #[derive(Debug, Clone)] pub struct ImportFile<'data> { header: &'data pe::ImportObjectHeader, @@ -64,13 +70,21 @@ impl<'data> ImportFile<'data> { pub fn architecture(&self) -> Architecture { match self.header.machine.get(LE) { pe::IMAGE_FILE_MACHINE_ARMNT => Architecture::Arm, - pe::IMAGE_FILE_MACHINE_ARM64 => Architecture::Aarch64, + pe::IMAGE_FILE_MACHINE_ARM64 | pe::IMAGE_FILE_MACHINE_ARM64EC => Architecture::Aarch64, pe::IMAGE_FILE_MACHINE_I386 => Architecture::I386, pe::IMAGE_FILE_MACHINE_AMD64 => Architecture::X86_64, _ => Architecture::Unknown, } } + /// Get the sub machine type, if available. 
+ pub fn sub_architecture(&self) -> Option { + match self.header.machine.get(LE) { + pe::IMAGE_FILE_MACHINE_ARM64EC => Some(SubArchitecture::Arm64EC), + _ => None, + } + } + /// The public symbol name. pub fn symbol(&self) -> &'data [u8] { self.symbol.0 @@ -181,7 +195,7 @@ impl pe::ImportObjectHeader { } } -/// The data following `ImportObjectHeader`. +/// The data following [`pe::ImportObjectHeader`]. #[derive(Debug, Clone)] pub struct ImportObjectData<'data> { symbol: ByteString<'data>, diff --git a/third_party/rust/object/src/read/coff/mod.rs b/third_party/rust/object/src/read/coff/mod.rs index 26020d7974a4..de397da0a034 100644 --- a/third_party/rust/object/src/read/coff/mod.rs +++ b/third_party/rust/object/src/read/coff/mod.rs @@ -1,6 +1,51 @@ //! Support for reading Windows COFF files. //! -//! Provides `CoffFile` and related types which implement the `Object` trait. +//! Traits are used to abstract over the difference between COFF object files +//! and COFF bigobj files. The primary trait for this is [`CoffHeader`]. +//! +//! ## High level API +//! +//! [`CoffFile`] implements the [`Object`](crate::read::Object) trait for +//! COFF files. [`CoffFile`] is parameterised by [`CoffHeader`]. +//! The default parameter allows reading regular COFF object files, +//! while the type alias [`CoffBigFile`] allows reading COFF bigobj files. +//! +//! [`ImportFile`] allows reading COFF short imports that are used in import +//! libraries. Currently these are not integrated with the unified read API. +//! +//! ## Low level API +//! +//! The [`CoffHeader`] trait can be directly used to parse both COFF +//! object files (which start with [`pe::ImageFileHeader`]) and COFF bigobj +//! files (which start with [`pe::AnonObjectHeaderBigobj`]). +//! +//! ### Example for low level API +//! ```no_run +//! use object::pe; +//! use object::read::coff::{CoffHeader, ImageSymbol as _}; +//! use std::error::Error; +//! use std::fs; +//! +//! 
/// Reads a file and displays the name of each section and symbol. +//! fn main() -> Result<(), Box> { +//! # #[cfg(feature = "std")] { +//! let data = fs::read("path/to/binary")?; +//! let mut offset = 0; +//! let header = pe::ImageFileHeader::parse(&*data, &mut offset)?; +//! let sections = header.sections(&*data, offset)?; +//! let symbols = header.symbols(&*data)?; +//! for section in sections.iter() { +//! println!("{}", String::from_utf8_lossy(section.name(symbols.strings())?)); +//! } +//! for (_index, symbol) in symbols.iter() { +//! println!("{}", String::from_utf8_lossy(symbol.name(symbols.strings())?)); +//! } +//! # } +//! Ok(()) +//! } +//! ``` +#[cfg(doc)] +use crate::pe; mod file; pub use file::*; diff --git a/third_party/rust/object/src/read/coff/relocation.rs b/third_party/rust/object/src/read/coff/relocation.rs index 44d2c68d0605..41a1fbccf1f5 100644 --- a/third_party/rust/object/src/read/coff/relocation.rs +++ b/third_party/rust/object/src/read/coff/relocation.rs @@ -4,16 +4,17 @@ use core::slice; use crate::endian::LittleEndian as LE; use crate::pe; use crate::read::{ - ReadRef, Relocation, RelocationEncoding, RelocationKind, RelocationTarget, SymbolIndex, + ReadRef, Relocation, RelocationEncoding, RelocationFlags, RelocationKind, RelocationTarget, + SymbolIndex, }; use super::{CoffFile, CoffHeader}; -/// An iterator over the relocations in a `CoffBigSection`. +/// An iterator for the relocations in a [`CoffBigSection`](super::CoffBigSection). pub type CoffBigRelocationIterator<'data, 'file, R = &'data [u8]> = CoffRelocationIterator<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// An iterator over the relocations in a `CoffSection`. +/// An iterator for the relocations in a [`CoffSection`](super::CoffSection). 
pub struct CoffRelocationIterator< 'data, 'file, @@ -31,25 +32,27 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator fn next(&mut self) -> Option { self.iter.next().map(|relocation| { + let typ = relocation.typ.get(LE); + let flags = RelocationFlags::Coff { typ }; let (kind, size, addend) = match self.file.header.machine() { - pe::IMAGE_FILE_MACHINE_ARMNT => match relocation.typ.get(LE) { + pe::IMAGE_FILE_MACHINE_ARMNT => match typ { pe::IMAGE_REL_ARM_ADDR32 => (RelocationKind::Absolute, 32, 0), pe::IMAGE_REL_ARM_ADDR32NB => (RelocationKind::ImageOffset, 32, 0), pe::IMAGE_REL_ARM_REL32 => (RelocationKind::Relative, 32, -4), pe::IMAGE_REL_ARM_SECTION => (RelocationKind::SectionIndex, 16, 0), pe::IMAGE_REL_ARM_SECREL => (RelocationKind::SectionOffset, 32, 0), - typ => (RelocationKind::Coff(typ), 0, 0), + _ => (RelocationKind::Unknown, 0, 0), }, - pe::IMAGE_FILE_MACHINE_ARM64 => match relocation.typ.get(LE) { + pe::IMAGE_FILE_MACHINE_ARM64 | pe::IMAGE_FILE_MACHINE_ARM64EC => match typ { pe::IMAGE_REL_ARM64_ADDR32 => (RelocationKind::Absolute, 32, 0), pe::IMAGE_REL_ARM64_ADDR32NB => (RelocationKind::ImageOffset, 32, 0), pe::IMAGE_REL_ARM64_SECREL => (RelocationKind::SectionOffset, 32, 0), pe::IMAGE_REL_ARM64_SECTION => (RelocationKind::SectionIndex, 16, 0), pe::IMAGE_REL_ARM64_ADDR64 => (RelocationKind::Absolute, 64, 0), pe::IMAGE_REL_ARM64_REL32 => (RelocationKind::Relative, 32, -4), - typ => (RelocationKind::Coff(typ), 0, 0), + _ => (RelocationKind::Unknown, 0, 0), }, - pe::IMAGE_FILE_MACHINE_I386 => match relocation.typ.get(LE) { + pe::IMAGE_FILE_MACHINE_I386 => match typ { pe::IMAGE_REL_I386_DIR16 => (RelocationKind::Absolute, 16, 0), pe::IMAGE_REL_I386_REL16 => (RelocationKind::Relative, 16, 0), pe::IMAGE_REL_I386_DIR32 => (RelocationKind::Absolute, 32, 0), @@ -58,9 +61,9 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator pe::IMAGE_REL_I386_SECREL => (RelocationKind::SectionOffset, 32, 0), pe::IMAGE_REL_I386_SECREL7 => 
(RelocationKind::SectionOffset, 7, 0), pe::IMAGE_REL_I386_REL32 => (RelocationKind::Relative, 32, -4), - typ => (RelocationKind::Coff(typ), 0, 0), + _ => (RelocationKind::Unknown, 0, 0), }, - pe::IMAGE_FILE_MACHINE_AMD64 => match relocation.typ.get(LE) { + pe::IMAGE_FILE_MACHINE_AMD64 => match typ { pe::IMAGE_REL_AMD64_ADDR64 => (RelocationKind::Absolute, 64, 0), pe::IMAGE_REL_AMD64_ADDR32 => (RelocationKind::Absolute, 32, 0), pe::IMAGE_REL_AMD64_ADDR32NB => (RelocationKind::ImageOffset, 32, 0), @@ -73,13 +76,11 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator pe::IMAGE_REL_AMD64_SECTION => (RelocationKind::SectionIndex, 16, 0), pe::IMAGE_REL_AMD64_SECREL => (RelocationKind::SectionOffset, 32, 0), pe::IMAGE_REL_AMD64_SECREL7 => (RelocationKind::SectionOffset, 7, 0), - typ => (RelocationKind::Coff(typ), 0, 0), + _ => (RelocationKind::Unknown, 0, 0), }, - _ => (RelocationKind::Coff(relocation.typ.get(LE)), 0, 0), + _ => (RelocationKind::Unknown, 0, 0), }; - let target = RelocationTarget::Symbol(SymbolIndex( - relocation.symbol_table_index.get(LE) as usize, - )); + let target = RelocationTarget::Symbol(relocation.symbol()); ( u64::from(relocation.virtual_address.get(LE)), Relocation { @@ -89,6 +90,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator target, addend, implicit_addend: true, + flags, }, ) }) @@ -102,3 +104,10 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> fmt::Debug f.debug_struct("CoffRelocationIterator").finish() } } + +impl pe::ImageRelocation { + /// Get the index of the symbol referenced by this relocation. 
+ pub fn symbol(&self) -> SymbolIndex { + SymbolIndex(self.symbol_table_index.get(LE) as usize) + } +} diff --git a/third_party/rust/object/src/read/coff/section.rs b/third_party/rust/object/src/read/coff/section.rs index 75804034b8bf..280b5c432cee 100644 --- a/third_party/rust/object/src/read/coff/section.rs +++ b/third_party/rust/object/src/read/coff/section.rs @@ -6,12 +6,15 @@ use crate::pe; use crate::read::util::StringTable; use crate::read::{ self, CompressedData, CompressedFileRange, Error, ObjectSection, ObjectSegment, ReadError, - ReadRef, Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, + ReadRef, RelocationMap, Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, }; use super::{CoffFile, CoffHeader, CoffRelocationIterator}; /// The table of section headers in a COFF or PE file. +/// +/// Returned by [`CoffHeader::sections`] and +/// [`ImageNtHeaders::sections`](crate::read::pe::ImageNtHeaders::sections). #[derive(Debug, Default, Clone, Copy)] pub struct SectionTable<'data> { sections: &'data [pe::ImageSectionHeader], @@ -35,12 +38,20 @@ impl<'data> SectionTable<'data> { /// Iterate over the section headers. /// - /// Warning: sections indices start at 1. + /// Warning: section indices start at 1. #[inline] pub fn iter(&self) -> slice::Iter<'data, pe::ImageSectionHeader> { self.sections.iter() } + /// Iterate over the section headers and their indices. + pub fn enumerate(&self) -> impl Iterator { + self.sections + .iter() + .enumerate() + .map(|(i, section)| (SectionIndex(i + 1), section)) + } + /// Return true if the section table is empty. #[inline] pub fn is_empty(&self) -> bool { @@ -56,9 +67,9 @@ impl<'data> SectionTable<'data> { /// Return the section header at the given index. /// /// The index is 1-based. 
- pub fn section(&self, index: usize) -> read::Result<&'data pe::ImageSectionHeader> { + pub fn section(&self, index: SectionIndex) -> read::Result<&'data pe::ImageSectionHeader> { self.sections - .get(index.wrapping_sub(1)) + .get(index.0.wrapping_sub(1)) .read_error("Invalid COFF/PE section index") } @@ -71,12 +82,9 @@ impl<'data> SectionTable<'data> { &self, strings: StringTable<'data, R>, name: &[u8], - ) -> Option<(usize, &'data pe::ImageSectionHeader)> { - self.sections - .iter() - .enumerate() + ) -> Option<(SectionIndex, &'data pe::ImageSectionHeader)> { + self.enumerate() .find(|(_, section)| section.name(strings) == Ok(name)) - .map(|(index, section)| (index + 1, section)) } /// Compute the maximum file offset used by sections. @@ -104,11 +112,11 @@ impl<'data> SectionTable<'data> { } } -/// An iterator over the loadable sections of a `CoffBigFile`. +/// An iterator for the loadable sections in a [`CoffBigFile`](super::CoffBigFile). pub type CoffBigSegmentIterator<'data, 'file, R = &'data [u8]> = CoffSegmentIterator<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// An iterator over the loadable sections of a `CoffFile`. +/// An iterator for the loadable sections in a [`CoffFile`]. #[derive(Debug)] pub struct CoffSegmentIterator< 'data, @@ -133,11 +141,15 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator } } -/// A loadable section of a `CoffBigFile`. +/// A loadable section in a [`CoffBigFile`](super::CoffBigFile). +/// +/// Most functionality is provided by the [`ObjectSegment`] trait implementation. pub type CoffBigSegment<'data, 'file, R = &'data [u8]> = CoffSegment<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// A loadable section of a `CoffFile`. +/// A loadable section in a [`CoffFile`]. +/// +/// Most functionality is provided by the [`ObjectSegment`] trait implementation. 
#[derive(Debug)] pub struct CoffSegment< 'data, @@ -150,6 +162,16 @@ pub struct CoffSegment< } impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> CoffSegment<'data, 'file, R, Coff> { + /// Get the COFF file containing this segment. + pub fn coff_file(&self) -> &'file CoffFile<'data, R, Coff> { + self.file + } + + /// Get the raw COFF section header. + pub fn coff_section(&self) -> &'data pe::ImageSectionHeader { + self.section + } + fn bytes(&self) -> Result<&'data [u8]> { self.section .coff_data(self.file.data) @@ -222,11 +244,11 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSegment<'data> } } -/// An iterator over the sections of a `CoffBigFile`. +/// An iterator for the sections in a [`CoffBigFile`](super::CoffBigFile). pub type CoffBigSectionIterator<'data, 'file, R = &'data [u8]> = CoffSectionIterator<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// An iterator over the sections of a `CoffFile`. +/// An iterator for the sections in a [`CoffFile`]. #[derive(Debug)] pub struct CoffSectionIterator< 'data, @@ -252,11 +274,15 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator } } -/// A section of a `CoffBigFile`. +/// A section in a [`CoffBigFile`](super::CoffBigFile). +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. pub type CoffBigSection<'data, 'file, R = &'data [u8]> = CoffSection<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// A section of a `CoffFile`. +/// A section in a [`CoffFile`]. +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. #[derive(Debug)] pub struct CoffSection< 'data, @@ -270,6 +296,21 @@ pub struct CoffSection< } impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> CoffSection<'data, 'file, R, Coff> { + /// Get the COFF file containing this section. + pub fn coff_file(&self) -> &'file CoffFile<'data, R, Coff> { + self.file + } + + /// Get the raw COFF section header. 
+ pub fn coff_section(&self) -> &'data pe::ImageSectionHeader { + self.section + } + + /// Get the raw COFF relocations for this section. + pub fn coff_relocations(&self) -> Result<&'data [pe::ImageRelocation]> { + self.section.coff_relocations(self.file.data) + } + fn bytes(&self) -> Result<&'data [u8]> { self.section .coff_data(self.file.data) @@ -338,12 +379,12 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSection<'data> } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { self.section.name(self.file.common.symbols.strings()) } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { let name = self.name_bytes()?; str::from_utf8(name) .ok() @@ -366,13 +407,17 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSection<'data> } fn relocations(&self) -> CoffRelocationIterator<'data, 'file, R, Coff> { - let relocations = self.section.coff_relocations(self.file.data).unwrap_or(&[]); + let relocations = self.coff_relocations().unwrap_or(&[]); CoffRelocationIterator { file: self.file, iter: relocations.iter(), } } + fn relocation_map(&self) -> read::Result { + RelocationMap::new(self.file, self) + } + fn flags(&self) -> SectionFlags { SectionFlags::Coff { characteristics: self.section.characteristics.get(LE), @@ -528,7 +573,7 @@ impl pe::ImageSectionHeader { ) -> read::Result<&'data [pe::ImageRelocation]> { let mut pointer = self.pointer_to_relocations.get(LE).into(); let mut number: usize = self.number_of_relocations.get(LE).into(); - if number == core::u16::MAX.into() + if number == u16::MAX.into() && self.characteristics.get(LE) & pe::IMAGE_SCN_LNK_NRELOC_OVFL != 0 { // Extended relocations. 
Read first relocation (which contains extended count) & adjust diff --git a/third_party/rust/object/src/read/coff/symbol.rs b/third_party/rust/object/src/read/coff/symbol.rs index e95468d7ebc2..18867a65c39f 100644 --- a/third_party/rust/object/src/read/coff/symbol.rs +++ b/third_party/rust/object/src/read/coff/symbol.rs @@ -17,6 +17,9 @@ use crate::read::{ /// A table of symbol entries in a COFF or PE file. /// /// Also includes the string table used for the symbol names. +/// +/// Returned by [`CoffHeader::symbols`] and +/// [`ImageNtHeaders::symbols`](crate::read::pe::ImageNtHeaders::symbols). #[derive(Debug)] pub struct SymbolTable<'data, R = &'data [u8], Coff = pe::ImageFileHeader> where @@ -89,13 +92,13 @@ impl<'data, R: ReadRef<'data>, Coff: CoffHeader> SymbolTable<'data, R, Coff> { pub fn iter<'table>(&'table self) -> SymbolIterator<'data, 'table, R, Coff> { SymbolIterator { symbols: self, - index: 0, + index: SymbolIndex(0), } } /// Return the symbol table entry at the given index. #[inline] - pub fn symbol(&self, index: usize) -> Result<&'data Coff::ImageSymbol> { + pub fn symbol(&self, index: SymbolIndex) -> Result<&'data Coff::ImageSymbol> { self.get::(index, 0) } @@ -103,7 +106,7 @@ impl<'data, R: ReadRef<'data>, Coff: CoffHeader> SymbolTable<'data, R, Coff> { /// /// Note that the index is of the symbol, not the first auxiliary record. #[inline] - pub fn aux_function(&self, index: usize) -> Result<&'data pe::ImageAuxSymbolFunction> { + pub fn aux_function(&self, index: SymbolIndex) -> Result<&'data pe::ImageAuxSymbolFunction> { self.get::(index, 1) } @@ -111,15 +114,16 @@ impl<'data, R: ReadRef<'data>, Coff: CoffHeader> SymbolTable<'data, R, Coff> { /// /// Note that the index is of the symbol, not the first auxiliary record. 
#[inline] - pub fn aux_section(&self, index: usize) -> Result<&'data pe::ImageAuxSymbolSection> { + pub fn aux_section(&self, index: SymbolIndex) -> Result<&'data pe::ImageAuxSymbolSection> { self.get::(index, 1) } /// Return the auxiliary file name for the symbol table entry at the given index. /// /// Note that the index is of the symbol, not the first auxiliary record. - pub fn aux_file_name(&self, index: usize, aux_count: u8) -> Result<&'data [u8]> { + pub fn aux_file_name(&self, index: SymbolIndex, aux_count: u8) -> Result<&'data [u8]> { let entries = index + .0 .checked_add(1) .and_then(|x| Some(x..x.checked_add(aux_count.into())?)) .and_then(|x| self.symbols.get(x)) @@ -133,8 +137,9 @@ impl<'data, R: ReadRef<'data>, Coff: CoffHeader> SymbolTable<'data, R, Coff> { } /// Return the symbol table entry or auxiliary record at the given index and offset. - pub fn get(&self, index: usize, offset: usize) -> Result<&'data T> { + pub fn get(&self, index: SymbolIndex, offset: usize) -> Result<&'data T> { let bytes = index + .0 .checked_add(offset) .and_then(|x| self.symbols.get(x)) .read_error("Invalid COFF symbol index")?; @@ -171,27 +176,28 @@ where Coff: CoffHeader, { symbols: &'table SymbolTable<'data, R, Coff>, - index: usize, + index: SymbolIndex, } impl<'data, 'table, R: ReadRef<'data>, Coff: CoffHeader> Iterator for SymbolIterator<'data, 'table, R, Coff> { - type Item = (usize, &'data Coff::ImageSymbol); + type Item = (SymbolIndex, &'data Coff::ImageSymbol); fn next(&mut self) -> Option { let index = self.index; let symbol = self.symbols.symbol(index).ok()?; - self.index += 1 + symbol.number_of_aux_symbols() as usize; + self.index.0 += 1 + symbol.number_of_aux_symbols() as usize; Some((index, symbol)) } } -/// A symbol table of a `CoffBigFile`. +/// A symbol table in a [`CoffBigFile`](super::CoffBigFile). 
pub type CoffBigSymbolTable<'data, 'file, R = &'data [u8]> = CoffSymbolTable<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// A symbol table of a `CoffFile`. +/// A symbol table in a [`CoffFile`](super::CoffFile) +/// or [`PeFile`](crate::read::pe::PeFile). #[derive(Debug, Clone, Copy)] pub struct CoffSymbolTable<'data, 'file, R = &'data [u8], Coff = pe::ImageFileHeader> where @@ -213,14 +219,11 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbolTable<'data> type SymbolIterator = CoffSymbolIterator<'data, 'file, R, Coff>; fn symbols(&self) -> Self::SymbolIterator { - CoffSymbolIterator { - file: self.file, - index: 0, - } + CoffSymbolIterator::new(self.file) } fn symbol_by_index(&self, index: SymbolIndex) -> Result { - let symbol = self.file.symbols.symbol(index.0)?; + let symbol = self.file.symbols.symbol(index)?; Ok(CoffSymbol { file: self.file, index, @@ -229,18 +232,39 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbolTable<'data> } } -/// An iterator over the symbols of a `CoffBigFile`. +/// An iterator for the symbols in a [`CoffBigFile`](super::CoffBigFile). pub type CoffBigSymbolIterator<'data, 'file, R = &'data [u8]> = CoffSymbolIterator<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// An iterator over the symbols of a `CoffFile`. +/// An iterator for the symbols in a [`CoffFile`](super::CoffFile) +/// or [`PeFile`](crate::read::pe::PeFile). 
pub struct CoffSymbolIterator<'data, 'file, R = &'data [u8], Coff = pe::ImageFileHeader> where R: ReadRef<'data>, Coff: CoffHeader, { - pub(crate) file: &'file CoffCommon<'data, R, Coff>, - pub(crate) index: usize, + file: &'file CoffCommon<'data, R, Coff>, + index: SymbolIndex, +} + +impl<'data, 'file, R, Coff> CoffSymbolIterator<'data, 'file, R, Coff> +where + R: ReadRef<'data>, + Coff: CoffHeader, +{ + pub(crate) fn new(file: &'file CoffCommon<'data, R, Coff>) -> Self { + Self { + file, + index: SymbolIndex(0), + } + } + + pub(crate) fn empty(file: &'file CoffCommon<'data, R, Coff>) -> Self { + Self { + file, + index: SymbolIndex(file.symbols.len()), + } + } } impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> fmt::Debug @@ -259,20 +283,24 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> Iterator fn next(&mut self) -> Option { let index = self.index; let symbol = self.file.symbols.symbol(index).ok()?; - self.index += 1 + symbol.number_of_aux_symbols() as usize; + self.index.0 += 1 + symbol.number_of_aux_symbols() as usize; Some(CoffSymbol { file: self.file, - index: SymbolIndex(index), + index, symbol, }) } } -/// A symbol of a `CoffBigFile`. +/// A symbol in a [`CoffBigFile`](super::CoffBigFile). +/// +/// Most functionality is provided by the [`ObjectSymbol`] trait implementation. pub type CoffBigSymbol<'data, 'file, R = &'data [u8]> = CoffSymbol<'data, 'file, R, pe::AnonObjectHeaderBigobj>; -/// A symbol of a `CoffFile`. +/// A symbol in a [`CoffFile`](super::CoffFile) or [`PeFile`](crate::read::pe::PeFile). +/// +/// Most functionality is provided by the [`ObjectSymbol`] trait implementation. #[derive(Debug, Clone, Copy)] pub struct CoffSymbol<'data, 'file, R = &'data [u8], Coff = pe::ImageFileHeader> where @@ -287,9 +315,15 @@ where impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> CoffSymbol<'data, 'file, R, Coff> { #[inline] /// Get the raw `ImageSymbol` struct. 
+ #[deprecated(note = "Use `coff_symbol` instead")] pub fn raw_symbol(&self) -> &'data Coff::ImageSymbol { self.symbol } + + /// Get the raw `ImageSymbol` struct. + pub fn coff_symbol(&self) -> &'data Coff::ImageSymbol { + self.symbol + } } impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> read::private::Sealed @@ -309,7 +343,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> if self.symbol.has_aux_file_name() { self.file .symbols - .aux_file_name(self.index.0, self.symbol.number_of_aux_symbols()) + .aux_file_name(self.index, self.symbol.number_of_aux_symbols()) } else { self.symbol.name(self.file.symbols.strings()) } @@ -323,21 +357,9 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> } fn address(&self) -> u64 { - // Only return an address for storage classes that we know use an address. - match self.symbol.storage_class() { - pe::IMAGE_SYM_CLASS_STATIC - | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL - | pe::IMAGE_SYM_CLASS_LABEL => {} - pe::IMAGE_SYM_CLASS_EXTERNAL => { - if self.symbol.section_number() == pe::IMAGE_SYM_UNDEFINED { - // Undefined or common data, neither of which have an address. - return 0; - } - } - _ => return 0, - } self.symbol .address(self.file.image_base, &self.file.sections) + .unwrap_or(None) .unwrap_or(0) } @@ -346,7 +368,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> pe::IMAGE_SYM_CLASS_STATIC => { // Section symbols may duplicate the size from the section table. if self.symbol.has_aux_section() { - if let Ok(aux) = self.file.symbols.aux_section(self.index.0) { + if let Ok(aux) = self.file.symbols.aux_section(self.index) { u64::from(aux.length.get(LE)) } else { 0 @@ -362,7 +384,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> u64::from(self.symbol.value()) } else if self.symbol.has_aux_function() { // Function symbols may have a size. 
- if let Ok(aux) = self.file.symbols.aux_function(self.index.0) { + if let Ok(aux) = self.file.symbols.aux_function(self.index) { u64::from(aux.total_size.get(LE)) } else { 0 @@ -384,7 +406,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> }; match self.symbol.storage_class() { pe::IMAGE_SYM_CLASS_STATIC => { - if self.symbol.value() == 0 && self.symbol.number_of_aux_symbols() > 0 { + if self.symbol.has_aux_section() { SymbolKind::Section } else { derived_kind @@ -401,12 +423,16 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> fn section(&self) -> SymbolSection { match self.symbol.section_number() { pe::IMAGE_SYM_UNDEFINED => { - if self.symbol.storage_class() == pe::IMAGE_SYM_CLASS_EXTERNAL - && self.symbol.value() == 0 - { + if self.symbol.storage_class() == pe::IMAGE_SYM_CLASS_EXTERNAL { + if self.symbol.value() == 0 { + SymbolSection::Undefined + } else { + SymbolSection::Common + } + } else if self.symbol.storage_class() == pe::IMAGE_SYM_CLASS_SECTION { SymbolSection::Undefined } else { - SymbolSection::Common + SymbolSection::Unknown } } pe::IMAGE_SYM_ABSOLUTE => SymbolSection::Absolute, @@ -472,7 +498,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> fn flags(&self) -> SymbolFlags { if self.symbol.has_aux_section() { - if let Ok(aux) = self.file.symbols.aux_section(self.index.0) { + if let Ok(aux) = self.file.symbols.aux_section(self.index) { let number = if Coff::is_type_bigobj() { u32::from(aux.number.get(LE)) | (u32::from(aux.high_number.get(LE)) << 16) } else { @@ -492,7 +518,7 @@ impl<'data, 'file, R: ReadRef<'data>, Coff: CoffHeader> ObjectSymbol<'data> } } -/// A trait for generic access to `ImageSymbol` and `ImageSymbolEx`. +/// A trait for generic access to [`pe::ImageSymbol`] and [`pe::ImageSymbolEx`]. 
#[allow(missing_docs)] pub trait ImageSymbol: Debug + Pod { fn raw_name(&self) -> &[u8; 8]; @@ -527,26 +553,43 @@ pub trait ImageSymbol: Debug + Pod { /// Return the symbol address. /// - /// This takes into account the image base and the section address. - fn address(&self, image_base: u64, sections: &SectionTable<'_>) -> Result { - let section_number = self.section_number() as usize; - let section = sections.section(section_number)?; + /// This takes into account the image base and the section address, + /// and only returns an address for symbols that have an address. + fn address(&self, image_base: u64, sections: &SectionTable<'_>) -> Result> { + // Only return an address for storage classes that we know use an address. + match self.storage_class() { + pe::IMAGE_SYM_CLASS_STATIC + | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL + | pe::IMAGE_SYM_CLASS_LABEL + | pe::IMAGE_SYM_CLASS_EXTERNAL => {} + _ => return Ok(None), + } + let Some(section_index) = self.section() else { + return Ok(None); + }; + let section = sections.section(section_index)?; let virtual_address = u64::from(section.virtual_address.get(LE)); let value = u64::from(self.value()); - Ok(image_base + virtual_address + value) + Ok(Some(image_base + virtual_address + value)) + } + + /// Return the section index for the symbol. + fn section(&self) -> Option { + let section_number = self.section_number(); + if section_number > 0 { + Some(SectionIndex(section_number as usize)) + } else { + None + } } /// Return true if the symbol is a definition of a function or data object. fn is_definition(&self) -> bool { - let section_number = self.section_number(); - if section_number == pe::IMAGE_SYM_UNDEFINED { + if self.section_number() <= 0 { return false; } match self.storage_class() { - pe::IMAGE_SYM_CLASS_STATIC => { - // Exclude section symbols. 
- !(self.value() == 0 && self.number_of_aux_symbols() > 0) - } + pe::IMAGE_SYM_CLASS_STATIC => !self.has_aux_section(), pe::IMAGE_SYM_CLASS_EXTERNAL | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL => true, _ => false, } @@ -566,7 +609,7 @@ pub trait ImageSymbol: Debug + Pod { fn has_aux_section(&self) -> bool { self.number_of_aux_symbols() > 0 && self.storage_class() == pe::IMAGE_SYM_CLASS_STATIC - && self.value() == 0 + && self.typ() == 0 } fn base_type(&self) -> u16 { diff --git a/third_party/rust/object/src/read/elf/attributes.rs b/third_party/rust/object/src/read/elf/attributes.rs index 6ec535d7242b..ddddfb256b99 100644 --- a/third_party/rust/object/src/read/elf/attributes.rs +++ b/third_party/rust/object/src/read/elf/attributes.rs @@ -10,7 +10,10 @@ use super::FileHeader; /// /// This may be a GNU attributes section, or an architecture specific attributes section. /// -/// An attributes section contains a series of subsections. +/// An attributes section contains a series of [`AttributesSubsection`]. +/// +/// Returned by [`SectionHeader::attributes`](super::SectionHeader::attributes) +/// and [`SectionHeader::gnu_attributes`](super::SectionHeader::gnu_attributes). #[derive(Debug, Clone)] pub struct AttributesSection<'data, Elf: FileHeader> { endian: Elf::Endian, @@ -24,9 +27,8 @@ impl<'data, Elf: FileHeader> AttributesSection<'data, Elf> { let mut data = Bytes(data); // Skip the version field that is one byte long. - let version = *data - .read::() - .read_error("Invalid ELF attributes section offset or size")?; + // If the section is empty then the version doesn't matter. + let version = data.read::().cloned().unwrap_or(b'A'); Ok(AttributesSection { endian, @@ -54,7 +56,7 @@ impl<'data, Elf: FileHeader> AttributesSection<'data, Elf> { } } -/// An iterator over the subsections in an ELF attributes section. +/// An iterator for the subsections in an [`AttributesSection`]. 
#[derive(Debug, Clone)] pub struct AttributesSubsectionIterator<'data, Elf: FileHeader> { endian: Elf::Endian, @@ -105,9 +107,10 @@ impl<'data, Elf: FileHeader> AttributesSubsectionIterator<'data, Elf> { } } -/// A subsection in an ELF attributes section. +/// A subsection in an [`AttributesSection`]. /// -/// A subsection is identified by a vendor name. It contains a series of sub-subsections. +/// A subsection is identified by a vendor name. It contains a series of +/// [`AttributesSubsubsection`]. #[derive(Debug, Clone)] pub struct AttributesSubsection<'data, Elf: FileHeader> { endian: Elf::Endian, @@ -136,7 +139,7 @@ impl<'data, Elf: FileHeader> AttributesSubsection<'data, Elf> { } } -/// An iterator over the sub-subsections in an ELF attributes section. +/// An iterator for the sub-subsections in an [`AttributesSubsection`]. #[derive(Debug, Clone)] pub struct AttributesSubsubsectionIterator<'data, Elf: FileHeader> { endian: Elf::Endian, @@ -200,7 +203,7 @@ impl<'data, Elf: FileHeader> AttributesSubsubsectionIterator<'data, Elf> { } } -/// A sub-subsection in an ELF attributes section. +/// A sub-subsection in an [`AttributesSubsection`]. /// /// A sub-subsection is identified by a tag. It contains an optional series of indices, /// followed by a series of attributes. @@ -248,7 +251,7 @@ impl<'data> AttributesSubsubsection<'data> { } } -/// An iterator over the indices in a sub-subsection in an ELF attributes section. +/// An iterator over the indices in an [`AttributesSubsubsection`]. #[derive(Debug, Clone)] pub struct AttributeIndexIterator<'data> { data: Bytes<'data>, @@ -271,7 +274,7 @@ impl<'data> AttributeIndexIterator<'data> { } } -/// A parser for the attributes in a sub-subsection in an ELF attributes section. +/// A parser for the attributes in an [`AttributesSubsubsection`]. /// /// The parser relies on the caller to know the format of the data for each attribute tag. 
#[derive(Debug, Clone)] diff --git a/third_party/rust/object/src/read/elf/comdat.rs b/third_party/rust/object/src/read/elf/comdat.rs index 1a2f2f44a90a..cfeb04fc2934 100644 --- a/third_party/rust/object/src/read/elf/comdat.rs +++ b/third_party/rust/object/src/read/elf/comdat.rs @@ -7,22 +7,36 @@ use crate::read::{self, ComdatKind, ObjectComdat, ReadError, ReadRef, SectionInd use super::{ElfFile, FileHeader, SectionHeader, Sym}; -/// An iterator over the COMDAT section groups of an `ElfFile32`. +/// An iterator for the COMDAT section groups in an [`ElfFile32`](super::ElfFile32). pub type ElfComdatIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfComdatIterator<'data, 'file, elf::FileHeader32, R>; -/// An iterator over the COMDAT section groups of an `ElfFile64`. +/// An iterator for the COMDAT section groups in an [`ElfFile64`](super::ElfFile64). pub type ElfComdatIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfComdatIterator<'data, 'file, elf::FileHeader64, R>; -/// An iterator over the COMDAT section groups of an `ElfFile`. +/// An iterator for the COMDAT section groups in an [`ElfFile`]. #[derive(Debug)] pub struct ElfComdatIterator<'data, 'file, Elf, R = &'data [u8]> where Elf: FileHeader, R: ReadRef<'data>, { - pub(super) file: &'file ElfFile<'data, Elf, R>, - pub(super) iter: iter::Enumerate>, + file: &'file ElfFile<'data, Elf, R>, + iter: iter::Enumerate>, +} + +impl<'data, 'file, Elf, R> ElfComdatIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) fn new( + file: &'file ElfFile<'data, Elf, R>, + ) -> ElfComdatIterator<'data, 'file, Elf, R> { + let mut iter = file.sections.iter().enumerate(); + iter.next(); // Skip null section. + ElfComdatIterator { file, iter } + } } impl<'data, 'file, Elf, R> Iterator for ElfComdatIterator<'data, 'file, Elf, R> @@ -42,14 +56,16 @@ where } } -/// A COMDAT section group of an `ElfFile32`. 
+/// A COMDAT section group in an [`ElfFile32`](super::ElfFile32). pub type ElfComdat32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfComdat<'data, 'file, elf::FileHeader32, R>; -/// A COMDAT section group of an `ElfFile64`. +/// A COMDAT section group in an [`ElfFile64`](super::ElfFile64). pub type ElfComdat64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfComdat<'data, 'file, elf::FileHeader64, R>; -/// A COMDAT section group of an `ElfFile`. +/// A COMDAT section group in an [`ElfFile`]. +/// +/// Most functionality is provided by the [`ObjectComdat`] trait implementation. #[derive(Debug)] pub struct ElfComdat<'data, 'file, Elf, R = &'data [u8]> where @@ -80,6 +96,16 @@ where sections, }) } + + /// Get the ELF file containing this COMDAT section group. + pub fn elf_file(&self) -> &'file ElfFile<'data, Elf, R> { + self.file + } + + /// Get the raw ELF section header for the COMDAT section group. + pub fn elf_section_header(&self) -> &'data Elf::SectionHeader { + self.section + } } impl<'data, 'file, Elf, R> read::private::Sealed for ElfComdat<'data, 'file, Elf, R> @@ -106,14 +132,14 @@ where SymbolIndex(self.section.sh_info(self.file.endian) as usize) } - fn name_bytes(&self) -> read::Result<&[u8]> { + fn name_bytes(&self) -> read::Result<&'data [u8]> { // FIXME: check sh_link - let index = self.section.sh_info(self.file.endian) as usize; + let index = self.symbol(); let symbol = self.file.symbols.symbol(index)?; symbol.name(self.file.endian, self.file.symbols.strings()) } - fn name(&self) -> read::Result<&str> { + fn name(&self) -> read::Result<&'data str> { let name = self.name_bytes()?; str::from_utf8(name) .ok() @@ -128,14 +154,14 @@ where } } -/// An iterator over the sections in a COMDAT section group of an `ElfFile32`. +/// An iterator for the sections in a COMDAT section group in an [`ElfFile32`](super::ElfFile32). 
pub type ElfComdatSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfComdatSectionIterator<'data, 'file, elf::FileHeader32, R>; -/// An iterator over the sections in a COMDAT section group of an `ElfFile64`. +/// An iterator for the sections in a COMDAT section group in an [`ElfFile64`](super::ElfFile64). pub type ElfComdatSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfComdatSectionIterator<'data, 'file, elf::FileHeader64, R>; -/// An iterator over the sections in a COMDAT section group of an `ElfFile`. +/// An iterator for the sections in a COMDAT section group in an [`ElfFile`]. #[derive(Debug)] pub struct ElfComdatSectionIterator<'data, 'file, Elf, R = &'data [u8]> where diff --git a/third_party/rust/object/src/read/elf/compression.rs b/third_party/rust/object/src/read/elf/compression.rs index 7242dd39c893..de2533f2deaa 100644 --- a/third_party/rust/object/src/read/elf/compression.rs +++ b/third_party/rust/object/src/read/elf/compression.rs @@ -4,7 +4,7 @@ use crate::elf; use crate::endian; use crate::pod::Pod; -/// A trait for generic access to `CompressionHeader32` and `CompressionHeader64`. +/// A trait for generic access to [`elf::CompressionHeader32`] and [`elf::CompressionHeader64`]. #[allow(missing_docs)] pub trait CompressionHeader: Debug + Pod { type Word: Into; diff --git a/third_party/rust/object/src/read/elf/dynamic.rs b/third_party/rust/object/src/read/elf/dynamic.rs index 5fe15b560da9..1661434aca59 100644 --- a/third_party/rust/object/src/read/elf/dynamic.rs +++ b/third_party/rust/object/src/read/elf/dynamic.rs @@ -6,7 +6,7 @@ use crate::endian; use crate::pod::Pod; use crate::read::{ReadError, Result, StringTable}; -/// A trait for generic access to `Dyn32` and `Dyn64`. +/// A trait for generic access to [`elf::Dyn32`] and [`elf::Dyn64`]. 
#[allow(missing_docs)] pub trait Dyn: Debug + Pod { type Word: Into; diff --git a/third_party/rust/object/src/read/elf/file.rs b/third_party/rust/object/src/read/elf/file.rs index 67be37e21fa6..eb74c284ed22 100644 --- a/third_party/rust/object/src/read/elf/file.rs +++ b/third_party/rust/object/src/read/elf/file.rs @@ -3,11 +3,13 @@ use core::convert::TryInto; use core::fmt::Debug; use core::mem; +use crate::elf; +use crate::endian::{self, Endian, Endianness, U32}; +use crate::pod::Pod; use crate::read::{ self, util, Architecture, ByteString, Bytes, Error, Export, FileFlags, Import, Object, ObjectKind, ReadError, ReadRef, SectionIndex, StringTable, SymbolIndex, }; -use crate::{elf, endian, Endian, Endianness, Pod, U32}; use super::{ CompressionHeader, Dyn, ElfComdat, ElfComdatIterator, ElfDynamicRelocationIterator, ElfSection, @@ -17,15 +19,21 @@ use super::{ }; /// A 32-bit ELF object file. +/// +/// This is a file that starts with [`elf::FileHeader32`], and corresponds +/// to [`crate::FileKind::Elf32`]. pub type ElfFile32<'data, Endian = Endianness, R = &'data [u8]> = ElfFile<'data, elf::FileHeader32, R>; /// A 64-bit ELF object file. +/// +/// This is a file that starts with [`elf::FileHeader64`], and corresponds +/// to [`crate::FileKind::Elf64`]. pub type ElfFile64<'data, Endian = Endianness, R = &'data [u8]> = ElfFile<'data, elf::FileHeader64, R>; /// A partially parsed ELF file. /// -/// Most of the functionality of this type is provided by the `Object` trait implementation. +/// Most functionality is provided by the [`Object`] trait implementation. #[derive(Debug)] pub struct ElfFile<'data, Elf, R = &'data [u8]> where @@ -82,15 +90,55 @@ where } /// Returns the raw ELF file header. + #[deprecated(note = "Use `elf_header` instead")] pub fn raw_header(&self) -> &'data Elf { self.header } /// Returns the raw ELF segments. 
+ #[deprecated(note = "Use `elf_program_headers` instead")] pub fn raw_segments(&self) -> &'data [Elf::ProgramHeader] { self.segments } + /// Get the raw ELF file header. + pub fn elf_header(&self) -> &'data Elf { + self.header + } + + /// Get the raw ELF program headers. + /// + /// Returns an empty slice if the file has no program headers. + pub fn elf_program_headers(&self) -> &'data [Elf::ProgramHeader] { + self.segments + } + + /// Get the ELF section table. + /// + /// Returns an empty section table if the file has no section headers. + pub fn elf_section_table(&self) -> &SectionTable<'data, Elf, R> { + &self.sections + } + + /// Get the ELF symbol table. + /// + /// Returns an empty symbol table if the file has no symbol table. + pub fn elf_symbol_table(&self) -> &SymbolTable<'data, Elf, R> { + &self.symbols + } + + /// Get the ELF dynamic symbol table. + /// + /// Returns an empty symbol table if the file has no dynamic symbol table. + pub fn elf_dynamic_symbol_table(&self) -> &SymbolTable<'data, Elf, R> { + &self.dynamic_symbols + } + + /// Get a mapping for linked relocation sections. 
+ pub fn elf_relocation_sections(&self) -> &RelocationSections { + &self.relocations + } + fn raw_section_by_name<'file>( &'file self, section_name: &[u8], @@ -99,7 +147,7 @@ where .section_by_name(self.endian, section_name) .map(|(index, section)| ElfSection { file: self, - index: SectionIndex(index), + index, section, }) } @@ -134,22 +182,21 @@ where { } -impl<'data, 'file, Elf, R> Object<'data, 'file> for ElfFile<'data, Elf, R> +impl<'data, Elf, R> Object<'data> for ElfFile<'data, Elf, R> where - 'data: 'file, Elf: FileHeader, - R: 'file + ReadRef<'data>, + R: ReadRef<'data>, { - type Segment = ElfSegment<'data, 'file, Elf, R>; - type SegmentIterator = ElfSegmentIterator<'data, 'file, Elf, R>; - type Section = ElfSection<'data, 'file, Elf, R>; - type SectionIterator = ElfSectionIterator<'data, 'file, Elf, R>; - type Comdat = ElfComdat<'data, 'file, Elf, R>; - type ComdatIterator = ElfComdatIterator<'data, 'file, Elf, R>; - type Symbol = ElfSymbol<'data, 'file, Elf, R>; - type SymbolIterator = ElfSymbolIterator<'data, 'file, Elf, R>; - type SymbolTable = ElfSymbolTable<'data, 'file, Elf, R>; - type DynamicRelocationIterator = ElfDynamicRelocationIterator<'data, 'file, Elf, R>; + type Segment<'file> = ElfSegment<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type SegmentIterator<'file> = ElfSegmentIterator<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type Section<'file> = ElfSection<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type SectionIterator<'file> = ElfSectionIterator<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type Comdat<'file> = ElfComdat<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type ComdatIterator<'file> = ElfComdatIterator<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type Symbol<'file> = ElfSymbol<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type SymbolIterator<'file> = ElfSymbolIterator<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type SymbolTable<'file> 
= ElfSymbolTable<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; + type DynamicRelocationIterator<'file> = ElfDynamicRelocationIterator<'data, 'file, Elf, R> where Self: 'file, 'data: 'file; fn architecture(&self) -> Architecture { match ( @@ -178,6 +225,9 @@ where // We only support the 64-bit variant s390x here. (elf::EM_S390, true) => Architecture::S390x, (elf::EM_SBF, _) => Architecture::Sbf, + (elf::EM_SHARC, false) => Architecture::Sharc, + (elf::EM_SPARC, false) => Architecture::Sparc, + (elf::EM_SPARC32PLUS, false) => Architecture::Sparc32Plus, (elf::EM_SPARCV9, true) => Architecture::Sparc64, (elf::EM_XTENSA, false) => Architecture::Xtensa, _ => Architecture::Unknown, @@ -205,14 +255,14 @@ where } } - fn segments(&'file self) -> ElfSegmentIterator<'data, 'file, Elf, R> { + fn segments(&self) -> ElfSegmentIterator<'data, '_, Elf, R> { ElfSegmentIterator { file: self, iter: self.segments.iter(), } } - fn section_by_name_bytes( + fn section_by_name_bytes<'file>( &'file self, section_name: &[u8], ) -> Option> { @@ -220,10 +270,7 @@ where .or_else(|| self.zdebug_section_by_name(section_name)) } - fn section_by_index( - &'file self, - index: SectionIndex, - ) -> read::Result> { + fn section_by_index(&self, index: SectionIndex) -> read::Result> { let section = self.sections.section(index)?; Ok(ElfSection { file: self, @@ -232,25 +279,16 @@ where }) } - fn sections(&'file self) -> ElfSectionIterator<'data, 'file, Elf, R> { - ElfSectionIterator { - file: self, - iter: self.sections.iter().enumerate(), - } + fn sections(&self) -> ElfSectionIterator<'data, '_, Elf, R> { + ElfSectionIterator::new(self) } - fn comdats(&'file self) -> ElfComdatIterator<'data, 'file, Elf, R> { - ElfComdatIterator { - file: self, - iter: self.sections.iter().enumerate(), - } + fn comdats(&self) -> ElfComdatIterator<'data, '_, Elf, R> { + ElfComdatIterator::new(self) } - fn symbol_by_index( - &'file self, - index: SymbolIndex, - ) -> read::Result> { - let symbol = 
self.symbols.symbol(index.0)?; + fn symbol_by_index(&self, index: SymbolIndex) -> read::Result> { + let symbol = self.symbols.symbol(index)?; Ok(ElfSymbol { endian: self.endian, symbols: &self.symbols, @@ -259,15 +297,11 @@ where }) } - fn symbols(&'file self) -> ElfSymbolIterator<'data, 'file, Elf, R> { - ElfSymbolIterator { - endian: self.endian, - symbols: &self.symbols, - index: 0, - } + fn symbols(&self) -> ElfSymbolIterator<'data, '_, Elf, R> { + ElfSymbolIterator::new(self.endian, &self.symbols) } - fn symbol_table(&'file self) -> Option> { + fn symbol_table(&self) -> Option> { if self.symbols.is_empty() { return None; } @@ -277,15 +311,11 @@ where }) } - fn dynamic_symbols(&'file self) -> ElfSymbolIterator<'data, 'file, Elf, R> { - ElfSymbolIterator { - endian: self.endian, - symbols: &self.dynamic_symbols, - index: 0, - } + fn dynamic_symbols(&self) -> ElfSymbolIterator<'data, '_, Elf, R> { + ElfSymbolIterator::new(self.endian, &self.dynamic_symbols) } - fn dynamic_symbol_table(&'file self) -> Option> { + fn dynamic_symbol_table(&self) -> Option> { if self.dynamic_symbols.is_empty() { return None; } @@ -295,7 +325,7 @@ where }) } - fn dynamic_relocations( + fn dynamic_relocations<'file>( &'file self, ) -> Option> { Some(ElfDynamicRelocationIterator { @@ -305,17 +335,24 @@ where }) } - /// Get the imported symbols. 
fn imports(&self) -> read::Result>> { + let versions = self.sections.versions(self.endian, self.data)?; + let mut imports = Vec::new(); - for symbol in self.dynamic_symbols.iter() { + for (index, symbol) in self.dynamic_symbols.enumerate() { if symbol.is_undefined(self.endian) { let name = symbol.name(self.endian, self.dynamic_symbols.strings())?; if !name.is_empty() { - // TODO: use symbol versioning to determine library + let library = if let Some(svt) = versions.as_ref() { + let vi = svt.version_index(self.endian, index); + svt.version(vi)?.and_then(|v| v.file()) + } else { + None + } + .unwrap_or(&[]); imports.push(Import { name: ByteString(name), - library: ByteString(&[]), + library: ByteString(library), }); } } @@ -323,7 +360,6 @@ where Ok(imports) } - /// Get the exported symbols. fn exports(&self) -> read::Result>> { let mut exports = Vec::new(); for symbol in self.dynamic_symbols.iter() { @@ -436,7 +472,7 @@ where } } -/// A trait for generic access to `FileHeader32` and `FileHeader64`. +/// A trait for generic access to [`elf::FileHeader32`] and [`elf::FileHeader64`]. #[allow(missing_docs)] pub trait FileHeader: Debug + Pod { // Ideally this would be a `u64: From`, but can't express that. @@ -461,7 +497,7 @@ pub trait FileHeader: Debug + Pod { /// /// This is a property of the type, not a value in the header data. /// - /// This is the same as `is_type_64`, but is non-dispatchable. + /// This is the same as [`Self::is_type_64`], but is non-dispatchable. fn is_type_64_sized() -> bool where Self: Sized; @@ -676,6 +712,18 @@ pub trait FileHeader: Debug + Pod { .read_error("Invalid ELF section header offset/size/alignment") } + /// Get the section index of the section header string table. + /// + /// Returns `Err` for invalid values (including if the index is 0). 
+ fn section_strings_index<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result { + self.shstrndx(endian, data) + .map(|index| SectionIndex(index as usize)) + } + /// Return the string table for the section headers. fn section_strings<'data, R: ReadRef<'data>>( &self, @@ -686,8 +734,8 @@ pub trait FileHeader: Debug + Pod { if sections.is_empty() { return Ok(StringTable::default()); } - let index = self.shstrndx(endian, data)? as usize; - let shstrtab = sections.get(index).read_error("Invalid ELF e_shstrndx")?; + let index = self.section_strings_index(endian, data)?; + let shstrtab = sections.get(index.0).read_error("Invalid ELF e_shstrndx")?; let strings = if let Some((shstrtab_offset, shstrtab_size)) = shstrtab.file_range(endian) { let shstrtab_end = shstrtab_offset .checked_add(shstrtab_size) diff --git a/third_party/rust/object/src/read/elf/hash.rs b/third_party/rust/object/src/read/elf/hash.rs index aadbb9208ace..bbc7ae93ab03 100644 --- a/third_party/rust/object/src/read/elf/hash.rs +++ b/third_party/rust/object/src/read/elf/hash.rs @@ -1,12 +1,14 @@ use core::mem; use crate::elf; -use crate::read::{ReadError, ReadRef, Result}; -use crate::{U32, U64}; +use crate::endian::{U32, U64}; +use crate::read::{ReadError, ReadRef, Result, SymbolIndex}; use super::{FileHeader, Sym, SymbolTable, Version, VersionTable}; /// A SysV symbol hash table in an ELF file. +/// +/// Returned by [`SectionHeader::hash`](super::SectionHeader::hash). #[derive(Debug)] pub struct HashTable<'data, Elf: FileHeader> { buckets: &'data [U32], @@ -16,8 +18,8 @@ pub struct HashTable<'data, Elf: FileHeader> { impl<'data, Elf: FileHeader> HashTable<'data, Elf> { /// Parse a SysV hash table. /// - /// `data` should be from a `SHT_HASH` section, or from a - /// segment pointed to via the `DT_HASH` entry. + /// `data` should be from an [`elf::SHT_HASH`] section, or from a + /// segment pointed to via the [`elf::DT_HASH`] entry. 
/// /// The header is read at offset 0 in the given `data`. pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result { @@ -39,6 +41,14 @@ impl<'data, Elf: FileHeader> HashTable<'data, Elf> { self.chains.len() as u32 } + fn bucket(&self, endian: Elf::Endian, hash: u32) -> SymbolIndex { + SymbolIndex(self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize) + } + + fn chain(&self, endian: Elf::Endian, index: SymbolIndex) -> SymbolIndex { + SymbolIndex(self.chains[index.0].get(endian) as usize) + } + /// Use the hash table to find the symbol table entry with the given name, hash and version. pub fn find>( &self, @@ -48,13 +58,13 @@ impl<'data, Elf: FileHeader> HashTable<'data, Elf> { version: Option<&Version<'_>>, symbols: &SymbolTable<'data, Elf, R>, versions: &VersionTable<'data, Elf>, - ) -> Option<(usize, &'data Elf::Sym)> { + ) -> Option<(SymbolIndex, &'data Elf::Sym)> { // Get the chain start from the bucket for this hash. - let mut index = self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize; + let mut index = self.bucket(endian, hash); // Avoid infinite loop. let mut i = 0; let strings = symbols.strings(); - while index != 0 && i < self.chains.len() { + while index != SymbolIndex(0) && i < self.chains.len() { if let Ok(symbol) = symbols.symbol(index) { if symbol.name(endian, strings) == Ok(name) && versions.matches(endian, index, version) @@ -62,7 +72,7 @@ impl<'data, Elf: FileHeader> HashTable<'data, Elf> { return Some((index, symbol)); } } - index = self.chains.get(index)?.get(endian) as usize; + index = self.chain(endian, index); i += 1; } None @@ -70,6 +80,8 @@ impl<'data, Elf: FileHeader> HashTable<'data, Elf> { } /// A GNU symbol hash table in an ELF file. +/// +/// Returned by [`SectionHeader::gnu_hash`](super::SectionHeader::gnu_hash). 
#[derive(Debug)] pub struct GnuHashTable<'data, Elf: FileHeader> { symbol_base: u32, @@ -82,15 +94,15 @@ pub struct GnuHashTable<'data, Elf: FileHeader> { impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> { /// Parse a GNU hash table. /// - /// `data` should be from a `SHT_GNU_HASH` section, or from a - /// segment pointed to via the `DT_GNU_HASH` entry. + /// `data` should be from an [`elf::SHT_GNU_HASH`] section, or from a + /// segment pointed to via the [`elf::DT_GNU_HASH`] entry. /// /// The header is read at offset 0 in the given `data`. /// /// The header does not contain a length field, and so all of `data` /// will be used as the hash table values. It does not matter if this /// is longer than needed, and this will often the case when accessing - /// the hash table via the `DT_GNU_HASH` entry. + /// the hash table via the [`elf::DT_GNU_HASH`] entry. pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result { let mut offset = 0; let header = data @@ -154,6 +166,10 @@ impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> { None } + fn bucket(&self, endian: Elf::Endian, hash: u32) -> SymbolIndex { + SymbolIndex(self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize) + } + /// Use the hash table to find the symbol table entry with the given name, hash, and version. pub fn find>( &self, @@ -163,7 +179,7 @@ impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> { version: Option<&Version<'_>>, symbols: &SymbolTable<'data, Elf, R>, versions: &VersionTable<'data, Elf>, - ) -> Option<(usize, &'data Elf::Sym)> { + ) -> Option<(SymbolIndex, &'data Elf::Sym)> { let word_bits = mem::size_of::() as u32 * 8; // Test against bloom filter. @@ -190,17 +206,17 @@ impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> { } // Get the chain start from the bucket for this hash. 
- let mut index = self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize; - if index == 0 { + let mut index = self.bucket(endian, hash); + if index == SymbolIndex(0) { return None; } // Test symbols in the chain. let strings = symbols.strings(); - let symbols = symbols.symbols().get(index..)?; + let symbols = symbols.symbols().get(index.0..)?; let values = self .values - .get(index.checked_sub(self.symbol_base as usize)?..)?; + .get(index.0.checked_sub(self.symbol_base as usize)?..)?; for (symbol, value) in symbols.iter().zip(values.iter()) { let value = value.get(endian); if value | 1 == hash | 1 { @@ -213,7 +229,7 @@ impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> { if value & 1 != 0 { break; } - index += 1; + index.0 += 1; } None } diff --git a/third_party/rust/object/src/read/elf/mod.rs b/third_party/rust/object/src/read/elf/mod.rs index 07db6cd66006..66931bdd5479 100644 --- a/third_party/rust/object/src/read/elf/mod.rs +++ b/third_party/rust/object/src/read/elf/mod.rs @@ -1,9 +1,45 @@ //! Support for reading ELF files. //! -//! Defines traits to abstract over the difference between ELF32/ELF64, -//! and implements read functionality in terms of these traits. +//! Traits are used to abstract over the difference between 32-bit and 64-bit ELF. +//! The primary trait for this is [`FileHeader`]. //! -//! Also provides `ElfFile` and related types which implement the `Object` trait. +//! ## High level API +//! +//! [`ElfFile`] implements the [`Object`](crate::read::Object) trait for ELF files. +//! [`ElfFile`] is parameterised by [`FileHeader`] to allow reading both 32-bit and +//! 64-bit ELF. There are type aliases for these parameters ([`ElfFile32`] and +//! [`ElfFile64`]). +//! +//! ## Low level API +//! +//! The [`FileHeader`] trait can be directly used to parse both [`elf::FileHeader32`] +//! and [`elf::FileHeader64`]. +//! +//! ### Example for low level API +//! ```no_run +//! use object::elf; +//! 
use object::read::elf::{FileHeader, Sym}; +//! use std::error::Error; +//! use std::fs; +//! +//! /// Reads a file and displays the name of each symbol. +//! fn main() -> Result<(), Box> { +//! # #[cfg(feature = "std")] { +//! let data = fs::read("path/to/binary")?; +//! let elf = elf::FileHeader64::::parse(&*data)?; +//! let endian = elf.endian()?; +//! let sections = elf.sections(endian, &*data)?; +//! let symbols = sections.symbols(endian, &*data, elf::SHT_SYMTAB)?; +//! for symbol in symbols.iter() { +//! let name = symbol.name(endian, symbols.strings())?; +//! println!("{}", String::from_utf8_lossy(name)); +//! } +//! # } +//! Ok(()) +//! } +//! ``` +#[cfg(doc)] +use crate::elf; mod file; pub use file::*; diff --git a/third_party/rust/object/src/read/elf/note.rs b/third_party/rust/object/src/read/elf/note.rs index 84d4179de2c9..e2beef922485 100644 --- a/third_party/rust/object/src/read/elf/note.rs +++ b/third_party/rust/object/src/read/elf/note.rs @@ -10,6 +10,9 @@ use crate::read::{self, Bytes, Error, ReadError}; use super::FileHeader; /// An iterator over the notes in an ELF section or segment. +/// +/// Returned [`ProgramHeader::notes`](super::ProgramHeader::notes) +/// and [`SectionHeader::notes`](super::SectionHeader::notes). #[derive(Debug)] pub struct NoteIterator<'data, Elf> where @@ -84,7 +87,7 @@ where } } -/// A parsed `NoteHeader`. +/// A parsed [`NoteHeader`]. #[derive(Debug)] pub struct Note<'data, Elf> where @@ -141,7 +144,7 @@ impl<'data, Elf: FileHeader> Note<'data, Elf> { self.desc } - /// Return an iterator for properties if this note's type is `NT_GNU_PROPERTY_TYPE_0`. + /// Return an iterator for properties if this note's type is [`elf::NT_GNU_PROPERTY_TYPE_0`]. pub fn gnu_properties( &self, endian: Elf::Endian, @@ -160,7 +163,7 @@ impl<'data, Elf: FileHeader> Note<'data, Elf> { } } -/// A trait for generic access to `NoteHeader32` and `NoteHeader64`. +/// A trait for generic access to [`elf::NoteHeader32`] and [`elf::NoteHeader64`]. 
#[allow(missing_docs)] pub trait NoteHeader: Debug + Pod { type Endian: endian::Endian; @@ -208,7 +211,9 @@ impl NoteHeader for elf::NoteHeader64 { } } -/// An iterator over the properties in a `NT_GNU_PROPERTY_TYPE_0` note. +/// An iterator for the properties in a [`elf::NT_GNU_PROPERTY_TYPE_0`] note. +/// +/// Returned by [`Note::gnu_properties`]. #[derive(Debug)] pub struct GnuPropertyIterator<'data, Endian: endian::Endian> { endian: Endian, @@ -236,7 +241,7 @@ impl<'data, Endian: endian::Endian> GnuPropertyIterator<'data, Endian> { } } -/// A property in a `NT_GNU_PROPERTY_TYPE_0` note. +/// A property in a [`elf::NT_GNU_PROPERTY_TYPE_0`] note. #[derive(Debug)] pub struct GnuProperty<'data> { pr_type: u32, diff --git a/third_party/rust/object/src/read/elf/relocation.rs b/third_party/rust/object/src/read/elf/relocation.rs index 78032dfdbc0f..3790e0279169 100644 --- a/third_party/rust/object/src/read/elf/relocation.rs +++ b/third_party/rust/object/src/read/elf/relocation.rs @@ -7,14 +7,14 @@ use crate::elf; use crate::endian::{self, Endianness}; use crate::pod::Pod; use crate::read::{ - self, Error, ReadRef, Relocation, RelocationEncoding, RelocationKind, RelocationTarget, - SectionIndex, SymbolIndex, + self, Error, ReadRef, Relocation, RelocationEncoding, RelocationFlags, RelocationKind, + RelocationTarget, SectionIndex, SymbolIndex, }; use super::{ElfFile, FileHeader, SectionHeader, SectionTable}; /// A mapping from section index to associated relocation sections. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct RelocationSections { relocations: Vec, } @@ -34,23 +34,30 @@ impl RelocationSections { if sh_type == elf::SHT_REL || sh_type == elf::SHT_RELA { // The symbol indices used in relocations must be for the symbol table // we are expecting to use. 
- let sh_link = SectionIndex(section.sh_link(endian) as usize); + let sh_link = section.link(endian); if sh_link != symbol_section { continue; } - let sh_info = section.sh_info(endian) as usize; - if sh_info == 0 { + let sh_info = section.info_link(endian); + if sh_info == SectionIndex(0) { // Skip dynamic relocations. continue; } - if sh_info >= relocations.len() { + if sh_info.0 >= relocations.len() { return Err(Error("Invalid ELF sh_info for relocation section")); } + // We don't support relocations that apply to other relocation sections + // because it interferes with the chaining of relocation sections below. + let sh_info_type = sections.section(sh_info)?.sh_type(endian); + if sh_info_type == elf::SHT_REL || sh_info_type == elf::SHT_RELA { + return Err(Error("Unsupported ELF sh_info for relocation section")); + } + // Handle multiple relocation sections by chaining them. - let next = relocations[sh_info]; - relocations[sh_info] = index; + let next = relocations[sh_info.0]; + relocations[sh_info.0] = index; relocations[index] = next; } } @@ -61,8 +68,12 @@ impl RelocationSections { /// /// This may also be called with a relocation section index, and it will return the /// next associated relocation section. - pub fn get(&self, index: usize) -> Option { - self.relocations.get(index).cloned().filter(|x| *x != 0) + pub fn get(&self, index: SectionIndex) -> Option { + self.relocations + .get(index.0) + .cloned() + .filter(|x| *x != 0) + .map(SectionIndex) } } @@ -91,14 +102,14 @@ impl<'data, Elf: FileHeader> Iterator for ElfRelaIterator<'data, Elf> { } } -/// An iterator over the dynamic relocations for an `ElfFile32`. +/// An iterator for the dynamic relocations in an [`ElfFile32`](super::ElfFile32). pub type ElfDynamicRelocationIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfDynamicRelocationIterator<'data, 'file, elf::FileHeader32, R>; -/// An iterator over the dynamic relocations for an `ElfFile64`. 
+/// An iterator for the dynamic relocations in an [`ElfFile64`](super::ElfFile64). pub type ElfDynamicRelocationIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfDynamicRelocationIterator<'data, 'file, elf::FileHeader64, R>; -/// An iterator over the dynamic relocations for an `ElfFile`. +/// An iterator for the dynamic relocations in an [`ElfFile`]. pub struct ElfDynamicRelocationIterator<'data, 'file, Elf, R = &'data [u8]> where Elf: FileHeader, @@ -132,8 +143,7 @@ where let section = self.file.sections.section(self.section_index).ok()?; self.section_index.0 += 1; - let sh_link = SectionIndex(section.sh_link(endian) as usize); - if sh_link != self.file.dynamic_symbols.section() { + if section.link(endian) != self.file.dynamic_symbols.section() { continue; } @@ -164,14 +174,14 @@ where } } -/// An iterator over the relocations for an `ElfSection32`. +/// An iterator for the relocations for an [`ElfSection32`](super::ElfSection32). pub type ElfSectionRelocationIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSectionRelocationIterator<'data, 'file, elf::FileHeader32, R>; -/// An iterator over the relocations for an `ElfSection64`. +/// An iterator for the relocations for an [`ElfSection64`](super::ElfSection64). pub type ElfSectionRelocationIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSectionRelocationIterator<'data, 'file, elf::FileHeader64, R>; -/// An iterator over the relocations for an `ElfSection`. +/// An iterator for the relocations for an [`ElfSection`](super::ElfSection). pub struct ElfSectionRelocationIterator<'data, 'file, Elf, R = &'data [u8]> where Elf: FileHeader, @@ -201,7 +211,7 @@ where } self.relocations = None; } - self.section_index = SectionIndex(self.file.relocations.get(self.section_index.0)?); + self.section_index = self.file.relocations.get(self.section_index)?; // The construction of RelocationSections ensures section_index is valid. 
let section = self.file.sections.section(self.section_index).unwrap(); match section.sh_type(endian) { @@ -239,10 +249,12 @@ fn parse_relocation( ) -> Relocation { let mut encoding = RelocationEncoding::Generic; let is_mips64el = header.is_mips64el(endian); + let r_type = reloc.r_type(endian, is_mips64el); + let flags = RelocationFlags::Elf { r_type }; let (kind, size) = match header.e_machine(endian) { elf::EM_AARCH64 => { if header.is_type_64() { - match reloc.r_type(endian, false) { + match r_type { elf::R_AARCH64_ABS64 => (RelocationKind::Absolute, 64), elf::R_AARCH64_ABS32 => (RelocationKind::Absolute, 32), elf::R_AARCH64_ABS16 => (RelocationKind::Absolute, 16), @@ -253,35 +265,35 @@ fn parse_relocation( encoding = RelocationEncoding::AArch64Call; (RelocationKind::PltRelative, 26) } - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), } } else { - match reloc.r_type(endian, false) { + match r_type { elf::R_AARCH64_P32_ABS32 => (RelocationKind::Absolute, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), } } } - elf::EM_ARM => match reloc.r_type(endian, false) { + elf::EM_ARM => match r_type { elf::R_ARM_ABS32 => (RelocationKind::Absolute, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_AVR => match reloc.r_type(endian, false) { + elf::EM_AVR => match r_type { elf::R_AVR_32 => (RelocationKind::Absolute, 32), elf::R_AVR_16 => (RelocationKind::Absolute, 16), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_BPF => match reloc.r_type(endian, false) { + elf::EM_BPF => match r_type { elf::R_BPF_64_64 => (RelocationKind::Absolute, 64), elf::R_BPF_64_32 => (RelocationKind::Absolute, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_CSKY => match reloc.r_type(endian, false) { + elf::EM_CSKY => match r_type { elf::R_CKCORE_ADDR32 => 
(RelocationKind::Absolute, 32), elf::R_CKCORE_PCREL32 => (RelocationKind::Relative, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_386 => match reloc.r_type(endian, false) { + elf::EM_386 => match r_type { elf::R_386_32 => (RelocationKind::Absolute, 32), elf::R_386_PC32 => (RelocationKind::Relative, 32), elf::R_386_GOT32 => (RelocationKind::Got, 32), @@ -292,9 +304,9 @@ fn parse_relocation( elf::R_386_PC16 => (RelocationKind::Relative, 16), elf::R_386_8 => (RelocationKind::Absolute, 8), elf::R_386_PC8 => (RelocationKind::Relative, 8), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_X86_64 => match reloc.r_type(endian, false) { + elf::EM_X86_64 => match r_type { elf::R_X86_64_64 => (RelocationKind::Absolute, 64), elf::R_X86_64_PC32 => (RelocationKind::Relative, 32), elf::R_X86_64_GOT32 => (RelocationKind::Got, 32), @@ -309,16 +321,17 @@ fn parse_relocation( elf::R_X86_64_PC16 => (RelocationKind::Relative, 16), elf::R_X86_64_8 => (RelocationKind::Absolute, 8), elf::R_X86_64_PC8 => (RelocationKind::Relative, 8), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_HEXAGON => match reloc.r_type(endian, false) { + elf::EM_HEXAGON => match r_type { elf::R_HEX_32 => (RelocationKind::Absolute, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_LOONGARCH => match reloc.r_type(endian, false) { + elf::EM_LOONGARCH => match r_type { elf::R_LARCH_32 => (RelocationKind::Absolute, 32), elf::R_LARCH_64 => (RelocationKind::Absolute, 64), elf::R_LARCH_32_PCREL => (RelocationKind::Relative, 32), + elf::R_LARCH_64_PCREL => (RelocationKind::Relative, 64), elf::R_LARCH_B16 => { encoding = RelocationEncoding::LoongArchBranch; (RelocationKind::Relative, 16) @@ -331,34 +344,34 @@ fn parse_relocation( encoding = RelocationEncoding::LoongArchBranch; (RelocationKind::Relative, 26) } - r_type => 
(RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_MIPS => match reloc.r_type(endian, is_mips64el) { + elf::EM_MIPS => match r_type { elf::R_MIPS_16 => (RelocationKind::Absolute, 16), elf::R_MIPS_32 => (RelocationKind::Absolute, 32), elf::R_MIPS_64 => (RelocationKind::Absolute, 64), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_MSP430 => match reloc.r_type(endian, false) { + elf::EM_MSP430 => match r_type { elf::R_MSP430_32 => (RelocationKind::Absolute, 32), elf::R_MSP430_16_BYTE => (RelocationKind::Absolute, 16), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_PPC => match reloc.r_type(endian, false) { + elf::EM_PPC => match r_type { elf::R_PPC_ADDR32 => (RelocationKind::Absolute, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_PPC64 => match reloc.r_type(endian, false) { + elf::EM_PPC64 => match r_type { elf::R_PPC64_ADDR32 => (RelocationKind::Absolute, 32), elf::R_PPC64_ADDR64 => (RelocationKind::Absolute, 64), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_RISCV => match reloc.r_type(endian, false) { + elf::EM_RISCV => match r_type { elf::R_RISCV_32 => (RelocationKind::Absolute, 32), elf::R_RISCV_64 => (RelocationKind::Absolute, 64), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_S390 => match reloc.r_type(endian, false) { + elf::EM_S390 => match r_type { elf::R_390_8 => (RelocationKind::Absolute, 8), elf::R_390_16 => (RelocationKind::Absolute, 16), elf::R_390_32 => (RelocationKind::Absolute, 32), @@ -397,32 +410,79 @@ fn parse_relocation( encoding = RelocationEncoding::S390xDbl; (RelocationKind::GotBaseRelative, 32) } - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_SBF => match reloc.r_type(endian, false) { + elf::EM_SBF => match r_type 
{ elf::R_SBF_64_64 => (RelocationKind::Absolute, 64), elf::R_SBF_64_32 => (RelocationKind::Absolute, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - elf::EM_SPARC | elf::EM_SPARC32PLUS | elf::EM_SPARCV9 => { - match reloc.r_type(endian, false) { - elf::R_SPARC_32 | elf::R_SPARC_UA32 => (RelocationKind::Absolute, 32), - elf::R_SPARC_64 | elf::R_SPARC_UA64 => (RelocationKind::Absolute, 64), - r_type => (RelocationKind::Elf(r_type), 0), + elf::EM_SHARC => match r_type { + elf::R_SHARC_ADDR24_V3 => { + encoding = RelocationEncoding::SharcTypeA; + (RelocationKind::Absolute, 24) } - } - elf::EM_XTENSA => match reloc.r_type(endian, false) { + elf::R_SHARC_ADDR32_V3 => { + encoding = RelocationEncoding::SharcTypeA; + (RelocationKind::Absolute, 32) + } + elf::R_SHARC_ADDR_VAR_V3 => { + encoding = RelocationEncoding::Generic; + (RelocationKind::Absolute, 32) + } + elf::R_SHARC_PCRSHORT_V3 => { + encoding = RelocationEncoding::SharcTypeA; + (RelocationKind::Relative, 6) + } + elf::R_SHARC_PCRLONG_V3 => { + encoding = RelocationEncoding::SharcTypeA; + (RelocationKind::Relative, 24) + } + elf::R_SHARC_DATA6_V3 => { + encoding = RelocationEncoding::SharcTypeA; + (RelocationKind::Absolute, 6) + } + elf::R_SHARC_DATA16_V3 => { + encoding = RelocationEncoding::SharcTypeA; + (RelocationKind::Absolute, 16) + } + elf::R_SHARC_DATA6_VISA_V3 => { + encoding = RelocationEncoding::SharcTypeB; + (RelocationKind::Absolute, 6) + } + elf::R_SHARC_DATA7_VISA_V3 => { + encoding = RelocationEncoding::SharcTypeB; + (RelocationKind::Absolute, 7) + } + elf::R_SHARC_DATA16_VISA_V3 => { + encoding = RelocationEncoding::SharcTypeB; + (RelocationKind::Absolute, 16) + } + elf::R_SHARC_PCR6_VISA_V3 => { + encoding = RelocationEncoding::SharcTypeB; + (RelocationKind::Relative, 16) + } + elf::R_SHARC_ADDR_VAR16_V3 => { + encoding = RelocationEncoding::Generic; + (RelocationKind::Absolute, 16) + } + _ => (RelocationKind::Unknown, 0), + }, + elf::EM_SPARC | 
elf::EM_SPARC32PLUS | elf::EM_SPARCV9 => match r_type { + elf::R_SPARC_32 | elf::R_SPARC_UA32 => (RelocationKind::Absolute, 32), + elf::R_SPARC_64 | elf::R_SPARC_UA64 => (RelocationKind::Absolute, 64), + _ => (RelocationKind::Unknown, 0), + }, + elf::EM_XTENSA => match r_type { elf::R_XTENSA_32 => (RelocationKind::Absolute, 32), elf::R_XTENSA_32_PCREL => (RelocationKind::Relative, 32), - r_type => (RelocationKind::Elf(r_type), 0), + _ => (RelocationKind::Unknown, 0), }, - _ => (RelocationKind::Elf(reloc.r_type(endian, false)), 0), + _ => (RelocationKind::Unknown, 0), }; - let sym = reloc.r_sym(endian, is_mips64el) as usize; - let target = if sym == 0 { - RelocationTarget::Absolute - } else { - RelocationTarget::Symbol(SymbolIndex(sym)) + let target = match reloc.symbol(endian, is_mips64el) { + None => RelocationTarget::Absolute, + Some(symbol) => RelocationTarget::Symbol(symbol), }; Relocation { kind, @@ -431,10 +491,11 @@ fn parse_relocation( target, addend: reloc.r_addend(endian).into(), implicit_addend, + flags, } } -/// A trait for generic access to `Rel32` and `Rel64`. +/// A trait for generic access to [`elf::Rel32`] and [`elf::Rel64`]. #[allow(missing_docs)] pub trait Rel: Debug + Pod + Clone { type Word: Into; @@ -445,6 +506,18 @@ pub trait Rel: Debug + Pod + Clone { fn r_info(&self, endian: Self::Endian) -> Self::Word; fn r_sym(&self, endian: Self::Endian) -> u32; fn r_type(&self, endian: Self::Endian) -> u32; + + /// Get the symbol index referenced by the relocation. + /// + /// Returns `None` for the null symbol index. + fn symbol(&self, endian: Self::Endian) -> Option { + let sym = self.r_sym(endian); + if sym == 0 { + None + } else { + Some(SymbolIndex(sym as usize)) + } + } } impl Rel for elf::Rel32 { @@ -499,7 +572,7 @@ impl Rel for elf::Rel64 { } } -/// A trait for generic access to `Rela32` and `Rela64`. +/// A trait for generic access to [`elf::Rela32`] and [`elf::Rela64`]. 
#[allow(missing_docs)] pub trait Rela: Debug + Pod + Clone { type Word: Into; @@ -511,6 +584,18 @@ pub trait Rela: Debug + Pod + Clone { fn r_addend(&self, endian: Self::Endian) -> Self::Sword; fn r_sym(&self, endian: Self::Endian, is_mips64el: bool) -> u32; fn r_type(&self, endian: Self::Endian, is_mips64el: bool) -> u32; + + /// Get the symbol index referenced by the relocation. + /// + /// Returns `None` for the null symbol index. + fn symbol(&self, endian: Self::Endian, is_mips64el: bool) -> Option { + let sym = self.r_sym(endian, is_mips64el); + if sym == 0 { + None + } else { + Some(SymbolIndex(sym as usize)) + } + } } impl Rela for elf::Rela32 { diff --git a/third_party/rust/object/src/read/elf/section.rs b/third_party/rust/object/src/read/elf/section.rs index df08f9e3e2b4..f175d84d78e0 100644 --- a/third_party/rust/object/src/read/elf/section.rs +++ b/third_party/rust/object/src/read/elf/section.rs @@ -1,12 +1,13 @@ use core::fmt::Debug; -use core::{iter, mem, slice, str}; +use core::{iter, slice, str}; use crate::elf; use crate::endian::{self, Endianness, U32Bytes}; -use crate::pod::Pod; +use crate::pod::{self, Pod}; use crate::read::{ - self, Bytes, CompressedData, CompressedFileRange, CompressionFormat, Error, ObjectSection, - ReadError, ReadRef, SectionFlags, SectionIndex, SectionKind, StringTable, + self, gnu_compression, CompressedData, CompressedFileRange, CompressionFormat, Error, + ObjectSection, ReadError, ReadRef, RelocationMap, SectionFlags, SectionIndex, SectionKind, + StringTable, }; use super::{ @@ -18,7 +19,9 @@ use super::{ /// The table of section headers in an ELF file. /// /// Also includes the string table used for the section names. -#[derive(Debug, Default, Clone, Copy)] +/// +/// Returned by [`FileHeader::sections`]. 
+#[derive(Debug, Clone, Copy)] pub struct SectionTable<'data, Elf: FileHeader, R = &'data [u8]> where R: ReadRef<'data>, @@ -27,6 +30,15 @@ where strings: StringTable<'data, R>, } +impl<'data, Elf: FileHeader, R: ReadRef<'data>> Default for SectionTable<'data, Elf, R> { + fn default() -> Self { + SectionTable { + sections: &[], + strings: StringTable::default(), + } + } +} + impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { /// Create a new section table. #[inline] @@ -35,11 +47,24 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { } /// Iterate over the section headers. + /// + /// This includes the null section at index 0, which you will usually need to skip. #[inline] pub fn iter(&self) -> slice::Iter<'data, Elf::SectionHeader> { self.sections.iter() } + /// Iterate over the section headers and their indices. + /// + /// This includes the null section at index 0, which you will usually need to skip. + #[inline] + pub fn enumerate(&self) -> impl Iterator { + self.sections + .iter() + .enumerate() + .map(|(i, section)| (SectionIndex(i), section)) + } + /// Return true if the section table is empty. #[inline] pub fn is_empty(&self) -> bool { @@ -52,8 +77,13 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { self.sections.len() } - /// Return the section header at the given index. + /// Get the section header at the given index. + /// + /// Returns an error for the null section at index 0. 
pub fn section(&self, index: SectionIndex) -> read::Result<&'data Elf::SectionHeader> { + if index == SectionIndex(0) { + return Err(read::Error("Invalid ELF section index")); + } self.sections .get(index.0) .read_error("Invalid ELF section index") @@ -66,10 +96,8 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { &self, endian: Elf::Endian, name: &[u8], - ) -> Option<(usize, &'data Elf::SectionHeader)> { - self.sections - .iter() - .enumerate() + ) -> Option<(SectionIndex, &'data Elf::SectionHeader)> { + self.enumerate() .find(|(_, section)| self.section_name(endian, section) == Ok(name)) } @@ -77,13 +105,14 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { pub fn section_name( &self, endian: Elf::Endian, - section: &'data Elf::SectionHeader, + section: &Elf::SectionHeader, ) -> read::Result<&'data [u8]> { section.name(endian, self.strings) } /// Return the string table at the given section index. /// + /// Returns an empty string table if the index is 0. /// Returns an error if the section is not a string table. #[inline] pub fn strings( @@ -92,6 +121,9 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { data: R, index: SectionIndex, ) -> read::Result> { + if index == SectionIndex(0) { + return Ok(StringTable::default()); + } self.section(index)? .strings(endian, data)? 
.read_error("Invalid ELF string section type") @@ -109,16 +141,12 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { ) -> read::Result> { debug_assert!(sh_type == elf::SHT_DYNSYM || sh_type == elf::SHT_SYMTAB); - let (index, section) = match self - .iter() - .enumerate() - .find(|s| s.1.sh_type(endian) == sh_type) - { + let (index, section) = match self.enumerate().find(|s| s.1.sh_type(endian) == sh_type) { Some(s) => s, None => return Ok(SymbolTable::default()), }; - SymbolTable::parse(endian, data, self, SectionIndex(index), section) + SymbolTable::parse(endian, data, self, index, section) } /// Return the symbol table at the given section index. @@ -318,22 +346,34 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { } } -/// An iterator over the sections of an `ElfFile32`. +/// An iterator for the sections in an [`ElfFile32`](super::ElfFile32). pub type ElfSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSectionIterator<'data, 'file, elf::FileHeader32, R>; -/// An iterator over the sections of an `ElfFile64`. +/// An iterator for the sections in an [`ElfFile64`](super::ElfFile64). pub type ElfSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSectionIterator<'data, 'file, elf::FileHeader64, R>; -/// An iterator over the sections of an `ElfFile`. +/// An iterator for the sections in an [`ElfFile`]. #[derive(Debug)] pub struct ElfSectionIterator<'data, 'file, Elf, R = &'data [u8]> where Elf: FileHeader, R: ReadRef<'data>, { - pub(super) file: &'file ElfFile<'data, Elf, R>, - pub(super) iter: iter::Enumerate>, + file: &'file ElfFile<'data, Elf, R>, + iter: iter::Enumerate>, +} + +impl<'data, 'file, Elf, R> ElfSectionIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) fn new(file: &'file ElfFile<'data, Elf, R>) -> Self { + let mut iter = file.sections.iter().enumerate(); + iter.next(); // Skip null section. 
+ ElfSectionIterator { file, iter } + } } impl<'data, 'file, Elf, R> Iterator for ElfSectionIterator<'data, 'file, Elf, R> @@ -352,14 +392,16 @@ where } } -/// A section of an `ElfFile32`. +/// A section in an [`ElfFile32`](super::ElfFile32). pub type ElfSection32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSection<'data, 'file, elf::FileHeader32, R>; -/// A section of an `ElfFile64`. +/// A section in an [`ElfFile64`](super::ElfFile64). pub type ElfSection64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSection<'data, 'file, elf::FileHeader64, R>; -/// A section of an `ElfFile`. +/// A section in an [`ElfFile`]. +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. #[derive(Debug)] pub struct ElfSection<'data, 'file, Elf, R = &'data [u8]> where @@ -372,6 +414,73 @@ where } impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSection<'data, 'file, Elf, R> { + /// Get the ELF file containing this section. + pub fn elf_file(&self) -> &'file ElfFile<'data, Elf, R> { + self.file + } + + /// Get the raw ELF section header. + pub fn elf_section_header(&self) -> &'data Elf::SectionHeader { + self.section + } + + /// Get the index of the relocation section that references this section. + /// + /// Returns `None` if there are no relocations. + /// Returns an error if there are multiple relocation sections that reference this section. + pub fn elf_relocation_section_index(&self) -> read::Result> { + let Some(relocation_index) = self.file.relocations.get(self.index) else { + return Ok(None); + }; + if self.file.relocations.get(relocation_index).is_some() { + return Err(Error( + "Unsupported ELF section with multiple relocation sections", + )); + } + Ok(Some(relocation_index)) + } + + /// Get the relocation section that references this section. + /// + /// Returns `None` if there are no relocations. + /// Returns an error if there are multiple relocation sections that reference this section. 
+ pub fn elf_relocation_section(&self) -> read::Result> { + let Some(relocation_index) = self.elf_relocation_section_index()? else { + return Ok(None); + }; + self.file.sections.section(relocation_index).map(Some) + } + + /// Get the `Elf::Rel` entries that apply to this section. + /// + /// Returns an empty slice if there are no relocations. + /// Returns an error if there are multiple relocation sections that reference this section. + pub fn elf_linked_rel(&self) -> read::Result<&'data [Elf::Rel]> { + let Some(relocation_section) = self.elf_relocation_section()? else { + return Ok(&[]); + }; + // The linked symbol table was already checked when self.file.relocations was created. + let Some((rel, _)) = relocation_section.rel(self.file.endian, self.file.data)? else { + return Ok(&[]); + }; + Ok(rel) + } + + /// Get the `Elf::Rela` entries that apply to this section. + /// + /// Returns an empty slice if there are no relocations. + /// Returns an error if there are multiple relocation sections that reference this section. + pub fn elf_linked_rela(&self) -> read::Result<&'data [Elf::Rela]> { + let Some(relocation_section) = self.elf_relocation_section()? else { + return Ok(&[]); + }; + // The linked symbol table was already checked when self.file.relocations was created. + let Some((rela, _)) = relocation_section.rela(self.file.endian, self.file.data)? else { + return Ok(&[]); + }; + Ok(rela) + } + fn bytes(&self) -> read::Result<&'data [u8]> { self.section .data(self.file.endian, self.file.data) @@ -400,46 +509,19 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSection<'data, 'file, } } - /// Try GNU-style "ZLIB" header decompression. + // Try GNU-style "ZLIB" header decompression. fn maybe_compressed_gnu(&self) -> read::Result> { - let name = match self.name() { - Ok(name) => name, - // I think it's ok to ignore this error? 
- Err(_) => return Ok(None), - }; - if !name.starts_with(".zdebug_") { + if !self + .name() + .map_or(false, |name| name.starts_with(".zdebug_")) + { return Ok(None); } let (section_offset, section_size) = self - .section - .file_range(self.file.endian) + .file_range() .read_error("Invalid ELF GNU compressed section type")?; - let mut offset = section_offset; - let data = self.file.data; - // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally - // huge allocations. This also reduces the chance of accidentally matching on a - // .debug_str that happens to start with "ZLIB". - if data - .read_bytes(&mut offset, 8) - .read_error("ELF GNU compressed section is too short")? - != b"ZLIB\0\0\0\0" - { - return Err(Error("Invalid ELF GNU compressed section header")); - } - let uncompressed_size = data - .read::>(&mut offset) - .read_error("ELF GNU compressed section is too short")? - .get(endian::BigEndian) - .into(); - let compressed_size = section_size - .checked_sub(offset - section_offset) - .read_error("ELF GNU compressed section is too short")?; - Ok(Some(CompressedFileRange { - format: CompressionFormat::Zlib, - offset, - compressed_size, - uncompressed_size, - })) + gnu_compression::compressed_file_range(self.file.data, section_offset, section_size) + .map(Some) } } @@ -510,13 +592,13 @@ where self.compressed_file_range()?.data(self.file.data) } - fn name_bytes(&self) -> read::Result<&[u8]> { + fn name_bytes(&self) -> read::Result<&'data [u8]> { self.file .sections .section_name(self.file.endian, self.section) } - fn name(&self) -> read::Result<&str> { + fn name(&self) -> read::Result<&'data str> { let name = self.name_bytes()?; str::from_utf8(name) .ok() @@ -585,6 +667,10 @@ where } } + fn relocation_map(&self) -> read::Result { + RelocationMap::new(self.file, self) + } + fn flags(&self) -> SectionFlags { SectionFlags::Elf { sh_flags: self.section.sh_flags(self.file.endian).into(), @@ -592,7 +678,7 @@ where } } -/// A trait for generic 
access to `SectionHeader32` and `SectionHeader64`. +/// A trait for generic access to [`elf::SectionHeader32`] and [`elf::SectionHeader64`]. #[allow(missing_docs)] pub trait SectionHeader: Debug + Pod { type Elf: FileHeader; @@ -621,6 +707,26 @@ pub trait SectionHeader: Debug + Pod { .read_error("Invalid ELF section name offset") } + /// Get the `sh_link` field as a section index. + /// + /// This may return a null section index, and does not check for validity. + fn link(&self, endian: Self::Endian) -> SectionIndex { + SectionIndex(self.sh_link(endian) as usize) + } + + /// Return true if the `SHF_INFO_LINK` flag is set. + fn has_info_link(&self, endian: Self::Endian) -> bool { + self.sh_flags(endian).into() & u64::from(elf::SHF_INFO_LINK) != 0 + } + + /// Get the `sh_info` field as a section index. + /// + /// This does not check the `SHF_INFO_LINK` flag. + /// This may return a null section index, and does not check for validity. + fn info_link(&self, endian: Self::Endian) -> SectionIndex { + SectionIndex(self.sh_info(endian) as usize) + } + /// Return the offset and size of the section in the file. /// /// Returns `None` for sections that have no data in the file. @@ -659,8 +765,7 @@ pub trait SectionHeader: Debug + Pod { endian: Self::Endian, data: R, ) -> read::Result<&'data [T]> { - let mut data = self.data(endian, data).map(Bytes)?; - data.read_slice(data.len() / mem::size_of::()) + pod::slice_from_all_bytes(self.data(endian, data)?) .read_error("Invalid ELF section size or offset") } @@ -724,8 +829,7 @@ pub trait SectionHeader: Debug + Pod { let rel = self .data_as_array(endian, data) .read_error("Invalid ELF relocation section offset or size")?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((rel, link))) + Ok(Some((rel, self.link(endian)))) } /// Return the `Elf::Rela` entries in the section. 
@@ -745,8 +849,7 @@ pub trait SectionHeader: Debug + Pod { let rela = self .data_as_array(endian, data) .read_error("Invalid ELF relocation section offset or size")?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((rela, link))) + Ok(Some((rela, self.link(endian)))) } /// Return entries in a dynamic section. @@ -766,8 +869,7 @@ pub trait SectionHeader: Debug + Pod { let dynamic = self .data_as_array(endian, data) .read_error("Invalid ELF dynamic section offset or size")?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((dynamic, link))) + Ok(Some((dynamic, self.link(endian)))) } /// Return a note iterator for the section data. @@ -804,19 +906,11 @@ pub trait SectionHeader: Debug + Pod { if self.sh_type(endian) != elf::SHT_GROUP { return Ok(None); } - let mut data = self - .data(endian, data) - .read_error("Invalid ELF group section offset or size") - .map(Bytes)?; - let flag = data - .read::>() - .read_error("Invalid ELF group section offset or size")? - .get(endian); - let count = data.len() / mem::size_of::>(); - let sections = data - .read_slice(count) - .read_error("Invalid ELF group section offset or size")?; - Ok(Some((flag, sections))) + let msg = "Invalid ELF group section offset or size"; + let data = self.data(endian, data).read_error(msg)?; + let (flag, data) = pod::from_bytes::>(data).read_error(msg)?; + let sections = pod::slice_from_all_bytes(data).read_error(msg)?; + Ok(Some((flag.get(endian), sections))) } /// Return the header of a SysV hash section. @@ -858,8 +952,7 @@ pub trait SectionHeader: Debug + Pod { .data(endian, data) .read_error("Invalid ELF hash section offset or size")?; let hash = HashTable::parse(endian, data)?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((hash, link))) + Ok(Some((hash, self.link(endian)))) } /// Return the header of a GNU hash section. 
@@ -901,8 +994,7 @@ pub trait SectionHeader: Debug + Pod { .data(endian, data) .read_error("Invalid ELF GNU hash section offset or size")?; let hash = GnuHashTable::parse(endian, data)?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((hash, link))) + Ok(Some((hash, self.link(endian)))) } /// Return the contents of a `SHT_GNU_VERSYM` section. @@ -922,8 +1014,7 @@ pub trait SectionHeader: Debug + Pod { let versym = self .data_as_array(endian, data) .read_error("Invalid ELF GNU versym section offset or size")?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((versym, link))) + Ok(Some((versym, self.link(endian)))) } /// Return an iterator for the entries of a `SHT_GNU_VERDEF` section. @@ -943,8 +1034,10 @@ pub trait SectionHeader: Debug + Pod { let verdef = self .data(endian, data) .read_error("Invalid ELF GNU verdef section offset or size")?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((VerdefIterator::new(endian, verdef), link))) + Ok(Some(( + VerdefIterator::new(endian, verdef), + self.link(endian), + ))) } /// Return an iterator for the entries of a `SHT_GNU_VERNEED` section. @@ -964,8 +1057,10 @@ pub trait SectionHeader: Debug + Pod { let verneed = self .data(endian, data) .read_error("Invalid ELF GNU verneed section offset or size")?; - let link = SectionIndex(self.sh_link(endian) as usize); - Ok(Some((VerneedIterator::new(endian, verneed), link))) + Ok(Some(( + VerneedIterator::new(endian, verneed), + self.link(endian), + ))) } /// Return the contents of a `SHT_GNU_ATTRIBUTES` section. 
diff --git a/third_party/rust/object/src/read/elf/segment.rs b/third_party/rust/object/src/read/elf/segment.rs index 3972731ecf2f..7ef09108b282 100644 --- a/third_party/rust/object/src/read/elf/segment.rs +++ b/third_party/rust/object/src/read/elf/segment.rs @@ -1,21 +1,21 @@ use core::fmt::Debug; -use core::{mem, slice, str}; +use core::{slice, str}; use crate::elf; use crate::endian::{self, Endianness}; -use crate::pod::Pod; -use crate::read::{self, Bytes, ObjectSegment, ReadError, ReadRef, SegmentFlags}; +use crate::pod::{self, Pod}; +use crate::read::{self, ObjectSegment, ReadError, ReadRef, SegmentFlags}; use super::{ElfFile, FileHeader, NoteIterator}; -/// An iterator over the segments of an `ElfFile32`. +/// An iterator for the segments in an [`ElfFile32`](super::ElfFile32). pub type ElfSegmentIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSegmentIterator<'data, 'file, elf::FileHeader32, R>; -/// An iterator over the segments of an `ElfFile64`. +/// An iterator for the segments in an [`ElfFile64`](super::ElfFile64). pub type ElfSegmentIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSegmentIterator<'data, 'file, elf::FileHeader64, R>; -/// An iterator over the segments of an `ElfFile`. +/// An iterator for the segments in an [`ElfFile`]. #[derive(Debug)] pub struct ElfSegmentIterator<'data, 'file, Elf, R = &'data [u8]> where @@ -46,14 +46,16 @@ where } } -/// A segment of an `ElfFile32`. +/// A segment in an [`ElfFile32`](super::ElfFile32). pub type ElfSegment32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSegment<'data, 'file, elf::FileHeader32, R>; -/// A segment of an `ElfFile64`. +/// A segment in an [`ElfFile64`](super::ElfFile64). pub type ElfSegment64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSegment<'data, 'file, elf::FileHeader64, R>; -/// A segment of an `ElfFile`. +/// A segment in an [`ElfFile`]. 
+/// +/// Most functionality is provided by the [`ObjectSegment`] trait implementation. #[derive(Debug)] pub struct ElfSegment<'data, 'file, Elf, R = &'data [u8]> where @@ -65,6 +67,16 @@ where } impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSegment<'data, 'file, Elf, R> { + /// Get the ELF file containing this segment. + pub fn elf_file(&self) -> &'file ElfFile<'data, Elf, R> { + self.file + } + + /// Get the raw ELF program header for the segment. + pub fn elf_program_header(&self) -> &'data Elf::ProgramHeader { + self.segment + } + fn bytes(&self) -> read::Result<&'data [u8]> { self.segment .data(self.file.endian, self.file.data) @@ -135,7 +147,7 @@ where } } -/// A trait for generic access to `ProgramHeader32` and `ProgramHeader64`. +/// A trait for generic access to [`elf::ProgramHeader32`] and [`elf::ProgramHeader64`]. #[allow(missing_docs)] pub trait ProgramHeader: Debug + Pod { type Elf: FileHeader; @@ -178,8 +190,7 @@ pub trait ProgramHeader: Debug + Pod { endian: Self::Endian, data: R, ) -> Result<&'data [T], ()> { - let mut data = self.data(endian, data).map(Bytes)?; - data.read_slice(data.len() / mem::size_of::()) + pod::slice_from_all_bytes(self.data(endian, data)?) } /// Return the segment data in the given virtual address range @@ -219,6 +230,28 @@ pub trait ProgramHeader: Debug + Pod { Ok(Some(dynamic)) } + /// Return the data in an interpreter segment. + /// + /// Returns `Ok(None)` if the segment is not `PT_INTERP`. + /// Returns `Err` for invalid values. + fn interpreter<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result> { + if self.p_type(endian) != elf::PT_INTERP { + return Ok(None); + } + let data = self + .data(endian, data) + .read_error("Invalid ELF interpreter segment offset or size")?; + let len = data + .iter() + .position(|&b| b == 0) + .read_error("Invalid ELF interpreter segment data")?; + Ok(Some(&data[..len])) + } + /// Return a note iterator for the segment data. 
/// /// Returns `Ok(None)` if the segment does not contain notes. diff --git a/third_party/rust/object/src/read/elf/symbol.rs b/third_party/rust/object/src/read/elf/symbol.rs index ee5aa37f1957..3c0bee35e044 100644 --- a/third_party/rust/object/src/read/elf/symbol.rs +++ b/third_party/rust/object/src/read/elf/symbol.rs @@ -4,20 +4,22 @@ use core::fmt::Debug; use core::slice; use core::str; -use crate::endian::{self, Endianness}; +use crate::elf; +use crate::endian::{self, Endianness, U32}; use crate::pod::Pod; use crate::read::util::StringTable; use crate::read::{ self, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, SectionIndex, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapEntry, SymbolScope, SymbolSection, }; -use crate::{elf, U32}; use super::{FileHeader, SectionHeader, SectionTable}; /// A table of symbol entries in an ELF file. /// /// Also includes the string table used for the symbol names. +/// +/// Returned by [`SectionTable::symbols`]. #[derive(Debug, Clone, Copy)] pub struct SymbolTable<'data, Elf: FileHeader, R = &'data [u8]> where @@ -67,11 +69,9 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SymbolTable<'data, Elf, R> { let mut shndx_section = SectionIndex(0); let mut shndx = &[][..]; - for (i, s) in sections.iter().enumerate() { - if s.sh_type(endian) == elf::SHT_SYMTAB_SHNDX - && s.sh_link(endian) as usize == section_index.0 - { - shndx_section = SectionIndex(i); + for (i, s) in sections.enumerate() { + if s.sh_type(endian) == elf::SHT_SYMTAB_SHNDX && s.link(endian) == section_index { + shndx_section = i; shndx = s .data_as_array(endian, data) .read_error("Invalid ELF symtab_shndx data")?; @@ -119,11 +119,24 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SymbolTable<'data, Elf, R> { } /// Iterate over the symbols. + /// + /// This includes the null symbol at index 0, which you will usually need to skip. 
#[inline] pub fn iter(&self) -> slice::Iter<'data, Elf::Sym> { self.symbols.iter() } + /// Iterate over the symbols and their indices. + /// + /// This includes the null symbol at index 0, which you will usually need to skip. + #[inline] + pub fn enumerate(&self) -> impl Iterator { + self.symbols + .iter() + .enumerate() + .map(|(i, sym)| (SymbolIndex(i), sym)) + } + /// Return true if the symbol table is empty. #[inline] pub fn is_empty(&self) -> bool { @@ -136,17 +149,22 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SymbolTable<'data, Elf, R> { self.symbols.len() } - /// Return the symbol at the given index. - pub fn symbol(&self, index: usize) -> read::Result<&'data Elf::Sym> { + /// Get the symbol at the given index. + /// + /// Returns an error for null entry at index 0. + pub fn symbol(&self, index: SymbolIndex) -> read::Result<&'data Elf::Sym> { + if index == SymbolIndex(0) { + return Err(read::Error("Invalid ELF symbol index")); + } self.symbols - .get(index) + .get(index.0) .read_error("Invalid ELF symbol index") } /// Return the extended section index for the given symbol if present. #[inline] - pub fn shndx(&self, endian: Elf::Endian, index: usize) -> Option { - self.shndx.get(index).map(|x| x.get(endian)) + pub fn shndx(&self, endian: Elf::Endian, index: SymbolIndex) -> Option { + self.shndx.get(index.0).map(|x| x.get(endian)) } /// Return the section index for the given symbol. 
@@ -155,26 +173,28 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SymbolTable<'data, Elf, R> { pub fn symbol_section( &self, endian: Elf::Endian, - symbol: &'data Elf::Sym, - index: usize, + symbol: &Elf::Sym, + index: SymbolIndex, ) -> read::Result> { match symbol.st_shndx(endian) { elf::SHN_UNDEF => Ok(None), - elf::SHN_XINDEX => self - .shndx(endian, index) - .read_error("Missing ELF symbol extended index") - .map(|index| Some(SectionIndex(index as usize))), + elf::SHN_XINDEX => { + let shndx = self + .shndx(endian, index) + .read_error("Missing ELF symbol extended index")?; + if shndx == 0 { + Ok(None) + } else { + Ok(Some(SectionIndex(shndx as usize))) + } + } shndx if shndx < elf::SHN_LORESERVE => Ok(Some(SectionIndex(shndx.into()))), _ => Ok(None), } } /// Return the symbol name for the given symbol. - pub fn symbol_name( - &self, - endian: Elf::Endian, - symbol: &'data Elf::Sym, - ) -> read::Result<&'data [u8]> { + pub fn symbol_name(&self, endian: Elf::Endian, symbol: &Elf::Sym) -> read::Result<&'data [u8]> { symbol.name(endian, self.strings) } @@ -197,14 +217,14 @@ impl<'data, Elf: FileHeader, R: ReadRef<'data>> SymbolTable<'data, Elf, R> { } } -/// A symbol table of an `ElfFile32`. +/// A symbol table in an [`ElfFile32`](super::ElfFile32). pub type ElfSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSymbolTable<'data, 'file, elf::FileHeader32, R>; -/// A symbol table of an `ElfFile32`. +/// A symbol table in an [`ElfFile32`](super::ElfFile32). pub type ElfSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSymbolTable<'data, 'file, elf::FileHeader64, R>; -/// A symbol table of an `ElfFile`. +/// A symbol table in an [`ElfFile`](super::ElfFile). 
#[derive(Debug, Clone, Copy)] pub struct ElfSymbolTable<'data, 'file, Elf, R = &'data [u8]> where @@ -227,15 +247,11 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbolTable<'data> type SymbolIterator = ElfSymbolIterator<'data, 'file, Elf, R>; fn symbols(&self) -> Self::SymbolIterator { - ElfSymbolIterator { - endian: self.endian, - symbols: self.symbols, - index: 0, - } + ElfSymbolIterator::new(self.endian, self.symbols) } fn symbol_by_index(&self, index: SymbolIndex) -> read::Result { - let symbol = self.symbols.symbol(index.0)?; + let symbol = self.symbols.symbol(index)?; Ok(ElfSymbol { endian: self.endian, symbols: self.symbols, @@ -245,22 +261,36 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbolTable<'data> } } -/// An iterator over the symbols of an `ElfFile32`. +/// An iterator for the symbols in an [`ElfFile32`](super::ElfFile32). pub type ElfSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSymbolIterator<'data, 'file, elf::FileHeader32, R>; -/// An iterator over the symbols of an `ElfFile64`. +/// An iterator for the symbols in an [`ElfFile64`](super::ElfFile64). pub type ElfSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSymbolIterator<'data, 'file, elf::FileHeader64, R>; -/// An iterator over the symbols of an `ElfFile`. +/// An iterator for the symbols in an [`ElfFile`](super::ElfFile). 
pub struct ElfSymbolIterator<'data, 'file, Elf, R = &'data [u8]> where Elf: FileHeader, R: ReadRef<'data>, { - pub(super) endian: Elf::Endian, - pub(super) symbols: &'file SymbolTable<'data, Elf, R>, - pub(super) index: usize, + endian: Elf::Endian, + symbols: &'file SymbolTable<'data, Elf, R>, + index: SymbolIndex, +} + +impl<'data, 'file, Elf, R> ElfSymbolIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) fn new(endian: Elf::Endian, symbols: &'file SymbolTable<'data, Elf, R>) -> Self { + ElfSymbolIterator { + endian, + symbols, + index: SymbolIndex(1), + } + } } impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> fmt::Debug @@ -278,25 +308,27 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> Iterator fn next(&mut self) -> Option { let index = self.index; - let symbol = self.symbols.symbols.get(index)?; - self.index += 1; + let symbol = self.symbols.symbols.get(index.0)?; + self.index.0 += 1; Some(ElfSymbol { endian: self.endian, symbols: self.symbols, - index: SymbolIndex(index), + index, symbol, }) } } -/// A symbol of an `ElfFile32`. +/// A symbol in an [`ElfFile32`](super::ElfFile32). pub type ElfSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSymbol<'data, 'file, elf::FileHeader32, R>; -/// A symbol of an `ElfFile64`. +/// A symbol in an [`ElfFile64`](super::ElfFile64). pub type ElfSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> = ElfSymbol<'data, 'file, elf::FileHeader64, R>; -/// A symbol of an `ElfFile`. +/// A symbol in an [`ElfFile`](super::ElfFile). +/// +/// Most functionality is provided by the [`ObjectSymbol`] trait implementation. #[derive(Debug, Clone, Copy)] pub struct ElfSymbol<'data, 'file, Elf, R = &'data [u8]> where @@ -310,11 +342,22 @@ where } impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSymbol<'data, 'file, Elf, R> { + /// Get the endianness of the ELF file. 
+ pub fn endian(&self) -> Elf::Endian { + self.endian + } + /// Return a reference to the raw symbol structure. #[inline] + #[deprecated(note = "Use `elf_symbol` instead")] pub fn raw_symbol(&self) -> &'data Elf::Sym { self.symbol } + + /// Get the raw ELF symbol structure. + pub fn elf_symbol(&self) -> &'data Elf::Sym { + self.symbol + } } impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> read::private::Sealed @@ -353,8 +396,7 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> fn kind(&self) -> SymbolKind { match self.symbol.st_type() { - elf::STT_NOTYPE if self.index.0 == 0 => SymbolKind::Null, - elf::STT_NOTYPE => SymbolKind::Label, + elf::STT_NOTYPE => SymbolKind::Unknown, elf::STT_OBJECT | elf::STT_COMMON => SymbolKind::Data, elf::STT_FUNC | elf::STT_GNU_IFUNC => SymbolKind::Text, elf::STT_SECTION => SymbolKind::Section, @@ -375,7 +417,8 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> } } elf::SHN_COMMON => SymbolSection::Common, - elf::SHN_XINDEX => match self.symbols.shndx(self.endian, self.index.0) { + elf::SHN_XINDEX => match self.symbols.shndx(self.endian, self.index) { + Some(0) => SymbolSection::None, Some(index) => SymbolSection::Section(SectionIndex(index as usize)), None => SymbolSection::Unknown, }, @@ -388,7 +431,7 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> #[inline] fn is_undefined(&self) -> bool { - self.symbol.st_shndx(self.endian) == elf::SHN_UNDEF + self.symbol.is_undefined(self.endian) } #[inline] @@ -398,12 +441,12 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> #[inline] fn is_common(&self) -> bool { - self.symbol.st_shndx(self.endian) == elf::SHN_COMMON + self.symbol.is_common(self.endian) } #[inline] fn is_weak(&self) -> bool { - self.symbol.st_bind() == elf::STB_WEAK + self.symbol.is_weak() } fn scope(&self) -> SymbolScope { @@ -426,12 +469,12 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> 
ObjectSymbol<'data> #[inline] fn is_global(&self) -> bool { - self.symbol.st_bind() != elf::STB_LOCAL + !self.symbol.is_local() } #[inline] fn is_local(&self) -> bool { - self.symbol.st_bind() == elf::STB_LOCAL + self.symbol.is_local() } #[inline] @@ -443,7 +486,7 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> } } -/// A trait for generic access to `Sym32` and `Sym64`. +/// A trait for generic access to [`elf::Sym32`] and [`elf::Sym64`]. #[allow(missing_docs)] pub trait Sym: Debug + Pod { type Word: Into; @@ -470,7 +513,7 @@ pub trait Sym: Debug + Pod { .read_error("Invalid ELF symbol name offset") } - /// Return true if the symbol is undefined. + /// Return true if the symbol section is `SHN_UNDEF`. #[inline] fn is_undefined(&self, endian: Self::Endian) -> bool { self.st_shndx(endian) == elf::SHN_UNDEF @@ -478,9 +521,35 @@ pub trait Sym: Debug + Pod { /// Return true if the symbol is a definition of a function or data object. fn is_definition(&self, endian: Self::Endian) -> bool { - let st_type = self.st_type(); - (st_type == elf::STT_NOTYPE || st_type == elf::STT_FUNC || st_type == elf::STT_OBJECT) - && self.st_shndx(endian) != elf::SHN_UNDEF + let shndx = self.st_shndx(endian); + if shndx == elf::SHN_UNDEF || (shndx >= elf::SHN_LORESERVE && shndx != elf::SHN_XINDEX) { + return false; + } + match self.st_type() { + elf::STT_NOTYPE => self.st_size(endian).into() != 0, + elf::STT_FUNC | elf::STT_OBJECT => true, + _ => false, + } + } + + /// Return true if the symbol section is `SHN_COMMON`. + fn is_common(&self, endian: Self::Endian) -> bool { + self.st_shndx(endian) == elf::SHN_COMMON + } + + /// Return true if the symbol section is `SHN_ABS`. + fn is_absolute(&self, endian: Self::Endian) -> bool { + self.st_shndx(endian) == elf::SHN_ABS + } + + /// Return true if the symbol binding is `STB_LOCAL`. + fn is_local(&self) -> bool { + self.st_bind() == elf::STB_LOCAL + } + + /// Return true if the symbol binding is `STB_WEAK`. 
+ fn is_weak(&self) -> bool { + self.st_bind() == elf::STB_WEAK } } diff --git a/third_party/rust/object/src/read/elf/version.rs b/third_party/rust/object/src/read/elf/version.rs index cc87bbef1c8d..d0bd504e6e67 100644 --- a/third_party/rust/object/src/read/elf/version.rs +++ b/third_party/rust/object/src/read/elf/version.rs @@ -1,6 +1,6 @@ use alloc::vec::Vec; -use crate::read::{Bytes, ReadError, ReadRef, Result, StringTable}; +use crate::read::{Bytes, ReadError, ReadRef, Result, StringTable, SymbolIndex}; use crate::{elf, endian}; use super::FileHeader; @@ -33,13 +33,14 @@ impl VersionIndex { /// A version definition or requirement. /// -/// This is derived from entries in the `SHT_GNU_verdef` and `SHT_GNU_verneed` sections. +/// This is derived from entries in the [`elf::SHT_GNU_VERDEF`] and [`elf::SHT_GNU_VERNEED`] sections. #[derive(Debug, Default, Clone, Copy)] pub struct Version<'data> { name: &'data [u8], hash: u32, // Used to keep track of valid indices in `VersionTable`. valid: bool, + file: Option<&'data [u8]>, } impl<'data> Version<'data> { @@ -52,13 +53,24 @@ impl<'data> Version<'data> { pub fn hash(&self) -> u32 { self.hash } + + /// Return the filename of the library containing this version. + /// + /// This is the `vn_file` field of the associated entry in [`elf::SHT_GNU_VERNEED`]. + /// or `None` if the version info was parsed from a [`elf::SHT_GNU_VERDEF`] section. + pub fn file(&self) -> Option<&'data [u8]> { + self.file + } } /// A table of version definitions and requirements. /// /// It allows looking up the version information for a given symbol index. /// -/// This is derived from entries in the `SHT_GNU_versym`, `SHT_GNU_verdef` and `SHT_GNU_verneed` sections. +/// This is derived from entries in the [`elf::SHT_GNU_VERSYM`], [`elf::SHT_GNU_VERDEF`] +/// and [`elf::SHT_GNU_VERNEED`] sections. +/// +/// Returned by [`SectionTable::versions`](super::SectionTable::versions). 
#[derive(Debug, Clone)] pub struct VersionTable<'data, Elf: FileHeader> { symbols: &'data [elf::Versym], @@ -125,12 +137,13 @@ impl<'data, Elf: FileHeader> VersionTable<'data, Elf> { name: verdaux.name(endian, strings)?, hash: verdef.vd_hash.get(endian), valid: true, + file: None, }; } } } if let Some(mut verneeds) = verneeds { - while let Some((_, mut vernauxs)) = verneeds.next()? { + while let Some((verneed, mut vernauxs)) = verneeds.next()? { while let Some(vernaux) = vernauxs.next()? { let index = vernaux.vna_other.get(endian) & elf::VERSYM_VERSION; if index <= elf::VER_NDX_GLOBAL { @@ -141,6 +154,7 @@ impl<'data, Elf: FileHeader> VersionTable<'data, Elf> { name: vernaux.name(endian, strings)?, hash: vernaux.vna_hash.get(endian), valid: true, + file: Some(verneed.file(endian, strings)?), }; } } @@ -158,8 +172,8 @@ impl<'data, Elf: FileHeader> VersionTable<'data, Elf> { } /// Return version index for a given symbol index. - pub fn version_index(&self, endian: Elf::Endian, index: usize) -> VersionIndex { - let version_index = match self.symbols.get(index) { + pub fn version_index(&self, endian: Elf::Endian, index: SymbolIndex) -> VersionIndex { + let version_index = match self.symbols.get(index.0) { Some(x) => x.0.get(endian), // Ideally this would be VER_NDX_LOCAL for undefined symbols, // but currently there are no checks that need this distinction. @@ -188,7 +202,12 @@ impl<'data, Elf: FileHeader> VersionTable<'data, Elf> { /// Returns false for any error. /// /// Note: this function hasn't been fully tested and is likely to be incomplete. 
- pub fn matches(&self, endian: Elf::Endian, index: usize, need: Option<&Version<'_>>) -> bool { + pub fn matches( + &self, + endian: Elf::Endian, + index: SymbolIndex, + need: Option<&Version<'_>>, + ) -> bool { let version_index = self.version_index(endian, index); let def = match self.version(version_index) { Ok(def) => def, @@ -210,7 +229,7 @@ impl<'data, Elf: FileHeader> VersionTable<'data, Elf> { } } -/// An iterator over the entries in an ELF `SHT_GNU_verdef` section. +/// An iterator for the entries in an ELF [`elf::SHT_GNU_VERDEF`] section. #[derive(Debug, Clone)] pub struct VerdefIterator<'data, Elf: FileHeader> { endian: Elf::Endian, @@ -257,7 +276,7 @@ impl<'data, Elf: FileHeader> VerdefIterator<'data, Elf> { } } -/// An iterator over the auxiliary records for an entry in an ELF `SHT_GNU_verdef` section. +/// An iterator for the auxiliary records for an entry in an ELF [`elf::SHT_GNU_VERDEF`] section. #[derive(Debug, Clone)] pub struct VerdauxIterator<'data, Elf: FileHeader> { endian: Elf::Endian, @@ -293,7 +312,7 @@ impl<'data, Elf: FileHeader> VerdauxIterator<'data, Elf> { } } -/// An iterator over the entries in an ELF `SHT_GNU_verneed` section. +/// An iterator for the entries in an ELF [`elf::SHT_GNU_VERNEED`] section. #[derive(Debug, Clone)] pub struct VerneedIterator<'data, Elf: FileHeader> { endian: Elf::Endian, @@ -345,7 +364,7 @@ impl<'data, Elf: FileHeader> VerneedIterator<'data, Elf> { } } -/// An iterator over the auxiliary records for an entry in an ELF `SHT_GNU_verneed` section. +/// An iterator for the auxiliary records for an entry in an ELF [`elf::SHT_GNU_VERNEED`] section. 
#[derive(Debug, Clone)] pub struct VernauxIterator<'data, Elf: FileHeader> { endian: Elf::Endian, diff --git a/third_party/rust/object/src/read/gnu_compression.rs b/third_party/rust/object/src/read/gnu_compression.rs new file mode 100644 index 000000000000..7ef7d91e372c --- /dev/null +++ b/third_party/rust/object/src/read/gnu_compression.rs @@ -0,0 +1,36 @@ +use crate::read::{self, Error, ReadError as _}; +use crate::{endian, CompressedFileRange, CompressionFormat, ReadRef, U32Bytes}; + +// Attempt to parse the the CompressedFileRange for a section using the GNU-style +// inline compression header format. This is used by the Go compiler in Mach-O files +// as well as by the GNU linker in some ELF files. +pub(super) fn compressed_file_range<'data, R: ReadRef<'data>>( + file_data: R, + section_offset: u64, + section_size: u64, +) -> read::Result { + let mut offset = section_offset; + // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally + // huge allocations. This also reduces the chance of accidentally matching on a + // .debug_str that happens to start with "ZLIB". + let header = file_data + .read_bytes(&mut offset, 8) + .read_error("GNU compressed section is too short")?; + if header != b"ZLIB\0\0\0\0" { + return Err(Error("Invalid GNU compressed section header")); + } + let uncompressed_size = file_data + .read::>(&mut offset) + .read_error("GNU compressed section is too short")? 
+ .get(endian::BigEndian) + .into(); + let compressed_size = section_size + .checked_sub(offset - section_offset) + .read_error("GNU compressed section is too short")?; + Ok(CompressedFileRange { + format: CompressionFormat::Zlib, + offset, + compressed_size, + uncompressed_size, + }) +} diff --git a/third_party/rust/object/src/read/macho/dyld_cache.rs b/third_party/rust/object/src/read/macho/dyld_cache.rs index 68f27f54917e..6375a36915c8 100644 --- a/third_party/rust/object/src/read/macho/dyld_cache.rs +++ b/third_party/rust/object/src/read/macho/dyld_cache.rs @@ -1,8 +1,9 @@ use alloc::vec::Vec; use core::slice; -use crate::read::{Error, File, ReadError, ReadRef, Result}; -use crate::{macho, Architecture, Endian, Endianness}; +use crate::endian::{Endian, Endianness}; +use crate::macho; +use crate::read::{Architecture, Error, File, ReadError, ReadRef, Result}; /// A parsed representation of the dyld shared cache. #[derive(Debug)] @@ -30,8 +31,25 @@ where mappings: &'data [macho::DyldCacheMappingInfo], } -// This is the offset of the images_across_all_subcaches_count field. -const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4; +/// A slice of structs describing each subcache. The struct gained +/// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16), +/// so this is an enum of the two possible slice types. +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub enum DyldSubCacheSlice<'data, E: Endian> { + /// V1, used between dyld-940 and dyld-1042.1. + V1(&'data [macho::DyldSubCacheEntryV1]), + /// V2, used since dyld-1042.1. + V2(&'data [macho::DyldSubCacheEntryV2]), +} + +// This is the offset of the end of the images_across_all_subcaches_count field. +const MIN_HEADER_SIZE_SUBCACHES_V1: u32 = 0x1c8; + +// This is the offset of the end of the cacheSubType field. +// This field comes right after the images_across_all_subcaches_count field, +// and we don't currently have it in our definition of the DyldCacheHeader type. 
+const MIN_HEADER_SIZE_SUBCACHES_V2: u32 = 0x1d0; impl<'data, E, R> DyldCache<'data, E, R> where @@ -39,19 +57,24 @@ where R: ReadRef<'data>, { /// Parse the raw dyld shared cache data. + /// /// For shared caches from macOS 12 / iOS 15 and above, the subcache files need to be - /// supplied as well, in the correct order, with the .symbols subcache last (if present). - /// For example, data would be the data for dyld_shared_cache_x86_64, - /// and subcache_data would be the data for [dyld_shared_cache_x86_64.1, dyld_shared_cache_x86_64.2, ...] + /// supplied as well, in the correct order, with the `.symbols` subcache last (if present). + /// For example, `data` would be the data for `dyld_shared_cache_x86_64`, + /// and `subcache_data` would be the data for `[dyld_shared_cache_x86_64.1, dyld_shared_cache_x86_64.2, ...]`. pub fn parse(data: R, subcache_data: &[R]) -> Result { let header = macho::DyldCacheHeader::parse(data)?; let (arch, endian) = header.parse_magic()?; let mappings = header.mappings(endian, data)?; let symbols_subcache_uuid = header.symbols_subcache_uuid(endian); - let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]); - - if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize { + let subcaches_info = header.subcaches(endian, data)?; + let subcaches_count = match subcaches_info { + Some(DyldSubCacheSlice::V1(subcaches)) => subcaches.len(), + Some(DyldSubCacheSlice::V2(subcaches)) => subcaches.len(), + None => 0, + }; + if subcache_data.len() != subcaches_count + symbols_subcache_uuid.is_some() as usize { return Err(Error("Incorrect number of SubCaches")); } @@ -64,15 +87,22 @@ where (None, subcache_data) }; - // Read the regular SubCaches (.1, .2, ...), if present. + // Read the regular SubCaches, if present. 
let mut subcaches = Vec::new(); - for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) { - let sc_header = macho::DyldCacheHeader::::parse(data)?; - if sc_header.uuid != info.uuid { - return Err(Error("Unexpected SubCache UUID")); + if let Some(subcaches_info) = subcaches_info { + let (v1, v2) = match subcaches_info { + DyldSubCacheSlice::V1(s) => (s, &[][..]), + DyldSubCacheSlice::V2(s) => (&[][..], s), + }; + let uuids = v1.iter().map(|e| &e.uuid).chain(v2.iter().map(|e| &e.uuid)); + for (&data, uuid) in subcache_data.iter().zip(uuids) { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if &sc_header.uuid != uuid { + return Err(Error("Unexpected SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + subcaches.push(DyldSubCache { data, mappings }); } - let mappings = sc_header.mappings(endian, data)?; - subcaches.push(DyldSubCache { data, mappings }); } // Read the .symbols SubCache, if present. @@ -253,19 +283,30 @@ impl macho::DyldCacheHeader { } /// Return the information about subcaches, if present. + /// + /// Returns `None` for dyld caches produced before dyld-940 (macOS 12). 
pub fn subcaches<'data, R: ReadRef<'data>>( &self, endian: E, data: R, - ) -> Result]>> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + ) -> Result>> { + let header_size = self.mapping_offset.get(endian); + if header_size >= MIN_HEADER_SIZE_SUBCACHES_V2 { let subcaches = data - .read_slice_at::>( + .read_slice_at::>( self.subcaches_offset.get(endian).into(), self.subcaches_count.get(endian) as usize, ) .read_error("Invalid dyld subcaches size or alignment")?; - Ok(Some(subcaches)) + Ok(Some(DyldSubCacheSlice::V2(subcaches))) + } else if header_size >= MIN_HEADER_SIZE_SUBCACHES_V1 { + let subcaches = data + .read_slice_at::>( + self.subcaches_offset.get(endian).into(), + self.subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld subcaches size or alignment")?; + Ok(Some(DyldSubCacheSlice::V1(subcaches))) } else { Ok(None) } @@ -273,7 +314,7 @@ impl macho::DyldCacheHeader { /// Return the UUID for the .symbols subcache, if present. pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 { let uuid = self.symbols_subcache_uuid; if uuid != [0; 16] { return Some(uuid); @@ -288,7 +329,7 @@ impl macho::DyldCacheHeader { endian: E, data: R, ) -> Result<&'data [macho::DyldCacheImageInfo]> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 { data.read_slice_at::>( self.images_across_all_subcaches_offset.get(endian).into(), self.images_across_all_subcaches_count.get(endian) as usize, diff --git a/third_party/rust/object/src/read/macho/fat.rs b/third_party/rust/object/src/read/macho/fat.rs index d4301b7e1166..9a049d9808fc 100644 --- a/third_party/rust/object/src/read/macho/fat.rs +++ b/third_party/rust/object/src/read/macho/fat.rs @@ -1,48 +1,64 @@ +use crate::endian::BigEndian; +use crate::macho; +use 
crate::pod::Pod; use crate::read::{Architecture, Error, ReadError, ReadRef, Result}; -use crate::{macho, BigEndian, Pod}; pub use macho::{FatArch32, FatArch64, FatHeader}; -impl FatHeader { - /// Attempt to parse a fat header. - /// - /// Does not validate the magic value. - pub fn parse<'data, R: ReadRef<'data>>(file: R) -> Result<&'data FatHeader> { - file.read_at::(0) - .read_error("Invalid fat header size or alignment") - } +/// A 32-bit Mach-O universal binary. +/// +/// This is a file that starts with [`macho::FatHeader`], and corresponds +/// to [`crate::FileKind::MachOFat32`]. +pub type MachOFatFile32<'data> = MachOFatFile<'data, macho::FatArch32>; - /// Attempt to parse a fat header and 32-bit fat arches. - pub fn parse_arch32<'data, R: ReadRef<'data>>(file: R) -> Result<&'data [FatArch32]> { +/// A 64-bit Mach-O universal binary. +/// +/// This is a file that starts with [`macho::FatHeader`], and corresponds +/// to [`crate::FileKind::MachOFat64`]. +pub type MachOFatFile64<'data> = MachOFatFile<'data, macho::FatArch64>; + +/// A Mach-O universal binary. +/// +/// This is a file that starts with [`macho::FatHeader`], and corresponds +/// to [`crate::FileKind::MachOFat32`] or [`crate::FileKind::MachOFat64`]. +#[derive(Debug, Clone)] +pub struct MachOFatFile<'data, Fat: FatArch> { + header: &'data macho::FatHeader, + arches: &'data [Fat], +} + +impl<'data, Fat: FatArch> MachOFatFile<'data, Fat> { + /// Attempt to parse the fat header and fat arches. 
+ pub fn parse>(data: R) -> Result { let mut offset = 0; - let header = file + let header = data .read::(&mut offset) .read_error("Invalid fat header size or alignment")?; - if header.magic.get(BigEndian) != macho::FAT_MAGIC { - return Err(Error("Invalid 32-bit fat magic")); + if header.magic.get(BigEndian) != Fat::MAGIC { + return Err(Error("Invalid fat magic")); } - file.read_slice::(&mut offset, header.nfat_arch.get(BigEndian) as usize) - .read_error("Invalid nfat_arch") + let arches = data + .read_slice::(&mut offset, header.nfat_arch.get(BigEndian) as usize) + .read_error("Invalid nfat_arch")?; + Ok(MachOFatFile { header, arches }) } - /// Attempt to parse a fat header and 64-bit fat arches. - pub fn parse_arch64<'data, R: ReadRef<'data>>(file: R) -> Result<&'data [FatArch64]> { - let mut offset = 0; - let header = file - .read::(&mut offset) - .read_error("Invalid fat header size or alignment")?; - if header.magic.get(BigEndian) != macho::FAT_MAGIC_64 { - return Err(Error("Invalid 64-bit fat magic")); - } - file.read_slice::(&mut offset, header.nfat_arch.get(BigEndian) as usize) - .read_error("Invalid nfat_arch") + /// Return the fat header + pub fn header(&self) -> &'data macho::FatHeader { + self.header + } + + /// Return the array of fat arches. + pub fn arches(&self) -> &'data [Fat] { + self.arches } } -/// A trait for generic access to `FatArch32` and `FatArch64`. +/// A trait for generic access to [`macho::FatArch32`] and [`macho::FatArch64`]. 
#[allow(missing_docs)] pub trait FatArch: Pod { type Word: Into; + const MAGIC: u32; fn cputype(&self) -> u32; fn cpusubtype(&self) -> u32; @@ -75,6 +91,7 @@ pub trait FatArch: Pod { impl FatArch for FatArch32 { type Word = u32; + const MAGIC: u32 = macho::FAT_MAGIC; fn cputype(&self) -> u32 { self.cputype.get(BigEndian) @@ -99,6 +116,7 @@ impl FatArch for FatArch32 { impl FatArch for FatArch64 { type Word = u64; + const MAGIC: u32 = macho::FAT_MAGIC_64; fn cputype(&self) -> u32 { self.cputype.get(BigEndian) diff --git a/third_party/rust/object/src/read/macho/file.rs b/third_party/rust/object/src/read/macho/file.rs index 368c28bbd22d..d3721cd127f3 100644 --- a/third_party/rust/object/src/read/macho/file.rs +++ b/third_party/rust/object/src/read/macho/file.rs @@ -2,12 +2,14 @@ use alloc::vec::Vec; use core::fmt::Debug; use core::{mem, str}; +use crate::endian::{self, BigEndian, Endian, Endianness}; +use crate::macho; +use crate::pod::Pod; use crate::read::{ - self, Architecture, ComdatKind, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, - Object, ObjectComdat, ObjectKind, ObjectMap, ObjectSection, ReadError, ReadRef, Result, - SectionIndex, SymbolIndex, + self, Architecture, ByteString, ComdatKind, Error, Export, FileFlags, Import, + NoDynamicRelocationIterator, Object, ObjectComdat, ObjectKind, ObjectMap, ObjectSection, + ReadError, ReadRef, Result, SectionIndex, SubArchitecture, SymbolIndex, }; -use crate::{endian, macho, BigEndian, ByteString, Endian, Endianness, Pod}; use super::{ DyldCacheImage, LoadCommandIterator, MachOSection, MachOSectionInternal, MachOSectionIterator, @@ -16,15 +18,21 @@ use super::{ }; /// A 32-bit Mach-O object file. +/// +/// This is a file that starts with [`macho::MachHeader32`], and corresponds +/// to [`crate::FileKind::MachO32`]. pub type MachOFile32<'data, Endian = Endianness, R = &'data [u8]> = MachOFile<'data, macho::MachHeader32, R>; /// A 64-bit Mach-O object file. 
+/// +/// This is a file that starts with [`macho::MachHeader64`], and corresponds +/// to [`crate::FileKind::MachO64`]. pub type MachOFile64<'data, Endian = Endianness, R = &'data [u8]> = MachOFile<'data, macho::MachHeader64, R>; /// A partially parsed Mach-O file. /// -/// Most of the functionality of this type is provided by the `Object` trait implementation. +/// Most of the functionality of this type is provided by the [`Object`] trait implementation. #[derive(Debug)] pub struct MachOFile<'data, Mach, R = &'data [u8]> where @@ -36,7 +44,7 @@ where pub(super) header_offset: u64, pub(super) header: &'data Mach, pub(super) segments: Vec>, - pub(super) sections: Vec>, + pub(super) sections: Vec>, pub(super) symbols: SymbolTable<'data, Mach, R>, } @@ -57,11 +65,10 @@ where if let Ok(mut commands) = header.load_commands(endian, data, 0) { while let Ok(Some(command)) = commands.next() { if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { - let segment_index = segments.len(); segments.push(MachOSegmentInternal { segment, data }); for section in segment.sections(endian, section_data)? { let index = SectionIndex(sections.len() + 1); - sections.push(MachOSectionInternal::parse(index, segment_index, section)); + sections.push(MachOSectionInternal::parse(index, section, data)); } } else if let Some(symtab) = command.symtab()? { symbols = symtab.symbols(endian, data)?; @@ -102,6 +109,7 @@ where if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { // Each segment can be stored in a different subcache. Get the segment's // address and look it up in the cache mappings, to find the correct cache data. + // This was observed for the arm64e __LINKEDIT segment in macOS 12.0.1. 
let addr = segment.vmaddr(endian).into(); let (data, _offset) = image .cache @@ -110,12 +118,11 @@ where if segment.name() == macho::SEG_LINKEDIT.as_bytes() { linkedit_data = Some(data); } - let segment_index = segments.len(); segments.push(MachOSegmentInternal { segment, data }); for section in segment.sections(endian, section_data)? { let index = SectionIndex(sections.len() + 1); - sections.push(MachOSectionInternal::parse(index, segment_index, section)); + sections.push(MachOSectionInternal::parse(index, section, data)); } } else if let Some(st) = command.symtab()? { symtab = Some(st); @@ -146,7 +153,7 @@ where pub(super) fn section_internal( &self, index: SectionIndex, - ) -> Result<&MachOSectionInternal<'data, Mach>> { + ) -> Result<&MachOSectionInternal<'data, Mach, R>> { index .0 .checked_sub(1) @@ -154,13 +161,51 @@ where .read_error("Invalid Mach-O section index") } - pub(super) fn segment_internal( - &self, - index: usize, - ) -> Result<&MachOSegmentInternal<'data, Mach, R>> { - self.segments - .get(index) - .read_error("Invalid Mach-O segment index") + /// Returns the endianness. + pub fn endian(&self) -> Mach::Endian { + self.endian + } + + /// Returns the raw data. + pub fn data(&self) -> R { + self.data + } + + /// Returns the raw Mach-O file header. + #[deprecated(note = "Use `macho_header` instead")] + pub fn raw_header(&self) -> &'data Mach { + self.header + } + + /// Get the raw Mach-O file header. + pub fn macho_header(&self) -> &'data Mach { + self.header + } + + /// Get the Mach-O load commands. + pub fn macho_load_commands(&self) -> Result> { + self.header + .load_commands(self.endian, self.data, self.header_offset) + } + + /// Get the Mach-O symbol table. + /// + /// Returns an empty symbol table if the file has no symbol table. + pub fn macho_symbol_table(&self) -> &SymbolTable<'data, Mach, R> { + &self.symbols + } + + /// Return the `LC_BUILD_VERSION` load command if present. 
+ pub fn build_version(&self) -> Result>> { + let mut commands = self + .header + .load_commands(self.endian, self.data, self.header_offset)?; + while let Some(command) = commands.next()? { + if let Some(build_version) = command.build_version()? { + return Ok(Some(build_version)); + } + } + Ok(None) } } @@ -171,22 +216,21 @@ where { } -impl<'data, 'file, Mach, R> Object<'data, 'file> for MachOFile<'data, Mach, R> +impl<'data, Mach, R> Object<'data> for MachOFile<'data, Mach, R> where - 'data: 'file, Mach: MachHeader, - R: 'file + ReadRef<'data>, + R: ReadRef<'data>, { - type Segment = MachOSegment<'data, 'file, Mach, R>; - type SegmentIterator = MachOSegmentIterator<'data, 'file, Mach, R>; - type Section = MachOSection<'data, 'file, Mach, R>; - type SectionIterator = MachOSectionIterator<'data, 'file, Mach, R>; - type Comdat = MachOComdat<'data, 'file, Mach, R>; - type ComdatIterator = MachOComdatIterator<'data, 'file, Mach, R>; - type Symbol = MachOSymbol<'data, 'file, Mach, R>; - type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>; - type SymbolTable = MachOSymbolTable<'data, 'file, Mach, R>; - type DynamicRelocationIterator = NoDynamicRelocationIterator; + type Segment<'file> = MachOSegment<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type SegmentIterator<'file> = MachOSegmentIterator<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type Section<'file> = MachOSection<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type SectionIterator<'file> = MachOSectionIterator<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type Comdat<'file> = MachOComdat<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type ComdatIterator<'file> = MachOComdatIterator<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type Symbol<'file> = MachOSymbol<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type SymbolIterator<'file> = MachOSymbolIterator<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type 
SymbolTable<'file> = MachOSymbolTable<'data, 'file, Mach, R> where Self: 'file, 'data: 'file; + type DynamicRelocationIterator<'file> = NoDynamicRelocationIterator where Self: 'file, 'data: 'file; fn architecture(&self) -> Architecture { match self.header.cputype(self.endian) { @@ -202,6 +246,16 @@ where } } + fn sub_architecture(&self) -> Option { + match ( + self.header.cputype(self.endian), + self.header.cpusubtype(self.endian), + ) { + (macho::CPU_TYPE_ARM64, macho::CPU_SUBTYPE_ARM64E) => Some(SubArchitecture::Arm64E), + _ => None, + } + } + #[inline] fn is_little_endian(&self) -> bool { self.header.is_little_endian() @@ -222,49 +276,46 @@ where } } - fn segments(&'file self) -> MachOSegmentIterator<'data, 'file, Mach, R> { + fn segments(&self) -> MachOSegmentIterator<'data, '_, Mach, R> { MachOSegmentIterator { file: self, iter: self.segments.iter(), } } - fn section_by_name_bytes( + fn section_by_name_bytes<'file>( &'file self, section_name: &[u8], ) -> Option> { - // Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg - // ".debug_info" to "__debug_info", and limit to 16 bytes total. - let system_name = if section_name.starts_with(b".") { - if section_name.len() > 15 { - Some(§ion_name[1..15]) - } else { - Some(§ion_name[1..]) - } - } else { - None + // Translate the section_name by stripping the query_prefix to construct + // a function that matches names starting with name_prefix, taking into + // consideration the maximum section name length. 
+ let make_prefix_matcher = |query_prefix: &'static [u8], name_prefix: &'static [u8]| { + const MAX_SECTION_NAME_LEN: usize = 16; + let suffix = section_name.strip_prefix(query_prefix).map(|suffix| { + let max_len = MAX_SECTION_NAME_LEN - name_prefix.len(); + &suffix[..suffix.len().min(max_len)] + }); + move |name: &[u8]| suffix.is_some() && name.strip_prefix(name_prefix) == suffix }; - let cmp_section_name = |section: &MachOSection<'data, 'file, Mach, R>| { - section - .name_bytes() - .map(|name| { - section_name == name - || system_name - .filter(|system_name| { - name.starts_with(b"__") && name[2..] == **system_name - }) - .is_some() - }) - .unwrap_or(false) - }; - - self.sections().find(cmp_section_name) + // Matches "__text" when searching for ".text" and "__debug_str_offs" + // when searching for ".debug_str_offsets", as is common in + // macOS/Mach-O. + let matches_underscores_prefix = make_prefix_matcher(b".", b"__"); + // Matches "__zdebug_info" when searching for ".debug_info" and + // "__zdebug_str_off" when searching for ".debug_str_offsets", as is + // used by Go when using GNU-style compression. 
+ let matches_zdebug_prefix = make_prefix_matcher(b".debug_", b"__zdebug_"); + self.sections().find(|section| { + section.name_bytes().map_or(false, |name| { + name == section_name + || matches_underscores_prefix(name) + || matches_zdebug_prefix(name) + }) + }) } - fn section_by_index( - &'file self, - index: SectionIndex, - ) -> Result> { + fn section_by_index(&self, index: SectionIndex) -> Result> { let internal = *self.section_internal(index)?; Ok(MachOSection { file: self, @@ -272,50 +323,41 @@ where }) } - fn sections(&'file self) -> MachOSectionIterator<'data, 'file, Mach, R> { + fn sections(&self) -> MachOSectionIterator<'data, '_, Mach, R> { MachOSectionIterator { file: self, iter: self.sections.iter(), } } - fn comdats(&'file self) -> MachOComdatIterator<'data, 'file, Mach, R> { + fn comdats(&self) -> MachOComdatIterator<'data, '_, Mach, R> { MachOComdatIterator { file: self } } - fn symbol_by_index( - &'file self, - index: SymbolIndex, - ) -> Result> { - let nlist = self.symbols.symbol(index.0)?; + fn symbol_by_index(&self, index: SymbolIndex) -> Result> { + let nlist = self.symbols.symbol(index)?; MachOSymbol::new(self, index, nlist).read_error("Unsupported Mach-O symbol index") } - fn symbols(&'file self) -> MachOSymbolIterator<'data, 'file, Mach, R> { - MachOSymbolIterator { - file: self, - index: 0, - } + fn symbols(&self) -> MachOSymbolIterator<'data, '_, Mach, R> { + MachOSymbolIterator::new(self) } #[inline] - fn symbol_table(&'file self) -> Option> { + fn symbol_table(&self) -> Option> { Some(MachOSymbolTable { file: self }) } - fn dynamic_symbols(&'file self) -> MachOSymbolIterator<'data, 'file, Mach, R> { - MachOSymbolIterator { - file: self, - index: self.symbols.len(), - } + fn dynamic_symbols(&self) -> MachOSymbolIterator<'data, '_, Mach, R> { + MachOSymbolIterator::empty(self) } #[inline] - fn dynamic_symbol_table(&'file self) -> Option> { + fn dynamic_symbol_table(&self) -> Option> { None } - fn object_map(&'file self) -> ObjectMap<'data> { 
+ fn object_map(&self) -> ObjectMap<'data> { self.symbols.object_map(self.endian) } @@ -345,7 +387,7 @@ where let index = dysymtab.iundefsym.get(self.endian) as usize; let number = dysymtab.nundefsym.get(self.endian) as usize; for i in index..(index.wrapping_add(number)) { - let symbol = self.symbols.symbol(i)?; + let symbol = self.symbols.symbol(SymbolIndex(i))?; let name = symbol.name(self.endian, self.symbols.strings())?; let library = if twolevel { libraries @@ -381,7 +423,7 @@ where let index = dysymtab.iextdefsym.get(self.endian) as usize; let number = dysymtab.nextdefsym.get(self.endian) as usize; for i in index..(index.wrapping_add(number)) { - let symbol = self.symbols.symbol(i)?; + let symbol = self.symbols.symbol(SymbolIndex(i))?; let name = symbol.name(self.endian, self.symbols.strings())?; let address = symbol.n_value(self.endian).into(); exports.push(Export { @@ -394,7 +436,7 @@ where } #[inline] - fn dynamic_relocations(&'file self) -> Option { + fn dynamic_relocations(&self) -> Option { None } @@ -431,14 +473,16 @@ where } } -/// An iterator over the COMDAT section groups of a `MachOFile64`. +/// An iterator for the COMDAT section groups in a [`MachOFile64`]. pub type MachOComdatIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOComdatIterator<'data, 'file, macho::MachHeader32, R>; -/// An iterator over the COMDAT section groups of a `MachOFile64`. +/// An iterator for the COMDAT section groups in a [`MachOFile64`]. pub type MachOComdatIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOComdatIterator<'data, 'file, macho::MachHeader64, R>; -/// An iterator over the COMDAT section groups of a `MachOFile`. +/// An iterator for the COMDAT section groups in a [`MachOFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct MachOComdatIterator<'data, 'file, Mach, R = &'data [u8]> where @@ -462,15 +506,17 @@ where } } -/// A COMDAT section group of a `MachOFile32`. 
+/// A COMDAT section group in a [`MachOFile32`]. pub type MachOComdat32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOComdat<'data, 'file, macho::MachHeader32, R>; -/// A COMDAT section group of a `MachOFile64`. +/// A COMDAT section group in a [`MachOFile64`]. pub type MachOComdat64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOComdat<'data, 'file, macho::MachHeader64, R>; -/// A COMDAT section group of a `MachOFile`. +/// A COMDAT section group in a [`MachOFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct MachOComdat<'data, 'file, Mach, R = &'data [u8]> where @@ -506,12 +552,12 @@ where } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { unreachable!(); } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { unreachable!(); } @@ -521,14 +567,16 @@ where } } -/// An iterator over the sections in a COMDAT section group of a `MachOFile32`. +/// An iterator for the sections in a COMDAT section group in a [`MachOFile32`]. pub type MachOComdatSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOComdatSectionIterator<'data, 'file, macho::MachHeader32, R>; -/// An iterator over the sections in a COMDAT section group of a `MachOFile64`. +/// An iterator for the sections in a COMDAT section group in a [`MachOFile64`]. pub type MachOComdatSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOComdatSectionIterator<'data, 'file, macho::MachHeader64, R>; -/// An iterator over the sections in a COMDAT section group of a `MachOFile`. +/// An iterator for the sections in a COMDAT section group in a [`MachOFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct MachOComdatSectionIterator<'data, 'file, Mach, R = &'data [u8]> where @@ -551,7 +599,7 @@ where } } -/// A trait for generic access to `MachHeader32` and `MachHeader64`. 
+/// A trait for generic access to [`macho::MachHeader32`] and [`macho::MachHeader64`]. #[allow(missing_docs)] pub trait MachHeader: Debug + Pod { type Word: Into; diff --git a/third_party/rust/object/src/read/macho/load_command.rs b/third_party/rust/object/src/read/macho/load_command.rs index e9af89d8bf43..bc49530d0456 100644 --- a/third_party/rust/object/src/read/macho/load_command.rs +++ b/third_party/rust/object/src/read/macho/load_command.rs @@ -7,7 +7,7 @@ use crate::pod::Pod; use crate::read::macho::{MachHeader, SymbolTable}; use crate::read::{Bytes, Error, ReadError, ReadRef, Result, StringTable}; -/// An iterator over the load commands of a `MachHeader`. +/// An iterator for the load commands from a [`MachHeader`]. #[derive(Debug, Default, Clone, Copy)] pub struct LoadCommandIterator<'data, E: Endian> { endian: E, @@ -51,7 +51,7 @@ impl<'data, E: Endian> LoadCommandIterator<'data, E> { } } -/// The data for a `LoadCommand`. +/// The data for a [`macho::LoadCommand`]. #[derive(Debug, Clone, Copy)] pub struct LoadCommandData<'data, E: Endian> { cmd: u32, @@ -61,14 +61,14 @@ pub struct LoadCommandData<'data, E: Endian> { } impl<'data, E: Endian> LoadCommandData<'data, E> { - /// Return the `cmd` field of the `LoadCommand`. + /// Return the `cmd` field of the [`macho::LoadCommand`]. /// /// This is one of the `LC_` constants. pub fn cmd(&self) -> u32 { self.cmd } - /// Return the `cmdsize` field of the `LoadCommand`. + /// Return the `cmdsize` field of the [`macho::LoadCommand`]. pub fn cmdsize(&self) -> u32 { self.data.len() as u32 } @@ -81,7 +81,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { .read_error("Invalid Mach-O command size") } - /// Raw bytes of this LoadCommand structure. + /// Raw bytes of this [`macho::LoadCommand`] structure. pub fn raw_data(&self) -> &'data [u8] { self.data.0 } @@ -163,7 +163,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { }) } - /// Try to parse this command as a `SegmentCommand32`. 
+ /// Try to parse this command as a [`macho::SegmentCommand32`]. /// /// Returns the segment command and the data containing the sections. pub fn segment_32(self) -> Result, &'data [u8])>> { @@ -176,7 +176,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { } } - /// Try to parse this command as a `SymtabCommand`. + /// Try to parse this command as a [`macho::SymtabCommand`]. /// /// Returns the segment command and the data containing the sections. pub fn symtab(self) -> Result>> { @@ -187,7 +187,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { } } - /// Try to parse this command as a `DysymtabCommand`. + /// Try to parse this command as a [`macho::DysymtabCommand`]. pub fn dysymtab(self) -> Result>> { if self.cmd == macho::LC_DYSYMTAB { Some(self.data()).transpose() @@ -196,7 +196,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { } } - /// Try to parse this command as a `DylibCommand`. + /// Try to parse this command as a [`macho::DylibCommand`]. pub fn dylib(self) -> Result>> { if self.cmd == macho::LC_LOAD_DYLIB || self.cmd == macho::LC_LOAD_WEAK_DYLIB @@ -210,7 +210,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { } } - /// Try to parse this command as a `UuidCommand`. + /// Try to parse this command as a [`macho::UuidCommand`]. pub fn uuid(self) -> Result>> { if self.cmd == macho::LC_UUID { Some(self.data()).transpose() @@ -219,7 +219,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { } } - /// Try to parse this command as a `SegmentCommand64`. + /// Try to parse this command as a [`macho::SegmentCommand64`]. pub fn segment_64(self) -> Result, &'data [u8])>> { if self.cmd == macho::LC_SEGMENT_64 { let mut data = self.data; @@ -230,7 +230,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { } } - /// Try to parse this command as a `DyldInfoCommand`. + /// Try to parse this command as a [`macho::DyldInfoCommand`]. 
pub fn dyld_info(self) -> Result>> { if self.cmd == macho::LC_DYLD_INFO || self.cmd == macho::LC_DYLD_INFO_ONLY { Some(self.data()).transpose() @@ -239,7 +239,7 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { } } - /// Try to parse this command as an `EntryPointCommand`. + /// Try to parse this command as an [`macho::EntryPointCommand`]. pub fn entry_point(self) -> Result>> { if self.cmd == macho::LC_MAIN { Some(self.data()).transpose() @@ -247,9 +247,18 @@ impl<'data, E: Endian> LoadCommandData<'data, E> { Ok(None) } } + + /// Try to parse this command as a [`macho::BuildVersionCommand`]. + pub fn build_version(self) -> Result>> { + if self.cmd == macho::LC_BUILD_VERSION { + Some(self.data()).transpose() + } else { + Ok(None) + } + } } -/// A `LoadCommand` that has been interpreted according to its `cmd` field. +/// A [`macho::LoadCommand`] that has been interpreted according to its `cmd` field. #[derive(Debug, Clone, Copy)] #[non_exhaustive] pub enum LoadCommandVariant<'data, E: Endian> { @@ -363,11 +372,15 @@ mod tests { #[test] fn cmd_size_invalid() { - let mut commands = LoadCommandIterator::new(LittleEndian, &[0; 8], 10); + #[repr(align(16))] + struct Align([u8; N]); + let mut commands = LoadCommandIterator::new(LittleEndian, &Align([0; 8]).0, 10); assert!(commands.next().is_err()); - let mut commands = LoadCommandIterator::new(LittleEndian, &[0, 0, 0, 0, 7, 0, 0, 0, 0], 10); + let mut commands = + LoadCommandIterator::new(LittleEndian, &Align([0, 0, 0, 0, 7, 0, 0, 0, 0]).0, 10); assert!(commands.next().is_err()); - let mut commands = LoadCommandIterator::new(LittleEndian, &[0, 0, 0, 0, 8, 0, 0, 0, 0], 10); + let mut commands = + LoadCommandIterator::new(LittleEndian, &Align([0, 0, 0, 0, 8, 0, 0, 0, 0]).0, 10); assert!(commands.next().is_ok()); } } diff --git a/third_party/rust/object/src/read/macho/mod.rs b/third_party/rust/object/src/read/macho/mod.rs index f07ed581b6a9..ab51ff32ebe4 100644 --- a/third_party/rust/object/src/read/macho/mod.rs +++ 
b/third_party/rust/object/src/read/macho/mod.rs @@ -1,9 +1,51 @@ //! Support for reading Mach-O files. //! -//! Defines traits to abstract over the difference between 32-bit and 64-bit -//! Mach-O files, and implements read functionality in terms of these traits. +//! Traits are used to abstract over the difference between 32-bit and 64-bit Mach-O +//! files. The primary trait for this is [`MachHeader`]. //! -//! Also provides `MachOFile` and related types which implement the `Object` trait. +//! ## High level API +//! +//! [`MachOFile`] implements the [`Object`](crate::read::Object) trait for Mach-O files. +//! [`MachOFile`] is parameterised by [`MachHeader`] to allow reading both 32-bit and +//! 64-bit Mach-O files. There are type aliases for these parameters ([`MachOFile32`] and +//! [`MachOFile64`]). +//! +//! ## Low level API +//! +//! The [`MachHeader`] trait can be directly used to parse both [`macho::MachHeader32`] +//! and [`macho::MachHeader64`]. Additionally, [`FatHeader`] and the [`FatArch`] trait +//! can be used to iterate images in multi-architecture binaries, and [`DyldCache`] can +//! be used to locate images in a dyld shared cache. +//! +//! ### Example for low level API +//! ```no_run +//! use object::macho; +//! use object::read::macho::{MachHeader, Nlist}; +//! use std::error::Error; +//! use std::fs; +//! +//! /// Reads a file and displays the name of each symbol. +//! fn main() -> Result<(), Box> { +//! # #[cfg(feature = "std")] { +//! let data = fs::read("path/to/binary")?; +//! let header = macho::MachHeader64::::parse(&*data, 0)?; +//! let endian = header.endian()?; +//! let mut commands = header.load_commands(endian, &*data, 0)?; +//! while let Some(command) = commands.next()? { +//! if let Some(symtab_command) = command.symtab()? { +//! let symbols = symtab_command.symbols::, _>(endian, &*data)?; +//! for symbol in symbols.iter() { +//! let name = symbol.name(endian, symbols.strings())?; +//! 
println!("{}", String::from_utf8_lossy(name)); +//! } +//! } +//! } +//! # } +//! Ok(()) +//! } +//! ``` +#[cfg(doc)] +use crate::macho; mod dyld_cache; pub use dyld_cache::*; diff --git a/third_party/rust/object/src/read/macho/relocation.rs b/third_party/rust/object/src/read/macho/relocation.rs index 18e22ef7064c..30988d69d0a2 100644 --- a/third_party/rust/object/src/read/macho/relocation.rs +++ b/third_party/rust/object/src/read/macho/relocation.rs @@ -3,20 +3,20 @@ use core::{fmt, slice}; use crate::endian::Endianness; use crate::macho; use crate::read::{ - ReadRef, Relocation, RelocationEncoding, RelocationKind, RelocationTarget, SectionIndex, - SymbolIndex, + ReadRef, Relocation, RelocationEncoding, RelocationFlags, RelocationKind, RelocationTarget, + SectionIndex, SymbolIndex, }; use super::{MachHeader, MachOFile}; -/// An iterator over the relocations in a `MachOSection32`. +/// An iterator for the relocations in a [`MachOSection32`](super::MachOSection32). pub type MachORelocationIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachORelocationIterator<'data, 'file, macho::MachHeader32, R>; -/// An iterator over the relocations in a `MachOSection64`. +/// An iterator for the relocations in a [`MachOSection64`](super::MachOSection64). pub type MachORelocationIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachORelocationIterator<'data, 'file, macho::MachHeader64, R>; -/// An iterator over the relocations in a `MachOSection`. +/// An iterator for the relocations in a [`MachOSection`](super::MachOSection). 
pub struct MachORelocationIterator<'data, 'file, Mach, R = &'data [u8]> where Mach: MachHeader, @@ -34,6 +34,7 @@ where type Item = (u64, Relocation); fn next(&mut self) -> Option { + let mut paired_addend = 0; loop { let reloc = self.relocations.next()?; let endian = self.file.endian; @@ -44,30 +45,32 @@ where continue; } let reloc = reloc.info(self.file.endian); + let flags = RelocationFlags::MachO { + r_type: reloc.r_type, + r_pcrel: reloc.r_pcrel, + r_length: reloc.r_length, + }; let mut encoding = RelocationEncoding::Generic; let kind = match cputype { macho::CPU_TYPE_ARM => match (reloc.r_type, reloc.r_pcrel) { (macho::ARM_RELOC_VANILLA, false) => RelocationKind::Absolute, - _ => RelocationKind::MachO { - value: reloc.r_type, - relative: reloc.r_pcrel, - }, + _ => RelocationKind::Unknown, }, macho::CPU_TYPE_ARM64 | macho::CPU_TYPE_ARM64_32 => { match (reloc.r_type, reloc.r_pcrel) { (macho::ARM64_RELOC_UNSIGNED, false) => RelocationKind::Absolute, - _ => RelocationKind::MachO { - value: reloc.r_type, - relative: reloc.r_pcrel, - }, + (macho::ARM64_RELOC_ADDEND, _) => { + paired_addend = i64::from(reloc.r_symbolnum) + .wrapping_shl(64 - 24) + .wrapping_shr(64 - 24); + continue; + } + _ => RelocationKind::Unknown, } } macho::CPU_TYPE_X86 => match (reloc.r_type, reloc.r_pcrel) { (macho::GENERIC_RELOC_VANILLA, false) => RelocationKind::Absolute, - _ => RelocationKind::MachO { - value: reloc.r_type, - relative: reloc.r_pcrel, - }, + _ => RelocationKind::Unknown, }, macho::CPU_TYPE_X86_64 => match (reloc.r_type, reloc.r_pcrel) { (macho::X86_64_RELOC_UNSIGNED, false) => RelocationKind::Absolute, @@ -84,15 +87,9 @@ where encoding = RelocationEncoding::X86RipRelativeMovq; RelocationKind::GotRelative } - _ => RelocationKind::MachO { - value: reloc.r_type, - relative: reloc.r_pcrel, - }, - }, - _ => RelocationKind::MachO { - value: reloc.r_type, - relative: reloc.r_pcrel, + _ => RelocationKind::Unknown, }, + _ => RelocationKind::Unknown, }; let size = 8 << 
reloc.r_length; let target = if reloc.r_extern { @@ -100,7 +97,31 @@ where } else { RelocationTarget::Section(SectionIndex(reloc.r_symbolnum as usize)) }; - let addend = if reloc.r_pcrel { -4 } else { 0 }; + let implicit_addend = paired_addend == 0; + let mut addend = paired_addend; + if reloc.r_pcrel { + // For PC relative relocations on some architectures, the + // addend does not include the offset required due to the + // PC being different from the place of the relocation. + // This differs from other file formats, so adjust the + // addend here to account for this. + match cputype { + macho::CPU_TYPE_X86 => { + addend -= 1 << reloc.r_length; + } + macho::CPU_TYPE_X86_64 => { + addend -= 1 << reloc.r_length; + match reloc.r_type { + macho::X86_64_RELOC_SIGNED_1 => addend -= 1, + macho::X86_64_RELOC_SIGNED_2 => addend -= 2, + macho::X86_64_RELOC_SIGNED_4 => addend -= 4, + _ => {} + } + } + // TODO: maybe missing support for some architectures and relocations + _ => {} + } + } return Some(( reloc.r_address as u64, Relocation { @@ -109,7 +130,8 @@ where size, target, addend, - implicit_addend: true, + implicit_addend, + flags, }, )); } diff --git a/third_party/rust/object/src/read/macho/section.rs b/third_party/rust/object/src/read/macho/section.rs index f43a5b83d46e..d27a9a9f757f 100644 --- a/third_party/rust/object/src/read/macho/section.rs +++ b/third_party/rust/object/src/read/macho/section.rs @@ -5,27 +5,27 @@ use crate::endian::{self, Endianness}; use crate::macho; use crate::pod::Pod; use crate::read::{ - self, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef, Result, - SectionFlags, SectionIndex, SectionKind, + self, gnu_compression, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef, + RelocationMap, Result, SectionFlags, SectionIndex, SectionKind, }; use super::{MachHeader, MachOFile, MachORelocationIterator}; -/// An iterator over the sections of a `MachOFile32`. 
+/// An iterator for the sections in a [`MachOFile32`](super::MachOFile32). pub type MachOSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSectionIterator<'data, 'file, macho::MachHeader32, R>; -/// An iterator over the sections of a `MachOFile64`. +/// An iterator for the sections in a [`MachOFile64`](super::MachOFile64). pub type MachOSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSectionIterator<'data, 'file, macho::MachHeader64, R>; -/// An iterator over the sections of a `MachOFile`. +/// An iterator for the sections in a [`MachOFile`]. pub struct MachOSectionIterator<'data, 'file, Mach, R = &'data [u8]> where Mach: MachHeader, R: ReadRef<'data>, { pub(super) file: &'file MachOFile<'data, Mach, R>, - pub(super) iter: slice::Iter<'file, MachOSectionInternal<'data, Mach>>, + pub(super) iter: slice::Iter<'file, MachOSectionInternal<'data, Mach, R>>, } impl<'data, 'file, Mach, R> fmt::Debug for MachOSectionIterator<'data, 'file, Mach, R> @@ -54,14 +54,16 @@ where } } -/// A section of a `MachOFile32`. +/// A section in a [`MachOFile32`](super::MachOFile32). pub type MachOSection32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSection<'data, 'file, macho::MachHeader32, R>; -/// A section of a `MachOFile64`. +/// A section in a [`MachOFile64`](super::MachOFile64). pub type MachOSection64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSection<'data, 'file, macho::MachHeader64, R>; -/// A section of a `MachOFile`. +/// A section in a [`MachOFile`]. +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. 
#[derive(Debug)] pub struct MachOSection<'data, 'file, Mach, R = &'data [u8]> where @@ -69,7 +71,7 @@ where R: ReadRef<'data>, { pub(super) file: &'file MachOFile<'data, Mach, R>, - pub(super) internal: MachOSectionInternal<'data, Mach>, + pub(super) internal: MachOSectionInternal<'data, Mach, R>, } impl<'data, 'file, Mach, R> MachOSection<'data, 'file, Mach, R> @@ -77,14 +79,44 @@ where Mach: MachHeader, R: ReadRef<'data>, { - fn bytes(&self) -> Result<&'data [u8]> { - let segment_index = self.internal.segment_index; - let segment = self.file.segment_internal(segment_index)?; + /// Get the Mach-O file containing this section. + pub fn macho_file(&self) -> &'file MachOFile<'data, Mach, R> { + self.file + } + + /// Get the raw Mach-O section structure. + pub fn macho_section(&self) -> &'data Mach::Section { + self.internal.section + } + + /// Get the raw Mach-O relocation entries. + pub fn macho_relocations(&self) -> Result<&'data [macho::Relocation]> { self.internal .section - .data(self.file.endian, segment.data) + .relocations(self.file.endian, self.internal.data) + } + + fn bytes(&self) -> Result<&'data [u8]> { + self.internal + .section + .data(self.file.endian, self.internal.data) .read_error("Invalid Mach-O section size or offset") } + + // Try GNU-style "ZLIB" header decompression. + fn maybe_compressed_gnu(&self) -> Result> { + if !self + .name() + .map_or(false, |name| name.starts_with("__zdebug_")) + { + return Ok(None); + } + let (section_offset, section_size) = self + .file_range() + .read_error("Invalid ELF GNU compressed section type")?; + gnu_compression::compressed_file_range(self.internal.data, section_offset, section_size) + .map(Some) + } } impl<'data, 'file, Mach, R> read::private::Sealed for MachOSection<'data, 'file, Mach, R> @@ -145,23 +177,25 @@ where )) } - #[inline] fn compressed_file_range(&self) -> Result { - Ok(CompressedFileRange::none(self.file_range())) + Ok(if let Some(data) = self.maybe_compressed_gnu()? 
{ + data + } else { + CompressedFileRange::none(self.file_range()) + }) + } + + fn compressed_data(&self) -> read::Result> { + self.compressed_file_range()?.data(self.file.data) } #[inline] - fn compressed_data(&self) -> Result> { - self.data().map(CompressedData::none) - } - - #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { Ok(self.internal.section.name()) } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { str::from_utf8(self.internal.section.name()) .ok() .read_error("Non UTF-8 Mach-O section name") @@ -188,15 +222,14 @@ where fn relocations(&self) -> MachORelocationIterator<'data, 'file, Mach, R> { MachORelocationIterator { file: self.file, - relocations: self - .internal - .section - .relocations(self.file.endian, self.file.data) - .unwrap_or(&[]) - .iter(), + relocations: self.macho_relocations().unwrap_or(&[]).iter(), } } + fn relocation_map(&self) -> read::Result { + RelocationMap::new(self.file, self) + } + fn flags(&self) -> SectionFlags { SectionFlags::MachO { flags: self.internal.section.flags(self.file.endian), @@ -205,19 +238,19 @@ where } #[derive(Debug, Clone, Copy)] -pub(super) struct MachOSectionInternal<'data, Mach: MachHeader> { +pub(super) struct MachOSectionInternal<'data, Mach: MachHeader, R: ReadRef<'data>> { pub index: SectionIndex, - pub segment_index: usize, pub kind: SectionKind, pub section: &'data Mach::Section, + /// The data for the file that contains the section data. + /// + /// This is required for dyld caches, where this may be a different subcache + /// from the file containing the Mach-O load commands. 
+ pub data: R, } -impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { - pub(super) fn parse( - index: SectionIndex, - segment_index: usize, - section: &'data Mach::Section, - ) -> Self { +impl<'data, Mach: MachHeader, R: ReadRef<'data>> MachOSectionInternal<'data, Mach, R> { + pub(super) fn parse(index: SectionIndex, section: &'data Mach::Section, data: R) -> Self { // TODO: we don't validate flags, should we? let kind = match (section.segment_name(), section.name()) { (b"__TEXT", b"__text") => SectionKind::Text, @@ -240,14 +273,14 @@ impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { }; MachOSectionInternal { index, - segment_index, kind, section, + data, } } } -/// A trait for generic access to `Section32` and `Section64`. +/// A trait for generic access to [`macho::Section32`] and [`macho::Section64`]. #[allow(missing_docs)] pub trait Section: Debug + Pod { type Word: Into; diff --git a/third_party/rust/object/src/read/macho/segment.rs b/third_party/rust/object/src/read/macho/segment.rs index 01037e1dd415..f704c5c987ed 100644 --- a/third_party/rust/object/src/read/macho/segment.rs +++ b/third_party/rust/object/src/read/macho/segment.rs @@ -8,14 +8,14 @@ use crate::read::{self, ObjectSegment, ReadError, ReadRef, Result, SegmentFlags} use super::{LoadCommandData, MachHeader, MachOFile, Section}; -/// An iterator over the segments of a `MachOFile32`. +/// An iterator for the segments in a [`MachOFile32`](super::MachOFile32). pub type MachOSegmentIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSegmentIterator<'data, 'file, macho::MachHeader32, R>; -/// An iterator over the segments of a `MachOFile64`. +/// An iterator for the segments in a [`MachOFile64`](super::MachOFile64). pub type MachOSegmentIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSegmentIterator<'data, 'file, macho::MachHeader64, R>; -/// An iterator over the segments of a `MachOFile`. 
+/// An iterator for the segments in a [`MachOFile`]. #[derive(Debug)] pub struct MachOSegmentIterator<'data, 'file, Mach, R = &'data [u8]> where @@ -41,14 +41,16 @@ where } } -/// A segment of a `MachOFile32`. +/// A segment in a [`MachOFile32`](super::MachOFile32). pub type MachOSegment32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSegment<'data, 'file, macho::MachHeader32, R>; -/// A segment of a `MachOFile64`. +/// A segment in a [`MachOFile64`](super::MachOFile64). pub type MachOSegment64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSegment<'data, 'file, macho::MachHeader64, R>; -/// A segment of a `MachOFile`. +/// A segment in a [`MachOFile`]. +/// +/// Most functionality is provided by the [`ObjectSegment`] trait implementation. #[derive(Debug)] pub struct MachOSegment<'data, 'file, Mach, R = &'data [u8]> where @@ -64,10 +66,20 @@ where Mach: MachHeader, R: ReadRef<'data>, { + /// Get the Mach-O file containing this segment. + pub fn macho_file(&self) -> &'file MachOFile<'data, Mach, R> { + self.file + } + + /// Get the raw Mach-O segment structure. + pub fn macho_segment(&self) -> &'data Mach::Segment { + self.internal.segment + } + fn bytes(&self) -> Result<&'data [u8]> { self.internal .segment - .data(self.file.endian, self.file.data) + .data(self.file.endian, self.internal.data) .read_error("Invalid Mach-O segment size or offset") } } @@ -147,11 +159,15 @@ where #[derive(Debug, Clone, Copy)] pub(super) struct MachOSegmentInternal<'data, Mach: MachHeader, R: ReadRef<'data>> { - pub data: R, pub segment: &'data Mach::Segment, + /// The data for the file that contains the segment data. + /// + /// This is required for dyld caches, where this may be a different subcache + /// from the file containing the Mach-O load commands. + pub data: R, } -/// A trait for generic access to `SegmentCommand32` and `SegmentCommand64`. +/// A trait for generic access to [`macho::SegmentCommand32`] and [`macho::SegmentCommand64`]. 
#[allow(missing_docs)] pub trait Segment: Debug + Pod { type Word: Into; diff --git a/third_party/rust/object/src/read/macho/symbol.rs b/third_party/rust/object/src/read/macho/symbol.rs index ef88521451ec..f5579fce77eb 100644 --- a/third_party/rust/object/src/read/macho/symbol.rs +++ b/third_party/rust/object/src/read/macho/symbol.rs @@ -7,9 +7,9 @@ use crate::macho; use crate::pod::Pod; use crate::read::util::StringTable; use crate::read::{ - self, ObjectMap, ObjectMapEntry, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Result, - SectionIndex, SectionKind, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapEntry, - SymbolScope, SymbolSection, + self, ObjectMap, ObjectMapEntry, ObjectMapFile, ObjectSymbol, ObjectSymbolTable, ReadError, + ReadRef, Result, SectionIndex, SectionKind, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, + SymbolMapEntry, SymbolScope, SymbolSection, }; use super::{MachHeader, MachOFile}; @@ -17,6 +17,8 @@ use super::{MachHeader, MachOFile}; /// A table of symbol entries in a Mach-O file. /// /// Also includes the string table used for the symbol names. +/// +/// Returned by [`macho::SymtabCommand::symbols`]. #[derive(Debug, Clone, Copy)] pub struct SymbolTable<'data, Mach: MachHeader, R = &'data [u8]> where @@ -66,9 +68,9 @@ impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> { } /// Return the symbol at the given index. - pub fn symbol(&self, index: usize) -> Result<&'data Mach::Nlist> { + pub fn symbol(&self, index: SymbolIndex) -> Result<&'data Mach::Nlist> { self.symbols - .get(index) + .get(index.0) .read_error("Invalid Mach-O symbol index") } @@ -113,7 +115,20 @@ impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> { if let Ok(name) = nlist.name(endian, self.strings) { if !name.is_empty() { object = Some(objects.len()); - objects.push(name); + // `N_OSO` symbol names can be either `/path/to/object.o` + // or `/path/to/archive.a(object.o)`. 
+ let (path, member) = name + .split_last() + .and_then(|(last, head)| { + if *last != b')' { + return None; + } + let index = head.iter().position(|&x| x == b'(')?; + let (archive, rest) = head.split_at(index); + Some((archive, Some(&rest[1..]))) + }) + .unwrap_or((name, None)); + objects.push(ObjectMapFile::new(path, member)); } } } @@ -143,14 +158,14 @@ impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> { } } -/// An iterator over the symbols of a `MachOFile32`. +/// A symbol table in a [`MachOFile32`](super::MachOFile32). pub type MachOSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSymbolTable<'data, 'file, macho::MachHeader32, R>; -/// An iterator over the symbols of a `MachOFile64`. +/// A symbol table in a [`MachOFile64`](super::MachOFile64). pub type MachOSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSymbolTable<'data, 'file, macho::MachHeader64, R>; -/// A symbol table of a `MachOFile`. +/// A symbol table in a [`MachOFile`]. #[derive(Debug, Clone, Copy)] pub struct MachOSymbolTable<'data, 'file, Mach, R = &'data [u8]> where @@ -176,33 +191,50 @@ where type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>; fn symbols(&self) -> Self::SymbolIterator { - MachOSymbolIterator { - file: self.file, - index: 0, - } + MachOSymbolIterator::new(self.file) } fn symbol_by_index(&self, index: SymbolIndex) -> Result { - let nlist = self.file.symbols.symbol(index.0)?; + let nlist = self.file.symbols.symbol(index)?; MachOSymbol::new(self.file, index, nlist).read_error("Unsupported Mach-O symbol index") } } -/// An iterator over the symbols of a `MachOFile32`. +/// An iterator for the symbols in a [`MachOFile32`](super::MachOFile32). pub type MachOSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSymbolIterator<'data, 'file, macho::MachHeader32, R>; -/// An iterator over the symbols of a `MachOFile64`. 
+/// An iterator for the symbols in a [`MachOFile64`](super::MachOFile64). pub type MachOSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSymbolIterator<'data, 'file, macho::MachHeader64, R>; -/// An iterator over the symbols of a `MachOFile`. +/// An iterator for the symbols in a [`MachOFile`]. pub struct MachOSymbolIterator<'data, 'file, Mach, R = &'data [u8]> where Mach: MachHeader, R: ReadRef<'data>, { - pub(super) file: &'file MachOFile<'data, Mach, R>, - pub(super) index: usize, + file: &'file MachOFile<'data, Mach, R>, + index: SymbolIndex, +} + +impl<'data, 'file, Mach, R> MachOSymbolIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) fn new(file: &'file MachOFile<'data, Mach, R>) -> Self { + MachOSymbolIterator { + file, + index: SymbolIndex(0), + } + } + + pub(super) fn empty(file: &'file MachOFile<'data, Mach, R>) -> Self { + MachOSymbolIterator { + file, + index: SymbolIndex(file.symbols.len()), + } + } } impl<'data, 'file, Mach, R> fmt::Debug for MachOSymbolIterator<'data, 'file, Mach, R> @@ -225,23 +257,25 @@ where fn next(&mut self) -> Option { loop { let index = self.index; - let nlist = self.file.symbols.symbols.get(index)?; - self.index += 1; - if let Some(symbol) = MachOSymbol::new(self.file, SymbolIndex(index), nlist) { + let nlist = self.file.symbols.symbols.get(index.0)?; + self.index.0 += 1; + if let Some(symbol) = MachOSymbol::new(self.file, index, nlist) { return Some(symbol); } } } } -/// A symbol of a `MachOFile32`. +/// A symbol in a [`MachOFile32`](super::MachOFile32). pub type MachOSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSymbol<'data, 'file, macho::MachHeader32, R>; -/// A symbol of a `MachOFile64`. +/// A symbol in a [`MachOFile64`](super::MachOFile64). pub type MachOSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> = MachOSymbol<'data, 'file, macho::MachHeader64, R>; -/// A symbol of a `MachOFile`. 
+/// A symbol in a [`MachOFile`]. +/// +/// Most functionality is provided by the [`ObjectSymbol`] trait implementation. #[derive(Debug, Clone, Copy)] pub struct MachOSymbol<'data, 'file, Mach, R = &'data [u8]> where @@ -268,6 +302,16 @@ where } Some(MachOSymbol { file, index, nlist }) } + + /// Get the Mach-O file containing this symbol. + pub fn macho_file(&self) -> &'file MachOFile<'data, Mach, R> { + self.file + } + + /// Get the raw Mach-O symbol structure. + pub fn macho_symbol(&self) -> &'data Mach::Nlist { + self.nlist + } } impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbol<'data, 'file, Mach, R> @@ -394,7 +438,7 @@ where } } -/// A trait for generic access to `Nlist32` and `Nlist64`. +/// A trait for generic access to [`macho::Nlist32`] and [`macho::Nlist64`]. #[allow(missing_docs)] pub trait Nlist: Debug + Pod { type Word: Into; @@ -432,7 +476,7 @@ pub trait Nlist: Debug + Pod { /// Return true if the symbol is a definition of a function or data object. fn is_definition(&self) -> bool { let n_type = self.n_type(); - n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE != macho::N_UNDF + n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_SECT } /// Return the library ordinal. diff --git a/third_party/rust/object/src/read/mod.rs b/third_party/rust/object/src/read/mod.rs index 8230d43ba546..50bcd7b2f1ab 100644 --- a/third_party/rust/object/src/read/mod.rs +++ b/third_party/rust/object/src/read/mod.rs @@ -1,22 +1,69 @@ //! Interface for reading object files. +//! +//! ## Unified read API +//! +//! The [`Object`] trait provides a unified read API for accessing common features of +//! object files, such as sections and symbols. There is an implementation of this +//! trait for [`File`], which allows reading any file format, as well as implementations +//! for each file format: +//! [`ElfFile`](elf::ElfFile), [`MachOFile`](macho::MachOFile), [`CoffFile`](coff::CoffFile), +//! 
[`PeFile`](pe::PeFile), [`WasmFile`](wasm::WasmFile), [`XcoffFile`](xcoff::XcoffFile). +//! +//! ## Low level read API +//! +//! The submodules for each file format define helpers that operate on the raw structs. +//! These can be used instead of the unified API, or in conjunction with it to access +//! details that are not available via the unified API. +//! +//! See the [submodules](#modules) for examples of the low level read API. +//! +//! ## Naming Convention +//! +//! Types that form part of the unified API for a file format are prefixed with the +//! name of the file format. +//! +//! ## Example for unified read API +//! ```no_run +//! use object::{Object, ObjectSection}; +//! use std::error::Error; +//! use std::fs; +//! +//! /// Reads a file and displays the name of each section. +//! fn main() -> Result<(), Box> { +//! # #[cfg(all(feature = "read", feature = "std"))] { +//! let data = fs::read("path/to/binary")?; +//! let file = object::File::parse(&*data)?; +//! for section in file.sections() { +//! println!("{}", section.name()?); +//! } +//! # } +//! Ok(()) +//! } +//! ``` use alloc::borrow::Cow; use alloc::vec::Vec; use core::{fmt, result}; -use crate::common::*; +#[cfg(not(feature = "std"))] +use alloc::collections::btree_map::BTreeMap as Map; +#[cfg(feature = "std")] +use std::collections::hash_map::HashMap as Map; + +pub use crate::common::*; mod read_ref; pub use read_ref::*; -#[cfg(feature = "std")] mod read_cache; -#[cfg(feature = "std")] pub use read_cache::*; mod util; pub use util::*; +#[cfg(any(feature = "elf", feature = "macho"))] +mod gnu_compression; + #[cfg(any( feature = "coff", feature = "elf", @@ -66,7 +113,7 @@ mod private { /// The error type used within the read module. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct Error(&'static str); +pub struct Error(pub(crate) &'static str); impl fmt::Display for Error { #[inline] @@ -110,7 +157,7 @@ impl ReadError for Option { target_pointer_width = "32", feature = "elf" ))] -pub type NativeFile<'data, R = &'data [u8]> = elf::ElfFile32<'data, crate::Endianness, R>; +pub type NativeFile<'data, R = &'data [u8]> = elf::ElfFile32<'data, crate::endian::Endianness, R>; /// The native executable file for the target platform. #[cfg(all( @@ -119,15 +166,17 @@ pub type NativeFile<'data, R = &'data [u8]> = elf::ElfFile32<'data, crate::Endia target_pointer_width = "64", feature = "elf" ))] -pub type NativeFile<'data, R = &'data [u8]> = elf::ElfFile64<'data, crate::Endianness, R>; +pub type NativeFile<'data, R = &'data [u8]> = elf::ElfFile64<'data, crate::endian::Endianness, R>; /// The native executable file for the target platform. #[cfg(all(target_os = "macos", target_pointer_width = "32", feature = "macho"))] -pub type NativeFile<'data, R = &'data [u8]> = macho::MachOFile32<'data, crate::Endianness, R>; +pub type NativeFile<'data, R = &'data [u8]> = + macho::MachOFile32<'data, crate::endian::Endianness, R>; /// The native executable file for the target platform. #[cfg(all(target_os = "macos", target_pointer_width = "64", feature = "macho"))] -pub type NativeFile<'data, R = &'data [u8]> = macho::MachOFile64<'data, crate::Endianness, R>; +pub type NativeFile<'data, R = &'data [u8]> = + macho::MachOFile64<'data, crate::endian::Endianness, R>; /// The native executable file for the target platform. #[cfg(all(target_os = "windows", target_pointer_width = "32", feature = "pe"))] @@ -146,53 +195,85 @@ pub type NativeFile<'data, R = &'data [u8]> = wasm::WasmFile<'data, R>; #[non_exhaustive] pub enum FileKind { /// A Unix archive. + /// + /// See [`archive::ArchiveFile`]. #[cfg(feature = "archive")] Archive, /// A COFF object file. + /// + /// See [`coff::CoffFile`]. 
#[cfg(feature = "coff")] Coff, /// A COFF bigobj object file. /// /// This supports a larger number of sections. + /// + /// See [`coff::CoffBigFile`]. #[cfg(feature = "coff")] CoffBig, /// A Windows short import file. + /// + /// See [`coff::ImportFile`]. #[cfg(feature = "coff")] CoffImport, /// A dyld cache file containing Mach-O images. + /// + /// See [`macho::DyldCache`] #[cfg(feature = "macho")] DyldCache, /// A 32-bit ELF file. + /// + /// See [`elf::ElfFile32`]. #[cfg(feature = "elf")] Elf32, /// A 64-bit ELF file. + /// + /// See [`elf::ElfFile64`]. #[cfg(feature = "elf")] Elf64, /// A 32-bit Mach-O file. + /// + /// See [`macho::MachOFile32`]. #[cfg(feature = "macho")] MachO32, /// A 64-bit Mach-O file. + /// + /// See [`macho::MachOFile64`]. #[cfg(feature = "macho")] MachO64, /// A 32-bit Mach-O fat binary. + /// + /// See [`macho::MachOFatFile32`]. #[cfg(feature = "macho")] MachOFat32, /// A 64-bit Mach-O fat binary. + /// + /// See [`macho::MachOFatFile64`]. #[cfg(feature = "macho")] MachOFat64, /// A 32-bit PE file. + /// + /// See [`pe::PeFile32`]. #[cfg(feature = "pe")] Pe32, /// A 64-bit PE file. + /// + /// See [`pe::PeFile64`]. #[cfg(feature = "pe")] Pe64, /// A Wasm file. + /// + /// See [`wasm::WasmFile`]. #[cfg(feature = "wasm")] Wasm, /// A 32-bit XCOFF file. + /// + /// See [`xcoff::XcoffFile32`]. #[cfg(feature = "xcoff")] Xcoff32, /// A 64-bit XCOFF file. + /// + /// See [`xcoff::XcoffFile64`]. 
#[cfg(feature = "xcoff")] Xcoff64, } @@ -214,7 +295,8 @@ impl FileKind { let kind = match [magic[0], magic[1], magic[2], magic[3], magic[4], magic[5], magic[6], magic[7]] { #[cfg(feature = "archive")] - [b'!', b'<', b'a', b'r', b'c', b'h', b'>', b'\n'] => FileKind::Archive, + [b'!', b'<', b'a', b'r', b'c', b'h', b'>', b'\n'] + | [b'!', b'<', b't', b'h', b'i', b'n', b'>', b'\n'] => FileKind::Archive, #[cfg(feature = "macho")] [b'd', b'y', b'l', b'd', b'_', b'v', b'1', b' '] => FileKind::DyldCache, #[cfg(feature = "elf")] @@ -232,7 +314,7 @@ impl FileKind { #[cfg(feature = "macho")] [0xca, 0xfe, 0xba, 0xbf, ..] => FileKind::MachOFat64, #[cfg(feature = "wasm")] - [0x00, b'a', b's', b'm', ..] => FileKind::Wasm, + [0x00, b'a', b's', b'm', _, _, 0x00, 0x00] => FileKind::Wasm, #[cfg(feature = "pe")] [b'M', b'Z', ..] if offset == 0 => { // offset == 0 restriction is because optional_header_magic only looks at offset 0 @@ -252,6 +334,8 @@ impl FileKind { [0xc4, 0x01, ..] // COFF arm64 | [0x64, 0xaa, ..] + // COFF arm64ec + | [0x41, 0xa6, ..] // COFF x86 | [0x4c, 0x01, ..] // COFF x86-64 @@ -277,6 +361,8 @@ impl FileKind { } /// An object kind. +/// +/// Returned by [`Object::kind`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum ObjectKind { @@ -292,15 +378,27 @@ pub enum ObjectKind { Core, } -/// The index used to identify a section of a file. +/// The index used to identify a section in a file. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct SectionIndex(pub usize); -/// The index used to identify a symbol of a file. +impl fmt::Display for SectionIndex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +/// The index used to identify a symbol in a symbol table. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct SymbolIndex(pub usize); -/// The section where a symbol is defined. 
+impl fmt::Display for SymbolIndex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +/// The section where an [`ObjectSymbol`] is defined. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum SymbolSection { @@ -332,13 +430,17 @@ impl SymbolSection { } } -/// An entry in a `SymbolMap`. +/// An entry in a [`SymbolMap`]. pub trait SymbolMapEntry { /// The symbol address. fn address(&self) -> u64; } -/// A map from addresses to symbols. +/// A map from addresses to symbol information. +/// +/// The symbol information depends on the chosen entry type, such as [`SymbolMapName`]. +/// +/// Returned by [`Object::symbol_map`]. #[derive(Debug, Default, Clone)] pub struct SymbolMap { symbols: Vec, @@ -349,7 +451,7 @@ impl SymbolMap { /// /// This function will sort the symbols by address. pub fn new(mut symbols: Vec) -> Self { - symbols.sort_unstable_by_key(|s| s.address()); + symbols.sort_by_key(|s| s.address()); SymbolMap { symbols } } @@ -372,7 +474,7 @@ impl SymbolMap { } } -/// A `SymbolMap` entry for symbol names. +/// The type used for entries in a [`SymbolMap`] that maps from addresses to names. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct SymbolMapName<'data> { address: u64, @@ -408,10 +510,12 @@ impl<'data> SymbolMapEntry for SymbolMapName<'data> { /// A map from addresses to symbol names and object files. /// /// This is derived from STAB entries in Mach-O files. +/// +/// Returned by [`Object::object_map`]. #[derive(Debug, Default, Clone)] pub struct ObjectMap<'data> { symbols: SymbolMap>, - objects: Vec<&'data [u8]>, + objects: Vec>, } impl<'data> ObjectMap<'data> { @@ -430,12 +534,12 @@ impl<'data> ObjectMap<'data> { /// Get all objects in the map. #[inline] - pub fn objects(&self) -> &[&'data [u8]] { + pub fn objects(&self) -> &[ObjectMapFile<'data>] { &self.objects } } -/// A `ObjectMap` entry. +/// A symbol in an [`ObjectMap`]. 
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct ObjectMapEntry<'data> { address: u64, @@ -473,8 +577,8 @@ impl<'data> ObjectMapEntry<'data> { /// Get the object file name. #[inline] - pub fn object(&self, map: &ObjectMap<'data>) -> &'data [u8] { - map.objects[self.object] + pub fn object<'a>(&self, map: &'a ObjectMap<'data>) -> &'a ObjectMapFile<'data> { + &map.objects[self.object] } } @@ -485,7 +589,34 @@ impl<'data> SymbolMapEntry for ObjectMapEntry<'data> { } } +/// An object file name in an [`ObjectMap`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ObjectMapFile<'data> { + path: &'data [u8], + member: Option<&'data [u8]>, +} + +impl<'data> ObjectMapFile<'data> { + fn new(path: &'data [u8], member: Option<&'data [u8]>) -> Self { + ObjectMapFile { path, member } + } + + /// Get the path to the file containing the object. + #[inline] + pub fn path(&self) -> &'data [u8] { + self.path + } + + /// If the file is an archive, get the name of the member containing the object. + #[inline] + pub fn member(&self) -> Option<&'data [u8]> { + self.member + } +} + /// An imported symbol. +/// +/// Returned by [`Object::imports`]. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Import<'data> { library: ByteString<'data>, @@ -508,6 +639,8 @@ impl<'data> Import<'data> { } /// An exported symbol. +/// +/// Returned by [`Object::exports`]. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Export<'data> { // TODO: and ordinal? @@ -529,7 +662,7 @@ impl<'data> Export<'data> { } } -/// PDB Information +/// PDB information from the debug directory in a PE file. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct CodeView<'data> { guid: [u8; 16], @@ -538,13 +671,13 @@ pub struct CodeView<'data> { } impl<'data> CodeView<'data> { - /// The path to the PDB as stored in CodeView + /// The path to the PDB as stored in CodeView. 
#[inline] pub fn path(&self) -> &'data [u8] { self.path.0 } - /// The age of the PDB + /// The age of the PDB. #[inline] pub fn age(&self) -> u32 { self.age @@ -557,7 +690,7 @@ impl<'data> CodeView<'data> { } } -/// The target referenced by a relocation. +/// The target referenced by a [`Relocation`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum RelocationTarget { @@ -570,6 +703,8 @@ pub enum RelocationTarget { } /// A relocation entry. +/// +/// Returned by [`Object::dynamic_relocations`] or [`ObjectSection::relocations`]. #[derive(Debug)] pub struct Relocation { kind: RelocationKind, @@ -578,6 +713,7 @@ pub struct Relocation { target: RelocationTarget, addend: i64, implicit_addend: bool, + flags: RelocationFlags, } impl Relocation { @@ -616,7 +752,7 @@ impl Relocation { /// Set the addend to use in the relocation calculation. #[inline] pub fn set_addend(&mut self, addend: i64) { - self.addend = addend + self.addend = addend; } /// Returns true if there is an implicit addend stored in the data at the offset @@ -625,6 +761,109 @@ impl Relocation { pub fn has_implicit_addend(&self) -> bool { self.implicit_addend } + + /// Relocation flags that are specific to each file format. + /// + /// The values returned by `kind`, `encoding` and `size` are derived + /// from these flags. + #[inline] + pub fn flags(&self) -> RelocationFlags { + self.flags + } +} + +/// A map from section offsets to relocation information. +/// +/// This can be used to apply relocations to a value at a given section offset. +/// This is intended for use with DWARF in relocatable object files, and only +/// supports relocations that are used in DWARF. +/// +/// Returned by [`ObjectSection::relocation_map`]. +#[derive(Debug, Default)] +pub struct RelocationMap(Map); + +impl RelocationMap { + /// Construct a new relocation map for a section. + /// + /// Fails if any relocation cannot be added to the map. 
+ /// You can manually use `add` if you need different error handling, + /// such as to list all errors or to ignore them. + pub fn new<'data, 'file, T>(file: &'file T, section: &T::Section<'file>) -> Result + where + T: Object<'data>, + { + let mut map = RelocationMap(Map::new()); + for (offset, relocation) in section.relocations() { + map.add(file, offset, relocation)?; + } + Ok(map) + } + + /// Add a single relocation to the map. + pub fn add<'data: 'file, 'file, T>( + &mut self, + file: &'file T, + offset: u64, + relocation: Relocation, + ) -> Result<()> + where + T: Object<'data>, + { + let mut entry = RelocationMapEntry { + implicit_addend: relocation.has_implicit_addend(), + addend: relocation.addend() as u64, + }; + match relocation.kind() { + RelocationKind::Absolute => match relocation.target() { + RelocationTarget::Symbol(symbol_idx) => { + let symbol = file + .symbol_by_index(symbol_idx) + .read_error("Relocation with invalid symbol")?; + entry.addend = symbol.address().wrapping_add(entry.addend); + } + RelocationTarget::Section(section_idx) => { + let section = file + .section_by_index(section_idx) + .read_error("Relocation with invalid section")?; + // DWARF parsers expect references to DWARF sections to be section offsets, + // not addresses. Addresses are useful for everything else. + if section.kind() != SectionKind::Debug { + entry.addend = section.address().wrapping_add(entry.addend); + } + } + _ => { + return Err(Error("Unsupported relocation target")); + } + }, + _ => { + return Err(Error("Unsupported relocation type")); + } + } + if self.0.insert(offset, entry).is_some() { + return Err(Error("Multiple relocations for offset")); + } + Ok(()) + } + + /// Relocate a value that was read from the section at the given offset. + pub fn relocate(&self, offset: u64, value: u64) -> u64 { + if let Some(relocation) = self.0.get(&offset) { + if relocation.implicit_addend { + // Use the explicit addend too, because it may have the symbol value. 
+ value.wrapping_add(relocation.addend) + } else { + relocation.addend + } + } else { + value + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct RelocationMapEntry { + implicit_addend: bool, + addend: u64, } /// A data compression format. @@ -646,6 +885,8 @@ pub enum CompressionFormat { } /// A range in a file that may be compressed. +/// +/// Returned by [`ObjectSection::compressed_file_range`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct CompressedFileRange { /// The data compression format. @@ -679,7 +920,7 @@ impl CompressedFileRange { } } - /// Convert to `CompressedData` by reading from the file. + /// Convert to [`CompressedData`] by reading from the file. pub fn data<'data, R: ReadRef<'data>>(self, file: R) -> Result> { let data = file .read_bytes_at(self.offset, self.compressed_size) @@ -693,6 +934,8 @@ impl CompressedFileRange { } /// Data that may be compressed. +/// +/// Returned by [`ObjectSection::compressed_data`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct CompressedData<'data> { /// The data compression format. 
@@ -730,7 +973,11 @@ impl<'data> CompressedData<'data> { .try_into() .ok() .read_error("Uncompressed data size is too large.")?; - let mut decompressed = Vec::with_capacity(size); + let mut decompressed = Vec::new(); + decompressed + .try_reserve_exact(size) + .ok() + .read_error("Uncompressed data allocation failed")?; let mut decompress = flate2::Decompress::new(true); decompress .decompress_vec( @@ -751,7 +998,11 @@ impl<'data> CompressedData<'data> { .try_into() .ok() .read_error("Uncompressed data size is too large.")?; - let mut decompressed = Vec::with_capacity(size); + let mut decompressed = Vec::new(); + decompressed + .try_reserve_exact(size) + .ok() + .read_error("Uncompressed data allocation failed")?; let mut decoder = ruzstd::StreamingDecoder::new(self.data) .ok() .read_error("Invalid zstd compressed data")?; diff --git a/third_party/rust/object/src/read/pe/data_directory.rs b/third_party/rust/object/src/read/pe/data_directory.rs index 0e10244bf3a5..262851ef7011 100644 --- a/third_party/rust/object/src/read/pe/data_directory.rs +++ b/third_party/rust/object/src/read/pe/data_directory.rs @@ -1,7 +1,8 @@ use core::slice; +use crate::endian::LittleEndian as LE; +use crate::pe; use crate::read::{Error, ReadError, ReadRef, Result}; -use crate::{pe, LittleEndian as LE}; use super::{ DelayLoadImportTable, ExportTable, ImportTable, RelocationBlockIterator, ResourceDirectory, @@ -9,6 +10,8 @@ use super::{ }; /// The table of data directories in a PE file. +/// +/// Returned by [`ImageNtHeaders::parse`](super::ImageNtHeaders::parse). 
#[derive(Debug, Clone, Copy)] pub struct DataDirectories<'data> { entries: &'data [pe::ImageDataDirectory], diff --git a/third_party/rust/object/src/read/pe/export.rs b/third_party/rust/object/src/read/pe/export.rs index 88dc78d50bc0..d66d406ebe3b 100644 --- a/third_party/rust/object/src/read/pe/export.rs +++ b/third_party/rust/object/src/read/pe/export.rs @@ -1,8 +1,9 @@ use alloc::vec::Vec; use core::fmt::Debug; +use crate::endian::{LittleEndian as LE, U16Bytes, U32Bytes}; +use crate::pe; use crate::read::{ByteString, Bytes, Error, ReadError, ReadRef, Result}; -use crate::{pe, LittleEndian as LE, U16Bytes, U32Bytes}; /// Where an export is pointing to. #[derive(Clone, Copy)] @@ -80,6 +81,8 @@ impl<'a> Debug for ExportTarget<'a> { } /// A partially parsed PE export table. +/// +/// Returned by [`DataDirectories::export_table`](super::DataDirectories::export_table). #[derive(Debug, Clone)] pub struct ExportTable<'data> { data: Bytes<'data>, diff --git a/third_party/rust/object/src/read/pe/file.rs b/third_party/rust/object/src/read/pe/file.rs index 0f8ce9f2596b..ec9d9e6cd21c 100644 --- a/third_party/rust/object/src/read/pe/file.rs +++ b/third_party/rust/object/src/read/pe/file.rs @@ -4,12 +4,15 @@ use core::{mem, str}; use core::convert::TryInto; +use crate::endian::{LittleEndian as LE, U32}; +use crate::pe; +use crate::pod::{self, Pod}; use crate::read::coff::{CoffCommon, CoffSymbol, CoffSymbolIterator, CoffSymbolTable, SymbolTable}; use crate::read::{ - self, Architecture, ComdatKind, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, - Object, ObjectComdat, ObjectKind, ReadError, ReadRef, Result, SectionIndex, SymbolIndex, + self, Architecture, ByteString, Bytes, CodeView, ComdatKind, Error, Export, FileFlags, Import, + NoDynamicRelocationIterator, Object, ObjectComdat, ObjectKind, ReadError, ReadRef, Result, + SectionIndex, SubArchitecture, SymbolIndex, }; -use crate::{pe, ByteString, Bytes, CodeView, LittleEndian as LE, Pod, U32}; use super::{ 
DataDirectories, ExportTable, ImageThunkData, ImportTable, PeSection, PeSectionIterator, @@ -17,11 +20,19 @@ use super::{ }; /// A PE32 (32-bit) image file. +/// +/// This is a file that starts with [`pe::ImageNtHeaders32`], and corresponds +/// to [`crate::FileKind::Pe32`]. pub type PeFile32<'data, R = &'data [u8]> = PeFile<'data, pe::ImageNtHeaders32, R>; /// A PE32+ (64-bit) image file. +/// +/// This is a file that starts with [`pe::ImageNtHeaders64`], and corresponds +/// to [`crate::FileKind::Pe64`]. pub type PeFile64<'data, R = &'data [u8]> = PeFile<'data, pe::ImageNtHeaders64, R>; -/// A PE object file. +/// A PE image file. +/// +/// Most functionality is provided by the [`Object`] trait implementation. #[derive(Debug)] pub struct PeFile<'data, Pe, R = &'data [u8]> where @@ -127,33 +138,39 @@ where { } -impl<'data, 'file, Pe, R> Object<'data, 'file> for PeFile<'data, Pe, R> +impl<'data, Pe, R> Object<'data> for PeFile<'data, Pe, R> where - 'data: 'file, Pe: ImageNtHeaders, - R: 'file + ReadRef<'data>, + R: ReadRef<'data>, { - type Segment = PeSegment<'data, 'file, Pe, R>; - type SegmentIterator = PeSegmentIterator<'data, 'file, Pe, R>; - type Section = PeSection<'data, 'file, Pe, R>; - type SectionIterator = PeSectionIterator<'data, 'file, Pe, R>; - type Comdat = PeComdat<'data, 'file, Pe, R>; - type ComdatIterator = PeComdatIterator<'data, 'file, Pe, R>; - type Symbol = CoffSymbol<'data, 'file, R>; - type SymbolIterator = CoffSymbolIterator<'data, 'file, R>; - type SymbolTable = CoffSymbolTable<'data, 'file, R>; - type DynamicRelocationIterator = NoDynamicRelocationIterator; + type Segment<'file> = PeSegment<'data, 'file, Pe, R> where Self: 'file, 'data: 'file; + type SegmentIterator<'file> = PeSegmentIterator<'data, 'file, Pe, R> where Self: 'file, 'data: 'file; + type Section<'file> = PeSection<'data, 'file, Pe, R> where Self: 'file, 'data: 'file; + type SectionIterator<'file> = PeSectionIterator<'data, 'file, Pe, R> where Self: 'file, 'data: 'file; + 
type Comdat<'file> = PeComdat<'data, 'file, Pe, R> where Self: 'file, 'data: 'file; + type ComdatIterator<'file> = PeComdatIterator<'data, 'file, Pe, R> where Self: 'file, 'data: 'file; + type Symbol<'file> = CoffSymbol<'data, 'file, R> where Self: 'file, 'data: 'file; + type SymbolIterator<'file> = CoffSymbolIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type SymbolTable<'file> = CoffSymbolTable<'data, 'file, R> where Self: 'file, 'data: 'file; + type DynamicRelocationIterator<'file> = NoDynamicRelocationIterator where Self: 'file, 'data: 'file; fn architecture(&self) -> Architecture { match self.nt_headers.file_header().machine.get(LE) { pe::IMAGE_FILE_MACHINE_ARMNT => Architecture::Arm, - pe::IMAGE_FILE_MACHINE_ARM64 => Architecture::Aarch64, + pe::IMAGE_FILE_MACHINE_ARM64 | pe::IMAGE_FILE_MACHINE_ARM64EC => Architecture::Aarch64, pe::IMAGE_FILE_MACHINE_I386 => Architecture::I386, pe::IMAGE_FILE_MACHINE_AMD64 => Architecture::X86_64, _ => Architecture::Unknown, } } + fn sub_architecture(&self) -> Option { + match self.nt_headers.file_header().machine.get(LE) { + pe::IMAGE_FILE_MACHINE_ARM64EC => Some(SubArchitecture::Arm64EC), + _ => None, + } + } + #[inline] fn is_little_endian(&self) -> bool { // Only little endian is supported. 
@@ -176,14 +193,14 @@ where } } - fn segments(&'file self) -> PeSegmentIterator<'data, 'file, Pe, R> { + fn segments(&self) -> PeSegmentIterator<'data, '_, Pe, R> { PeSegmentIterator { file: self, iter: self.common.sections.iter(), } } - fn section_by_name_bytes( + fn section_by_name_bytes<'file>( &'file self, section_name: &[u8], ) -> Option> { @@ -192,16 +209,13 @@ where .section_by_name(self.common.symbols.strings(), section_name) .map(|(index, section)| PeSection { file: self, - index: SectionIndex(index), + index, section, }) } - fn section_by_index( - &'file self, - index: SectionIndex, - ) -> Result> { - let section = self.common.sections.section(index.0)?; + fn section_by_index(&self, index: SectionIndex) -> Result> { + let section = self.common.sections.section(index)?; Ok(PeSection { file: self, index, @@ -209,19 +223,19 @@ where }) } - fn sections(&'file self) -> PeSectionIterator<'data, 'file, Pe, R> { + fn sections(&self) -> PeSectionIterator<'data, '_, Pe, R> { PeSectionIterator { file: self, iter: self.common.sections.iter().enumerate(), } } - fn comdats(&'file self) -> PeComdatIterator<'data, 'file, Pe, R> { + fn comdats(&self) -> PeComdatIterator<'data, '_, Pe, R> { PeComdatIterator { file: self } } - fn symbol_by_index(&'file self, index: SymbolIndex) -> Result> { - let symbol = self.common.symbols.symbol(index.0)?; + fn symbol_by_index(&self, index: SymbolIndex) -> Result> { + let symbol = self.common.symbols.symbol(index)?; Ok(CoffSymbol { file: &self.common, index, @@ -229,30 +243,23 @@ where }) } - fn symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R> { - CoffSymbolIterator { - file: &self.common, - index: 0, - } + fn symbols(&self) -> CoffSymbolIterator<'data, '_, R> { + CoffSymbolIterator::new(&self.common) } - fn symbol_table(&'file self) -> Option> { + fn symbol_table(&self) -> Option> { Some(CoffSymbolTable { file: &self.common }) } - fn dynamic_symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R> { - CoffSymbolIterator { 
- file: &self.common, - // Hack: don't return any. - index: self.common.symbols.len(), - } + fn dynamic_symbols(&self) -> CoffSymbolIterator<'data, '_, R> { + CoffSymbolIterator::empty(&self.common) } - fn dynamic_symbol_table(&'file self) -> Option> { + fn dynamic_symbol_table(&self) -> Option> { None } - fn dynamic_relocations(&'file self) -> Option { + fn dynamic_relocations(&self) -> Option { None } @@ -303,17 +310,8 @@ where Some(data_dir) => data_dir, None => return Ok(None), }; - let debug_data = data_dir.data(self.data, &self.common.sections).map(Bytes)?; - let debug_data_size = data_dir.size.get(LE) as usize; - - let count = debug_data_size / mem::size_of::(); - let rem = debug_data_size % mem::size_of::(); - if rem != 0 || count < 1 { - return Err(Error("Invalid PE debug dir size")); - } - - let debug_dirs = debug_data - .read_slice_at::(0, count) + let debug_data = data_dir.data(self.data, &self.common.sections)?; + let debug_dirs = pod::slice_from_all_bytes::(debug_data) .read_error("Invalid PE debug dir size")?; for debug_dir in debug_dirs { @@ -380,14 +378,16 @@ where } } -/// An iterator over the COMDAT section groups of a `PeFile32`. +/// An iterator for the COMDAT section groups in a [`PeFile32`]. pub type PeComdatIterator32<'data, 'file, R = &'data [u8]> = PeComdatIterator<'data, 'file, pe::ImageNtHeaders32, R>; -/// An iterator over the COMDAT section groups of a `PeFile64`. +/// An iterator for the COMDAT section groups in a [`PeFile64`]. pub type PeComdatIterator64<'data, 'file, R = &'data [u8]> = PeComdatIterator<'data, 'file, pe::ImageNtHeaders64, R>; -/// An iterator over the COMDAT section groups of a `PeFile`. +/// An iterator for the COMDAT section groups in a [`PeFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct PeComdatIterator<'data, 'file, Pe, R = &'data [u8]> where @@ -411,14 +411,16 @@ where } } -/// A COMDAT section group of a `PeFile32`. 
+/// A COMDAT section group in a [`PeFile32`]. pub type PeComdat32<'data, 'file, R = &'data [u8]> = PeComdat<'data, 'file, pe::ImageNtHeaders32, R>; -/// A COMDAT section group of a `PeFile64`. +/// A COMDAT section group in a [`PeFile64`]. pub type PeComdat64<'data, 'file, R = &'data [u8]> = PeComdat<'data, 'file, pe::ImageNtHeaders64, R>; -/// A COMDAT section group of a `PeFile`. +/// A COMDAT section group in a [`PeFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct PeComdat<'data, 'file, Pe, R = &'data [u8]> where @@ -454,12 +456,12 @@ where } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { unreachable!(); } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { unreachable!(); } @@ -469,14 +471,16 @@ where } } -/// An iterator over the sections in a COMDAT section group of a `PeFile32`. +/// An iterator for the sections in a COMDAT section group in a [`PeFile32`]. pub type PeComdatSectionIterator32<'data, 'file, R = &'data [u8]> = PeComdatSectionIterator<'data, 'file, pe::ImageNtHeaders32, R>; -/// An iterator over the sections in a COMDAT section group of a `PeFile64`. +/// An iterator for the sections in a COMDAT section group in a [`PeFile64`]. pub type PeComdatSectionIterator64<'data, 'file, R = &'data [u8]> = PeComdatSectionIterator<'data, 'file, pe::ImageNtHeaders64, R>; -/// An iterator over the sections in a COMDAT section group of a `PeFile`. +/// An iterator for the sections in a COMDAT section group in a [`PeFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct PeComdatSectionIterator<'data, 'file, Pe, R = &'data [u8]> where @@ -521,7 +525,7 @@ impl pe::ImageDosHeader { } } -/// Find the optional header and read the `optional_header.magic`. +/// Find the optional header and read its `magic` field. 
/// /// It can be useful to know this magic value before trying to /// fully parse the NT headers. @@ -540,7 +544,7 @@ pub fn optional_header_magic<'data, R: ReadRef<'data>>(data: R) -> Result { Ok(nt_headers.optional_header().magic()) } -/// A trait for generic access to `ImageNtHeaders32` and `ImageNtHeaders64`. +/// A trait for generic access to [`pe::ImageNtHeaders32`] and [`pe::ImageNtHeaders64`]. #[allow(missing_docs)] pub trait ImageNtHeaders: Debug + Pod { type ImageOptionalHeader: ImageOptionalHeader; @@ -569,7 +573,7 @@ pub trait ImageNtHeaders: Debug + Pod { /// /// `data` must be for the entire file. /// - /// `offset` must be headers offset, which can be obtained from `ImageDosHeader::nt_headers_offset`. + /// `offset` must be headers offset, which can be obtained from [`pe::ImageDosHeader::nt_headers_offset`]. /// It is updated to point after the optional header, which is where the section headers are located. /// /// Also checks that the `signature` and `magic` fields in the headers are valid. @@ -626,7 +630,7 @@ pub trait ImageNtHeaders: Debug + Pod { } } -/// A trait for generic access to `ImageOptionalHeader32` and `ImageOptionalHeader64`. +/// A trait for generic access to [`pe::ImageOptionalHeader32`] and [`pe::ImageOptionalHeader64`]. #[allow(missing_docs)] pub trait ImageOptionalHeader: Debug + Pod { // Standard fields. diff --git a/third_party/rust/object/src/read/pe/import.rs b/third_party/rust/object/src/read/pe/import.rs index a5535dc36785..f81a084d99fa 100644 --- a/third_party/rust/object/src/read/pe/import.rs +++ b/third_party/rust/object/src/read/pe/import.rs @@ -1,12 +1,16 @@ use core::fmt::Debug; use core::mem; +use crate::endian::{LittleEndian as LE, U16Bytes}; +use crate::pe; +use crate::pod::Pod; use crate::read::{Bytes, ReadError, Result}; -use crate::{pe, LittleEndian as LE, Pod, U16Bytes}; use super::ImageNtHeaders; /// Information for parsing a PE import table. 
+/// +/// Returned by [`DataDirectories::import_table`](super::DataDirectories::import_table). #[derive(Debug, Clone)] pub struct ImportTable<'data> { section_data: Bytes<'data>, @@ -218,6 +222,9 @@ impl ImageThunkData for pe::ImageThunkData32 { } /// Information for parsing a PE delay-load import table. +/// +/// Returned by +/// [`DataDirectories::delay_load_import_table`](super::DataDirectories::delay_load_import_table). #[derive(Debug, Clone)] pub struct DelayLoadImportTable<'data> { section_data: Bytes<'data>, diff --git a/third_party/rust/object/src/read/pe/mod.rs b/third_party/rust/object/src/read/pe/mod.rs index 2b7cc5d7a01a..ab6011c88839 100644 --- a/third_party/rust/object/src/read/pe/mod.rs +++ b/third_party/rust/object/src/read/pe/mod.rs @@ -1,11 +1,45 @@ //! Support for reading PE files. //! -//! Defines traits to abstract over the difference between PE32/PE32+, -//! and implements read functionality in terms of these traits. +//! Traits are used to abstract over the difference between PE32 and PE32+. +//! The primary trait for this is [`ImageNtHeaders`]. //! -//! This module reuses some of the COFF functionality. +//! ## High level API //! -//! Also provides `PeFile` and related types which implement the `Object` trait. +//! [`PeFile`] implements the [`Object`](crate::read::Object) trait for +//! PE files. [`PeFile`] is parameterised by [`ImageNtHeaders`] to allow +//! reading both PE32 and PE32+. There are type aliases for these parameters +//! ([`PeFile32`] and [`PeFile64`]). +//! +//! ## Low level API +//! +//! The [`ImageNtHeaders`] trait can be directly used to parse both +//! [`pe::ImageNtHeaders32`] and [`pe::ImageNtHeaders64`]. +//! +//! ### Example for low level API +//! ```no_run +//! use object::pe; +//! use object::read::pe::ImageNtHeaders; +//! use std::error::Error; +//! use std::fs; +//! +//! /// Reads a file and displays the name of each section. +//! fn main() -> Result<(), Box> { +//! # #[cfg(feature = "std")] { +//! 
let data = fs::read("path/to/binary")?; +//! let dos_header = pe::ImageDosHeader::parse(&*data)?; +//! let mut offset = dos_header.nt_headers_offset().into(); +//! let (nt_headers, data_directories) = pe::ImageNtHeaders64::parse(&*data, &mut offset)?; +//! let sections = nt_headers.sections(&*data, offset)?; +//! let symbols = nt_headers.symbols(&*data)?; +//! for section in sections.iter() { +//! println!("{}", String::from_utf8_lossy(section.name(symbols.strings())?)); +//! } +//! # } +//! Ok(()) +//! } +//! ``` +#[cfg(doc)] +use crate::pe; mod file; pub use file::*; diff --git a/third_party/rust/object/src/read/pe/relocation.rs b/third_party/rust/object/src/read/pe/relocation.rs index 06215bd1a7ac..77421b7ba179 100644 --- a/third_party/rust/object/src/read/pe/relocation.rs +++ b/third_party/rust/object/src/read/pe/relocation.rs @@ -5,6 +5,8 @@ use crate::pe; use crate::read::{Bytes, Error, ReadError, Result}; /// An iterator over the relocation blocks in the `.reloc` section of a PE file. +/// +/// Returned by [`DataDirectories::relocation_blocks`](super::DataDirectories::relocation_blocks). #[derive(Debug, Default, Clone, Copy)] pub struct RelocationBlockIterator<'data> { data: Bytes<'data>, diff --git a/third_party/rust/object/src/read/pe/resource.rs b/third_party/rust/object/src/read/pe/resource.rs index 646eaefaaa49..bf1fa704297f 100644 --- a/third_party/rust/object/src/read/pe/resource.rs +++ b/third_party/rust/object/src/read/pe/resource.rs @@ -1,10 +1,13 @@ use alloc::string::String; use core::char; +use crate::endian::{LittleEndian as LE, U16Bytes}; +use crate::pe; use crate::read::{ReadError, ReadRef, Result}; -use crate::{pe, LittleEndian as LE, U16Bytes}; /// The `.rsrc` section of a PE file. +/// +/// Returned by [`DataDirectories::resource_directory`](super::DataDirectories::resource_directory). 
#[derive(Debug, Clone, Copy)] pub struct ResourceDirectory<'data> { data: &'data [u8], diff --git a/third_party/rust/object/src/read/pe/rich.rs b/third_party/rust/object/src/read/pe/rich.rs index 33dd039c99d0..584be6410fa8 100644 --- a/third_party/rust/object/src/read/pe/rich.rs +++ b/third_party/rust/object/src/read/pe/rich.rs @@ -2,9 +2,10 @@ use core::mem; +use crate::endian::{LittleEndian as LE, U32}; +use crate::pe; use crate::pod::bytes_of_slice; -use crate::read::Bytes; -use crate::{pe, LittleEndian as LE, ReadRef, U32}; +use crate::read::{Bytes, ReadRef}; /// Parsed information about a Rich Header. #[derive(Debug, Clone, Copy)] diff --git a/third_party/rust/object/src/read/pe/section.rs b/third_party/rust/object/src/read/pe/section.rs index 2880e401fed7..4c53d1e25c71 100644 --- a/third_party/rust/object/src/read/pe/section.rs +++ b/third_party/rust/object/src/read/pe/section.rs @@ -6,19 +6,19 @@ use crate::pe; use crate::pe::ImageSectionHeader; use crate::read::{ self, CompressedData, CompressedFileRange, ObjectSection, ObjectSegment, ReadError, ReadRef, - Relocation, Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, + Relocation, RelocationMap, Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, }; use super::{ImageNtHeaders, PeFile, SectionTable}; -/// An iterator over the loadable sections of a `PeFile32`. +/// An iterator for the loadable sections in a [`PeFile32`](super::PeFile32). pub type PeSegmentIterator32<'data, 'file, R = &'data [u8]> = PeSegmentIterator<'data, 'file, pe::ImageNtHeaders32, R>; -/// An iterator over the loadable sections of a `PeFile64`. +/// An iterator for the loadable sections in a [`PeFile64`](super::PeFile64). pub type PeSegmentIterator64<'data, 'file, R = &'data [u8]> = PeSegmentIterator<'data, 'file, pe::ImageNtHeaders64, R>; -/// An iterator over the loadable sections of a `PeFile`. +/// An iterator for the loadable sections in a [`PeFile`]. 
#[derive(Debug)] pub struct PeSegmentIterator<'data, 'file, Pe, R = &'data [u8]> where @@ -44,14 +44,16 @@ where } } -/// A loadable section of a `PeFile32`. +/// A loadable section in a [`PeFile32`](super::PeFile32). pub type PeSegment32<'data, 'file, R = &'data [u8]> = PeSegment<'data, 'file, pe::ImageNtHeaders32, R>; -/// A loadable section of a `PeFile64`. +/// A loadable section in a [`PeFile64`](super::PeFile64). pub type PeSegment64<'data, 'file, R = &'data [u8]> = PeSegment<'data, 'file, pe::ImageNtHeaders64, R>; -/// A loadable section of a `PeFile`. +/// A loadable section in a [`PeFile`]. +/// +/// Most functionality is provided by the [`ObjectSegment`] trait implementation. #[derive(Debug)] pub struct PeSegment<'data, 'file, Pe, R = &'data [u8]> where @@ -62,6 +64,22 @@ where section: &'data pe::ImageSectionHeader, } +impl<'data, 'file, Pe, R> PeSegment<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + /// Get the PE file containing this segment. + pub fn pe_file(&self) -> &'file PeFile<'data, Pe, R> { + self.file + } + + /// Get the raw PE section header. + pub fn pe_section(&self) -> &'data pe::ImageSectionHeader { + self.section + } +} + impl<'data, 'file, Pe, R> read::private::Sealed for PeSegment<'data, 'file, Pe, R> where Pe: ImageNtHeaders, @@ -132,14 +150,14 @@ where } } -/// An iterator over the sections of a `PeFile32`. +/// An iterator for the sections in a [`PeFile32`](super::PeFile32). pub type PeSectionIterator32<'data, 'file, R = &'data [u8]> = PeSectionIterator<'data, 'file, pe::ImageNtHeaders32, R>; -/// An iterator over the sections of a `PeFile64`. +/// An iterator for the sections in a [`PeFile64`](super::PeFile64). pub type PeSectionIterator64<'data, 'file, R = &'data [u8]> = PeSectionIterator<'data, 'file, pe::ImageNtHeaders64, R>; -/// An iterator over the sections of a `PeFile`. +/// An iterator for the sections in a [`PeFile`]. 
#[derive(Debug)] pub struct PeSectionIterator<'data, 'file, Pe, R = &'data [u8]> where @@ -166,14 +184,16 @@ where } } -/// A section of a `PeFile32`. +/// A section in a [`PeFile32`](super::PeFile32). pub type PeSection32<'data, 'file, R = &'data [u8]> = PeSection<'data, 'file, pe::ImageNtHeaders32, R>; -/// A section of a `PeFile64`. +/// A section in a [`PeFile64`](super::PeFile64). pub type PeSection64<'data, 'file, R = &'data [u8]> = PeSection<'data, 'file, pe::ImageNtHeaders64, R>; -/// A section of a `PeFile`. +/// A section in a [`PeFile`]. +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. #[derive(Debug)] pub struct PeSection<'data, 'file, Pe, R = &'data [u8]> where @@ -185,6 +205,22 @@ where pub(super) section: &'data pe::ImageSectionHeader, } +impl<'data, 'file, Pe, R> PeSection<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + /// Get the PE file containing this segment. + pub fn pe_file(&self) -> &'file PeFile<'data, Pe, R> { + self.file + } + + /// Get the raw PE section header. + pub fn pe_section(&self) -> &'data pe::ImageSectionHeader { + self.section + } +} + impl<'data, 'file, Pe, R> read::private::Sealed for PeSection<'data, 'file, Pe, R> where Pe: ImageNtHeaders, @@ -253,12 +289,12 @@ where } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { self.section.name(self.file.common.symbols.strings()) } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { let name = self.name_bytes()?; str::from_utf8(name) .ok() @@ -284,6 +320,10 @@ where PeRelocationIterator(PhantomData) } + fn relocation_map(&self) -> read::Result { + RelocationMap::new(self.file, self) + } + fn flags(&self) -> SectionFlags { SectionFlags::Coff { characteristics: self.section.characteristics.get(LE), @@ -419,7 +459,9 @@ impl pe::ImageSectionHeader { } } -/// An iterator over the relocations in an `PeSection`. 
+/// An iterator for the relocations in an [`PeSection`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct PeRelocationIterator<'data, 'file, R = &'data [u8]>( PhantomData<(&'data (), &'file (), R)>, diff --git a/third_party/rust/object/src/read/read_cache.rs b/third_party/rust/object/src/read/read_cache.rs index dfce1e1b1ed3..bfb9bf7306cf 100644 --- a/third_party/rust/object/src/read/read_cache.rs +++ b/third_party/rust/object/src/read/read_cache.rs @@ -1,42 +1,77 @@ +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::cell::RefCell; +use core::convert::TryInto; +use core::mem; use core::ops::Range; -use std::boxed::Box; -use std::cell::RefCell; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::convert::TryInto; +#[cfg(feature = "std")] use std::io::{Read, Seek, SeekFrom}; -use std::mem; -use std::vec::Vec; + +#[cfg(not(feature = "std"))] +use alloc::collections::btree_map::{BTreeMap as Map, Entry}; +#[cfg(feature = "std")] +use std::collections::hash_map::{Entry, HashMap as Map}; use crate::read::ReadRef; -/// An implementation of `ReadRef` for data in a stream that implements +/// An implementation of [`ReadRef`] for data in a stream that implements /// `Read + Seek`. /// /// Contains a cache of read-only blocks of data, allowing references to /// them to be returned. Entries in the cache are never removed. /// Entries are keyed on the offset and size of the read. /// Currently overlapping reads are considered separate reads. +/// +/// This is primarily intended for environments where memory mapped files +/// are not available or not suitable, such as WebAssembly. +/// +/// Note that malformed files can cause the cache to grow much larger than +/// the file size. 
#[derive(Debug)] -pub struct ReadCache { +pub struct ReadCache { cache: RefCell>, } #[derive(Debug)] -struct ReadCacheInternal { +struct ReadCacheInternal { read: R, - bufs: HashMap<(u64, u64), Box<[u8]>>, - strings: HashMap<(u64, u8), Box<[u8]>>, + bufs: Map<(u64, u64), Box<[u8]>>, + strings: Map<(u64, u8), Box<[u8]>>, + len: Option, } -impl ReadCache { +impl ReadCacheInternal { + /// Ensures this range is contained in the len of the file + fn range_in_bounds(&mut self, range: &Range) -> Result<(), ()> { + if range.start <= range.end && range.end <= self.len()? { + Ok(()) + } else { + Err(()) + } + } + + /// The length of the underlying read, memoized + fn len(&mut self) -> Result { + match self.len { + Some(len) => Ok(len), + None => { + let len = self.read.len()?; + self.len = Some(len); + Ok(len) + } + } + } +} + +impl ReadCache { /// Create an empty `ReadCache` for the given stream. pub fn new(read: R) -> Self { ReadCache { cache: RefCell::new(ReadCacheInternal { read, - bufs: HashMap::new(), - strings: HashMap::new(), + bufs: Map::new(), + strings: Map::new(), + len: None, }), } } @@ -62,10 +97,9 @@ impl ReadCache { } } -impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache { +impl<'a, R: ReadCacheOps> ReadRef<'a> for &'a ReadCache { fn len(self) -> Result { - let cache = &mut *self.cache.borrow_mut(); - cache.read.seek(SeekFrom::End(0)).map_err(|_| ()) + self.cache.borrow_mut().len() } fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8], ()> { @@ -73,13 +107,17 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache { return Ok(&[]); } let cache = &mut *self.cache.borrow_mut(); + cache.range_in_bounds(&(offset..(offset.saturating_add(size))))?; let buf = match cache.bufs.entry((offset, size)) { Entry::Occupied(entry) => entry.into_mut(), Entry::Vacant(entry) => { let size = size.try_into().map_err(|_| ())?; - cache.read.seek(SeekFrom::Start(offset)).map_err(|_| ())?; - let mut bytes = vec![0; size].into_boxed_slice(); - 
cache.read.read_exact(&mut bytes).map_err(|_| ())?; + cache.read.seek(offset)?; + let mut bytes = Vec::new(); + bytes.try_reserve_exact(size).map_err(|_| ())?; + bytes.resize(size, 0); + let mut bytes = bytes.into_boxed_slice(); + cache.read.read_exact(&mut bytes)?; entry.insert(bytes) } }; @@ -90,13 +128,11 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache { fn read_bytes_at_until(self, range: Range, delimiter: u8) -> Result<&'a [u8], ()> { let cache = &mut *self.cache.borrow_mut(); + cache.range_in_bounds(&range)?; let buf = match cache.strings.entry((range.start, delimiter)) { Entry::Occupied(entry) => entry.into_mut(), Entry::Vacant(entry) => { - cache - .read - .seek(SeekFrom::Start(range.start)) - .map_err(|_| ())?; + cache.read.seek(range.start)?; let max_check: usize = (range.end - range.start).try_into().map_err(|_| ())?; // Strings should be relatively small. @@ -107,7 +143,7 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache { let mut checked = 0; loop { bytes.resize((checked + 256).min(max_check), 0); - let read = cache.read.read(&mut bytes[checked..]).map_err(|_| ())?; + let read = cache.read.read(&mut bytes[checked..])?; if read == 0 { return Err(()); } @@ -128,30 +164,26 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache { } } -/// An implementation of `ReadRef` for a range of data in a stream that +/// An implementation of [`ReadRef`] for a range of data in a stream that /// implements `Read + Seek`. /// -/// Shares an underlying `ReadCache` with a lifetime of `'a`. +/// Shares an underlying [`ReadCache`] with a lifetime of `'a`. 
#[derive(Debug)] -pub struct ReadCacheRange<'a, R: Read + Seek> { +pub struct ReadCacheRange<'a, R: ReadCacheOps> { r: &'a ReadCache, offset: u64, size: u64, } -impl<'a, R: Read + Seek> Clone for ReadCacheRange<'a, R> { +impl<'a, R: ReadCacheOps> Clone for ReadCacheRange<'a, R> { fn clone(&self) -> Self { - Self { - r: self.r, - offset: self.offset, - size: self.size, - } + *self } } -impl<'a, R: Read + Seek> Copy for ReadCacheRange<'a, R> {} +impl<'a, R: ReadCacheOps> Copy for ReadCacheRange<'a, R> {} -impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> { +impl<'a, R: ReadCacheOps> ReadRef<'a> for ReadCacheRange<'a, R> { fn len(self) -> Result { Ok(self.size) } @@ -180,3 +212,50 @@ impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> { Ok(bytes) } } + +/// Operations required to implement [`ReadCache`]. +/// +/// This is a subset of the `Read` and `Seek` traits. +/// A blanket implementation is provided for all types that implement +/// `Read + Seek`. +#[allow(clippy::len_without_is_empty)] +pub trait ReadCacheOps { + /// Return the length of the stream. + /// + /// Equivalent to `std::io::Seek::seek(SeekFrom::End(0))`. + fn len(&mut self) -> Result; + + /// Seek to the given position in the stream. + /// + /// Equivalent to `std::io::Seek::seek` with `SeekFrom::Start(pos)`. + fn seek(&mut self, pos: u64) -> Result; + + /// Read up to `buf.len()` bytes into `buf`. + /// + /// Equivalent to `std::io::Read::read`. + fn read(&mut self, buf: &mut [u8]) -> Result; + + /// Read exactly `buf.len()` bytes into `buf`. + /// + /// Equivalent to `std::io::Read::read_exact`. 
+ fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), ()>; +} + +#[cfg(feature = "std")] +impl ReadCacheOps for T { + fn len(&mut self) -> Result { + self.seek(SeekFrom::End(0)).map_err(|_| ()) + } + + fn seek(&mut self, pos: u64) -> Result { + self.seek(SeekFrom::Start(pos)).map_err(|_| ()) + } + + fn read(&mut self, buf: &mut [u8]) -> Result { + Read::read(self, buf).map_err(|_| ()) + } + + fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), ()> { + Read::read_exact(self, buf).map_err(|_| ()) + } +} diff --git a/third_party/rust/object/src/read/read_ref.rs b/third_party/rust/object/src/read/read_ref.rs index a9b42522141c..fb77f0112666 100644 --- a/third_party/rust/object/src/read/read_ref.rs +++ b/third_party/rust/object/src/read/read_ref.rs @@ -8,7 +8,7 @@ use crate::pod::{from_bytes, slice_from_bytes, Pod}; type Result = result::Result; -/// A trait for reading references to `Pod` types from a block of data. +/// A trait for reading references to [`Pod`] types from a block of data. /// /// This allows parsers to handle both of these cases: /// - the block of data exists in memory, and it is desirable @@ -16,6 +16,18 @@ type Result = result::Result; /// - the block of data exists in storage, and it is desirable /// to read on demand to minimize I/O and memory usage. /// +/// A block of data typically exists in memory as a result of using a memory +/// mapped file, and the crate was written with this use case in mind. +/// Reading the entire file into a `Vec` is also possible, but it often uses +/// more I/O and memory. +/// Both of these are handled by the `ReadRef` implementation for `&[u8]`. +/// +/// For the second use case, the `ReadRef` trait is implemented for +/// [`&ReadCache`](super::ReadCache). This is useful for environments where +/// memory mapped files are not available or not suitable, such as WebAssembly. +/// This differs from reading into a `Vec` in that it only reads the portions +/// of the file that are needed for parsing. 
+/// /// The methods accept `self` by value because `Self` is expected to behave /// similar to a reference: it may be a reference with a lifetime of `'a`, /// or it may be a wrapper of a reference. diff --git a/third_party/rust/object/src/read/traits.rs b/third_party/rust/object/src/read/traits.rs index d35b0b0caa04..cbc93fc7293f 100644 --- a/third_party/rust/object/src/read/traits.rs +++ b/third_party/rust/object/src/read/traits.rs @@ -1,56 +1,97 @@ use alloc::borrow::Cow; use alloc::vec::Vec; +use crate::endian::Endianness; use crate::read::{ self, Architecture, CodeView, ComdatKind, CompressedData, CompressedFileRange, Export, - FileFlags, Import, ObjectKind, ObjectMap, Relocation, Result, SectionFlags, SectionIndex, - SectionKind, SegmentFlags, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapName, - SymbolScope, SymbolSection, + FileFlags, Import, ObjectKind, ObjectMap, Relocation, RelocationMap, Result, SectionFlags, + SectionIndex, SectionKind, SegmentFlags, SubArchitecture, SymbolFlags, SymbolIndex, SymbolKind, + SymbolMap, SymbolMapName, SymbolScope, SymbolSection, }; -use crate::Endianness; /// An object file. -pub trait Object<'data: 'file, 'file>: read::private::Sealed { - /// A segment in the object file. - type Segment: ObjectSegment<'data>; +/// +/// This is the primary trait for the unified read API. +pub trait Object<'data>: read::private::Sealed { + /// A loadable segment in the object file. + type Segment<'file>: ObjectSegment<'data> + where + Self: 'file, + 'data: 'file; - /// An iterator over the segments in the object file. - type SegmentIterator: Iterator; + /// An iterator for the loadable segments in the object file. + type SegmentIterator<'file>: Iterator> + where + Self: 'file, + 'data: 'file; /// A section in the object file. - type Section: ObjectSection<'data>; + type Section<'file>: ObjectSection<'data> + where + Self: 'file, + 'data: 'file; - /// An iterator over the sections in the object file. 
- type SectionIterator: Iterator; + /// An iterator for the sections in the object file. + type SectionIterator<'file>: Iterator> + where + Self: 'file, + 'data: 'file; /// A COMDAT section group in the object file. - type Comdat: ObjectComdat<'data>; + type Comdat<'file>: ObjectComdat<'data> + where + Self: 'file, + 'data: 'file; - /// An iterator over the COMDAT section groups in the object file. - type ComdatIterator: Iterator; + /// An iterator for the COMDAT section groups in the object file. + type ComdatIterator<'file>: Iterator> + where + Self: 'file, + 'data: 'file; /// A symbol in the object file. - type Symbol: ObjectSymbol<'data>; + type Symbol<'file>: ObjectSymbol<'data> + where + Self: 'file, + 'data: 'file; - /// An iterator over symbols in the object file. - type SymbolIterator: Iterator; + /// An iterator for symbols in the object file. + type SymbolIterator<'file>: Iterator> + where + Self: 'file, + 'data: 'file; /// A symbol table in the object file. - type SymbolTable: ObjectSymbolTable< + type SymbolTable<'file>: ObjectSymbolTable< 'data, - Symbol = Self::Symbol, - SymbolIterator = Self::SymbolIterator, - >; + Symbol = Self::Symbol<'file>, + SymbolIterator = Self::SymbolIterator<'file>, + > + where + Self: 'file, + 'data: 'file; - /// An iterator over dynamic relocations in the file. + /// An iterator for the dynamic relocations in the file. /// /// The first field in the item tuple is the address /// that the relocation applies to. - type DynamicRelocationIterator: Iterator; + type DynamicRelocationIterator<'file>: Iterator + where + Self: 'file, + 'data: 'file; /// Get the architecture type of the file. fn architecture(&self) -> Architecture; + /// Get the sub-architecture type of the file if known. + /// + /// A value of `None` has a range of meanings: the file supports all + /// sub-architectures, the file does not explicitly specify a + /// sub-architecture, or the sub-architecture is currently unrecognized. 
+ fn sub_architecture(&self) -> Option { + None + } + /// Get the endianness of the file. #[inline] fn endianness(&self) -> Endianness { @@ -70,29 +111,39 @@ pub trait Object<'data: 'file, 'file>: read::private::Sealed { /// Return the kind of this object. fn kind(&self) -> ObjectKind; - /// Get an iterator over the segments in the file. - fn segments(&'file self) -> Self::SegmentIterator; + /// Get an iterator for the loadable segments in the file. + /// + /// For ELF, this is program headers with type [`PT_LOAD`](crate::elf::PT_LOAD). + /// For Mach-O, this is load commands with type [`LC_SEGMENT`](crate::macho::LC_SEGMENT) + /// or [`LC_SEGMENT_64`](crate::macho::LC_SEGMENT_64). + /// For PE, this is all sections. + fn segments(&self) -> Self::SegmentIterator<'_>; /// Get the section named `section_name`, if such a section exists. /// - /// If `section_name` starts with a '.' then it is treated as a system section name, - /// and is compared using the conventions specific to the object file format. This - /// includes: - /// - if ".debug_str_offsets" is requested for a Mach-O object file, then the actual - /// section name that is searched for is "__debug_str_offs". + /// If `section_name` starts with a '.' then it is treated as a system + /// section name, and is compared using the conventions specific to the + /// object file format. This includes: + /// - if ".debug_str_offsets" is requested for a Mach-O object file, then + /// the actual section name that is searched for is "__debug_str_offs". /// - if ".debug_info" is requested for an ELF object file, then - /// ".zdebug_info" may be returned (and similarly for other debug sections). + /// ".zdebug_info" may be returned (and similarly for other debug + /// sections). Similarly, if ".debug_info" is requested for a Mach-O + /// object file, then "__zdebug_info" may be returned. /// - /// For some object files, multiple segments may contain sections with the same - /// name. 
In this case, the first matching section will be used. + /// For some object files, multiple segments may contain sections with the + /// same name. In this case, the first matching section will be used. /// /// This method skips over sections with invalid names. - fn section_by_name(&'file self, section_name: &str) -> Option { + fn section_by_name(&self, section_name: &str) -> Option> { self.section_by_name_bytes(section_name.as_bytes()) } /// Like [`Self::section_by_name`], but allows names that are not UTF-8. - fn section_by_name_bytes(&'file self, section_name: &[u8]) -> Option; + fn section_by_name_bytes<'file>( + &'file self, + section_name: &[u8], + ) -> Option>; /// Get the section at the given index. /// @@ -101,64 +152,121 @@ pub trait Object<'data: 'file, 'file>: read::private::Sealed { /// For some object files, this requires iterating through all sections. /// /// Returns an error if the index is invalid. - fn section_by_index(&'file self, index: SectionIndex) -> Result; + fn section_by_index(&self, index: SectionIndex) -> Result>; - /// Get an iterator over the sections in the file. - fn sections(&'file self) -> Self::SectionIterator; + /// Get an iterator for the sections in the file. + fn sections(&self) -> Self::SectionIterator<'_>; - /// Get an iterator over the COMDAT section groups in the file. - fn comdats(&'file self) -> Self::ComdatIterator; + /// Get an iterator for the COMDAT section groups in the file. + fn comdats(&self) -> Self::ComdatIterator<'_>; - /// Get the symbol table, if any. - fn symbol_table(&'file self) -> Option; + /// Get the debugging symbol table, if any. + fn symbol_table(&self) -> Option>; /// Get the debugging symbol at the given index. /// /// The meaning of the index depends on the object file. /// /// Returns an error if the index is invalid. 
- fn symbol_by_index(&'file self, index: SymbolIndex) -> Result; + fn symbol_by_index(&self, index: SymbolIndex) -> Result>; - /// Get an iterator over the debugging symbols in the file. + /// Get an iterator for the debugging symbols in the file. /// /// This may skip over symbols that are malformed or unsupported. /// /// For Mach-O files, this does not include STAB entries. - fn symbols(&'file self) -> Self::SymbolIterator; + fn symbols(&self) -> Self::SymbolIterator<'_>; + + /// Get the symbol named `symbol_name`, if the symbol exists. + fn symbol_by_name<'file>(&'file self, symbol_name: &str) -> Option> { + self.symbol_by_name_bytes(symbol_name.as_bytes()) + } + + /// Like [`Self::symbol_by_name`], but allows names that are not UTF-8. + fn symbol_by_name_bytes<'file>(&'file self, symbol_name: &[u8]) -> Option> { + self.symbols() + .find(|sym| sym.name_bytes() == Ok(symbol_name)) + } /// Get the dynamic linking symbol table, if any. /// /// Only ELF has a separate dynamic linking symbol table. - fn dynamic_symbol_table(&'file self) -> Option; + /// Consider using [`Self::exports`] or [`Self::imports`] instead. + fn dynamic_symbol_table(&self) -> Option>; - /// Get an iterator over the dynamic linking symbols in the file. + /// Get an iterator for the dynamic linking symbols in the file. /// /// This may skip over symbols that are malformed or unsupported. /// - /// Only ELF has separate dynamic linking symbols. + /// Only ELF has dynamic linking symbols. /// Other file formats will return an empty iterator. - fn dynamic_symbols(&'file self) -> Self::SymbolIterator; + /// Consider using [`Self::exports`] or [`Self::imports`] instead. + fn dynamic_symbols(&self) -> Self::SymbolIterator<'_>; /// Get the dynamic relocations for this file. /// /// Symbol indices in these relocations refer to the dynamic symbol table. /// /// Only ELF has dynamic relocations. 
- fn dynamic_relocations(&'file self) -> Option; + fn dynamic_relocations(&self) -> Option>; /// Construct a map from addresses to symbol names. /// /// The map will only contain defined text and data symbols. /// The dynamic symbol table will only be used if there are no debugging symbols. - fn symbol_map(&'file self) -> SymbolMap> { + fn symbol_map(&self) -> SymbolMap> { let mut symbols = Vec::new(); if let Some(table) = self.symbol_table().or_else(|| self.dynamic_symbol_table()) { + // Sometimes symbols share addresses. Collect them all then choose the "best". + let mut all_symbols = Vec::new(); for symbol in table.symbols() { + // Must have an address. if !symbol.is_definition() { continue; } - if let Ok(name) = symbol.name() { - symbols.push(SymbolMapName::new(symbol.address(), name)); + // Must have a name. + let name = match symbol.name() { + Ok(name) => name, + _ => continue, + }; + if name.is_empty() { + continue; + } + + // Lower is better. + let mut priority = 0u32; + + // Prefer known kind. + match symbol.kind() { + SymbolKind::Text | SymbolKind::Data => {} + SymbolKind::Unknown => priority += 1, + _ => continue, + } + priority *= 2; + + // Prefer global visibility. + priority += match symbol.scope() { + SymbolScope::Unknown => 3, + SymbolScope::Compilation => 2, + SymbolScope::Linkage => 1, + SymbolScope::Dynamic => 0, + }; + priority *= 4; + + // Prefer later entries (earlier symbol is likely to be less specific). + let index = !0 - symbol.index().0; + + // Tuple is ordered for sort. + all_symbols.push((symbol.address(), priority, index, name)); + } + // Unstable sort is okay because tuple includes index. 
+ all_symbols.sort_unstable(); + + let mut previous_address = !0; + for (address, _priority, _index, name) in all_symbols { + if address != previous_address { + symbols.push(SymbolMapName::new(address, name)); + previous_address = address; } } } @@ -168,7 +276,7 @@ pub trait Object<'data: 'file, 'file>: read::private::Sealed { /// Construct a map from addresses to symbol names and object file names. /// /// This is derived from Mach-O STAB entries. - fn object_map(&'file self) -> ObjectMap<'data> { + fn object_map(&self) -> ObjectMap<'data> { ObjectMap::default() } @@ -177,20 +285,20 @@ pub trait Object<'data: 'file, 'file>: read::private::Sealed { /// Get the exported symbols that expose both a name and an address. /// - /// Some file formats may provide other kinds of symbols, that can be retrieved using - /// the lower-level API. + /// Some file formats may provide other kinds of symbols that can be retrieved using + /// the low level API. fn exports(&self) -> Result>>; - /// Return true if the file contains debug information sections, false if not. + /// Return true if the file contains DWARF debug information sections, false if not. fn has_debug_symbols(&self) -> bool; - /// The UUID from a Mach-O `LC_UUID` load command. + /// The UUID from a Mach-O [`LC_UUID`](crate::macho::LC_UUID) load command. #[inline] fn mach_uuid(&self) -> Result> { Ok(None) } - /// The build ID from an ELF `NT_GNU_BUILD_ID` note. + /// The build ID from an ELF [`NT_GNU_BUILD_ID`](crate::elf::NT_GNU_BUILD_ID) note. #[inline] fn build_id(&self) -> Result> { Ok(None) @@ -208,7 +316,7 @@ pub trait Object<'data: 'file, 'file>: read::private::Sealed { Ok(None) } - /// The filename and GUID from the PE CodeView section + /// The filename and GUID from the PE CodeView section. #[inline] fn pdb_info(&self) -> Result>> { Ok(None) @@ -217,19 +325,18 @@ pub trait Object<'data: 'file, 'file>: read::private::Sealed { /// Get the base address used for relative virtual addresses. 
/// /// Currently this is only non-zero for PE. - fn relative_address_base(&'file self) -> u64; + fn relative_address_base(&self) -> u64; - /// Get the virtual address of the entry point of the binary - fn entry(&'file self) -> u64; + /// Get the virtual address of the entry point of the binary. + fn entry(&self) -> u64; /// File flags that are specific to each file format. fn flags(&self) -> FileFlags; } -/// A loadable segment defined in an object file. +/// A loadable segment in an [`Object`]. /// -/// For ELF, this is a program header with type `PT_LOAD`. -/// For Mach-O, this is a load command with type `LC_SEGMENT` or `LC_SEGMENT_64`. +/// This trait is part of the unified read API. pub trait ObjectSegment<'data>: read::private::Sealed { /// Returns the virtual address of the segment. fn address(&self) -> u64; @@ -266,9 +373,11 @@ pub trait ObjectSegment<'data>: read::private::Sealed { fn flags(&self) -> SegmentFlags; } -/// A section defined in an object file. +/// A section in an [`Object`]. +/// +/// This trait is part of the unified read API. pub trait ObjectSection<'data>: read::private::Sealed { - /// An iterator over the relocations for a section. + /// An iterator for the relocations for a section. /// /// The first field in the item tuple is the section offset /// that the relocation applies to. @@ -324,12 +433,12 @@ pub trait ObjectSection<'data>: read::private::Sealed { } /// Returns the name of the section. - fn name_bytes(&self) -> Result<&[u8]>; + fn name_bytes(&self) -> Result<&'data [u8]>; /// Returns the name of the section. /// /// Returns an error if the name is not UTF-8. - fn name(&self) -> Result<&str>; + fn name(&self) -> Result<&'data str>; /// Returns the name of the segment for this section. fn segment_name_bytes(&self) -> Result>; @@ -345,13 +454,18 @@ pub trait ObjectSection<'data>: read::private::Sealed { /// Get the relocations for this section. 
fn relocations(&self) -> Self::RelocationIterator; + /// Construct a relocation map for this section. + fn relocation_map(&self) -> Result; + /// Section flags that are specific to each file format. fn flags(&self) -> SectionFlags; } -/// A COMDAT section group defined in an object file. +/// A COMDAT section group in an [`Object`]. +/// +/// This trait is part of the unified read API. pub trait ObjectComdat<'data>: read::private::Sealed { - /// An iterator over the sections in the object file. + /// An iterator for the sections in the section group. type SectionIterator: Iterator; /// Returns the COMDAT selection kind. @@ -361,26 +475,28 @@ pub trait ObjectComdat<'data>: read::private::Sealed { fn symbol(&self) -> SymbolIndex; /// Returns the name of the COMDAT section group. - fn name_bytes(&self) -> Result<&[u8]>; + fn name_bytes(&self) -> Result<&'data [u8]>; /// Returns the name of the COMDAT section group. /// /// Returns an error if the name is not UTF-8. - fn name(&self) -> Result<&str>; + fn name(&self) -> Result<&'data str>; /// Get the sections in this section group. fn sections(&self) -> Self::SectionIterator; } -/// A symbol table. +/// A symbol table in an [`Object`]. +/// +/// This trait is part of the unified read API. pub trait ObjectSymbolTable<'data>: read::private::Sealed { /// A symbol table entry. type Symbol: ObjectSymbol<'data>; - /// An iterator over the symbols in a symbol table. + /// An iterator for the symbols in a symbol table. type SymbolIterator: Iterator; - /// Get an iterator over the symbols in the table. + /// Get an iterator for the symbols in the table. /// /// This may skip over symbols that are malformed or unsupported. fn symbols(&self) -> Self::SymbolIterator; @@ -393,7 +509,9 @@ pub trait ObjectSymbolTable<'data>: read::private::Sealed { fn symbol_by_index(&self, index: SymbolIndex) -> Result; } -/// A symbol table entry. +/// A symbol table entry in an [`Object`]. +/// +/// This trait is part of the unified read API. 
pub trait ObjectSymbol<'data>: read::private::Sealed { /// The index of the symbol. fn index(&self) -> SymbolIndex; @@ -430,11 +548,13 @@ pub trait ObjectSymbol<'data>: read::private::Sealed { /// Return true if the symbol is a definition of a function or data object /// that has a known address. + /// + /// This is primarily used to implement [`Object::symbol_map`]. fn is_definition(&self) -> bool; /// Return true if the symbol is common data. /// - /// Note: does not check for `SymbolSection::Section` with `SectionKind::Common`. + /// Note: does not check for [`SymbolSection::Section`] with [`SectionKind::Common`]. fn is_common(&self) -> bool; /// Return true if the symbol is weak. @@ -445,7 +565,7 @@ pub trait ObjectSymbol<'data>: read::private::Sealed { /// Return true if the symbol visible outside of the compilation unit. /// - /// This treats `SymbolScope::Unknown` as global. + /// This treats [`SymbolScope::Unknown`] as global. fn is_global(&self) -> bool; /// Return true if the symbol is only visible within the compilation unit. diff --git a/third_party/rust/object/src/read/util.rs b/third_party/rust/object/src/read/util.rs index 7c3c65ec9e12..93dbc17e2b90 100644 --- a/third_party/rust/object/src/read/util.rs +++ b/third_party/rust/object/src/read/util.rs @@ -4,7 +4,7 @@ use core::fmt; use core::marker::PhantomData; use crate::pod::{from_bytes, slice_from_bytes, Pod}; -use crate::ReadRef; +use crate::read::ReadRef; /// A newtype for byte slices. /// @@ -269,7 +269,7 @@ pub(crate) fn data_range( /// A table of zero-terminated strings. /// -/// This is used for most file formats. +/// This is used by most file formats for strings such as section names and symbol names. 
#[derive(Debug, Clone, Copy)] pub struct StringTable<'data, R = &'data [u8]> where diff --git a/third_party/rust/object/src/read/wasm.rs b/third_party/rust/object/src/read/wasm.rs index b950ef2b2a77..95828ad1b3bd 100644 --- a/third_party/rust/object/src/read/wasm.rs +++ b/third_party/rust/object/src/read/wasm.rs @@ -1,8 +1,6 @@ //! Support for reading Wasm files. //! -//! Provides `WasmFile` and related types which implement the `Object` trait. -//! -//! Currently implements the minimum required to access DWARF debugging information. +//! [`WasmFile`] implements the [`Object`] trait for Wasm files. use alloc::boxed::Box; use alloc::vec::Vec; use core::marker::PhantomData; @@ -13,9 +11,9 @@ use wasmparser as wp; use crate::read::{ self, Architecture, ComdatKind, CompressedData, CompressedFileRange, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, Object, ObjectComdat, ObjectKind, ObjectSection, - ObjectSegment, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Relocation, Result, - SectionFlags, SectionIndex, SectionKind, SegmentFlags, SymbolFlags, SymbolIndex, SymbolKind, - SymbolScope, SymbolSection, + ObjectSegment, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Relocation, RelocationMap, + Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, SymbolFlags, SymbolIndex, + SymbolKind, SymbolScope, SymbolSection, }; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -34,9 +32,10 @@ enum SectionId { Code = 10, Data = 11, DataCount = 12, + Tag = 13, } // Update this constant when adding new section id: -const MAX_SECTION_ID: usize = SectionId::DataCount as usize; +const MAX_SECTION_ID: usize = SectionId::Tag as usize; /// A WebAssembly object file. #[derive(Debug)] @@ -115,6 +114,11 @@ impl<'data, R: ReadRef<'data>> WasmFile<'data, R> { let payload = payload.read_error("Invalid Wasm section header")?; match payload { + wp::Payload::Version { encoding, .. 
} => { + if encoding != wp::Encoding::Module { + return Err(Error("Unsupported Wasm encoding")); + } + } wp::Payload::TypeSection(section) => { file.add_section(SectionId::Type, section.range(), ""); } @@ -207,8 +211,9 @@ impl<'data, R: ReadRef<'data>> WasmFile<'data, R> { if let Some(local_func_id) = export.index.checked_sub(imported_funcs_count) { - let local_func_kind = - &mut local_func_kinds[local_func_id as usize]; + let local_func_kind = local_func_kinds + .get_mut(local_func_id as usize) + .read_error("Invalid Wasm export index")?; if let LocalFunctionKind::Unknown = local_func_kind { *local_func_kind = LocalFunctionKind::Exported { symbol_ids: Vec::new(), @@ -275,7 +280,9 @@ impl<'data, R: ReadRef<'data>> WasmFile<'data, R> { file.entry = address; } - let local_func_kind = &mut local_func_kinds[i]; + let local_func_kind = local_func_kinds + .get_mut(i) + .read_error("Invalid Wasm code section index")?; match local_func_kind { LocalFunctionKind::Unknown => { *local_func_kind = LocalFunctionKind::Local { @@ -308,6 +315,9 @@ impl<'data, R: ReadRef<'data>> WasmFile<'data, R> { wp::Payload::DataCountSection { range, .. 
} => { file.add_section(SectionId::DataCount, range, ""); } + wp::Payload::TagSection(section) => { + file.add_section(SectionId::Tag, section.range(), ""); + } wp::Payload::CustomSection(section) => { let name = section.name(); let size = section.data().len(); @@ -315,9 +325,12 @@ impl<'data, R: ReadRef<'data>> WasmFile<'data, R> { range.start = range.end - size; file.add_section(SectionId::Custom, range, name); if name == "name" { - for name in - wp::NameSectionReader::new(section.data(), section.data_offset()) - { + let reader = wp::BinaryReader::new( + section.data(), + section.data_offset(), + wp::WasmFeatures::all(), + ); + for name in wp::NameSectionReader::new(reader) { // TODO: Right now, ill-formed name subsections // are silently ignored in order to maintain // compatibility with extended name sections, which @@ -362,21 +375,17 @@ impl<'data, R: ReadRef<'data>> WasmFile<'data, R> { impl<'data, R> read::private::Sealed for WasmFile<'data, R> {} -impl<'data, 'file, R: ReadRef<'data>> Object<'data, 'file> for WasmFile<'data, R> -where - 'data: 'file, - R: 'file, -{ - type Segment = WasmSegment<'data, 'file, R>; - type SegmentIterator = WasmSegmentIterator<'data, 'file, R>; - type Section = WasmSection<'data, 'file, R>; - type SectionIterator = WasmSectionIterator<'data, 'file, R>; - type Comdat = WasmComdat<'data, 'file, R>; - type ComdatIterator = WasmComdatIterator<'data, 'file, R>; - type Symbol = WasmSymbol<'data, 'file>; - type SymbolIterator = WasmSymbolIterator<'data, 'file>; - type SymbolTable = WasmSymbolTable<'data, 'file>; - type DynamicRelocationIterator = NoDynamicRelocationIterator; +impl<'data, R: ReadRef<'data>> Object<'data> for WasmFile<'data, R> { + type Segment<'file> = WasmSegment<'data, 'file, R> where Self: 'file, 'data: 'file; + type SegmentIterator<'file> = WasmSegmentIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type Section<'file> = WasmSection<'data, 'file, R> where Self: 'file, 'data: 'file; + type 
SectionIterator<'file> = WasmSectionIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type Comdat<'file> = WasmComdat<'data, 'file, R> where Self: 'file, 'data: 'file; + type ComdatIterator<'file> = WasmComdatIterator<'data, 'file, R> where Self: 'file, 'data: 'file; + type Symbol<'file> = WasmSymbol<'data, 'file> where Self: 'file, 'data: 'file; + type SymbolIterator<'file> = WasmSymbolIterator<'data, 'file> where Self: 'file, 'data: 'file; + type SymbolTable<'file> = WasmSymbolTable<'data, 'file> where Self: 'file, 'data: 'file; + type DynamicRelocationIterator<'file> = NoDynamicRelocationIterator where Self: 'file, 'data: 'file; #[inline] fn architecture(&self) -> Architecture { @@ -402,11 +411,11 @@ where ObjectKind::Unknown } - fn segments(&'file self) -> Self::SegmentIterator { + fn segments(&self) -> Self::SegmentIterator<'_> { WasmSegmentIterator { file: self } } - fn section_by_name_bytes( + fn section_by_name_bytes<'file>( &'file self, section_name: &[u8], ) -> Option> { @@ -414,7 +423,7 @@ where .find(|section| section.name_bytes() == Ok(section_name)) } - fn section_by_index(&'file self, index: SectionIndex) -> Result> { + fn section_by_index(&self, index: SectionIndex) -> Result> { // TODO: Missing sections should return an empty section. 
let id_section = self .id_sections @@ -428,19 +437,19 @@ where }) } - fn sections(&'file self) -> Self::SectionIterator { + fn sections(&self) -> Self::SectionIterator<'_> { WasmSectionIterator { file: self, sections: self.sections.iter(), } } - fn comdats(&'file self) -> Self::ComdatIterator { + fn comdats(&self) -> Self::ComdatIterator<'_> { WasmComdatIterator { file: self } } #[inline] - fn symbol_by_index(&'file self, index: SymbolIndex) -> Result> { + fn symbol_by_index(&self, index: SymbolIndex) -> Result> { let symbol = self .symbols .get(index.0) @@ -448,26 +457,26 @@ where Ok(WasmSymbol { index, symbol }) } - fn symbols(&'file self) -> Self::SymbolIterator { + fn symbols(&self) -> Self::SymbolIterator<'_> { WasmSymbolIterator { symbols: self.symbols.iter().enumerate(), } } - fn symbol_table(&'file self) -> Option> { + fn symbol_table(&self) -> Option> { Some(WasmSymbolTable { symbols: &self.symbols, }) } - fn dynamic_symbols(&'file self) -> Self::SymbolIterator { + fn dynamic_symbols(&self) -> Self::SymbolIterator<'_> { WasmSymbolIterator { symbols: [].iter().enumerate(), } } #[inline] - fn dynamic_symbol_table(&'file self) -> Option> { + fn dynamic_symbol_table(&self) -> Option> { None } @@ -495,7 +504,7 @@ where } #[inline] - fn entry(&'file self) -> u64 { + fn entry(&self) -> u64 { self.entry } @@ -505,7 +514,9 @@ where } } -/// An iterator over the segments of a `WasmFile`. +/// An iterator for the segments in a [`WasmFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct WasmSegmentIterator<'data, 'file, R = &'data [u8]> { #[allow(unused)] @@ -521,7 +532,9 @@ impl<'data, 'file, R> Iterator for WasmSegmentIterator<'data, 'file, R> { } } -/// A segment of a `WasmFile`. +/// A segment in a [`WasmFile`]. +/// +/// This is a stub that doesn't implement any functionality. 
#[derive(Debug)] pub struct WasmSegment<'data, 'file, R = &'data [u8]> { #[allow(unused)] @@ -575,7 +588,7 @@ impl<'data, 'file, R> ObjectSegment<'data> for WasmSegment<'data, 'file, R> { } } -/// An iterator over the sections of a `WasmFile`. +/// An iterator for the sections in a [`WasmFile`]. #[derive(Debug)] pub struct WasmSectionIterator<'data, 'file, R = &'data [u8]> { file: &'file WasmFile<'data, R>, @@ -594,7 +607,9 @@ impl<'data, 'file, R> Iterator for WasmSectionIterator<'data, 'file, R> { } } -/// A section of a `WasmFile`. +/// A section in a [`WasmFile`]. +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. #[derive(Debug)] pub struct WasmSection<'data, 'file, R = &'data [u8]> { file: &'file WasmFile<'data, R>, @@ -659,12 +674,12 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for WasmSection<'data } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { self.name().map(str::as_bytes) } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { Ok(match self.section.id { SectionId::Custom => self.section.name, SectionId::Type => "", @@ -679,6 +694,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for WasmSection<'data SectionId::Code => "", SectionId::Data => "", SectionId::DataCount => "", + SectionId::Tag => "", }) } @@ -711,6 +727,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for WasmSection<'data SectionId::Code => SectionKind::Text, SectionId::Data => SectionKind::Data, SectionId::DataCount => SectionKind::UninitializedData, + SectionId::Tag => SectionKind::Data, } } @@ -719,13 +736,19 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for WasmSection<'data WasmRelocationIterator(PhantomData) } + fn relocation_map(&self) -> read::Result { + RelocationMap::new(self.file, self) + } + #[inline] fn flags(&self) -> SectionFlags { SectionFlags::None } } -/// An iterator over the COMDAT 
section groups of a `WasmFile`. +/// An iterator for the COMDAT section groups in a [`WasmFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct WasmComdatIterator<'data, 'file, R = &'data [u8]> { #[allow(unused)] @@ -741,7 +764,9 @@ impl<'data, 'file, R> Iterator for WasmComdatIterator<'data, 'file, R> { } } -/// A COMDAT section group of a `WasmFile`. +/// A COMDAT section group in a [`WasmFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct WasmComdat<'data, 'file, R = &'data [u8]> { #[allow(unused)] @@ -764,12 +789,12 @@ impl<'data, 'file, R> ObjectComdat<'data> for WasmComdat<'data, 'file, R> { } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { unreachable!(); } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { unreachable!(); } @@ -779,7 +804,9 @@ impl<'data, 'file, R> ObjectComdat<'data> for WasmComdat<'data, 'file, R> { } } -/// An iterator over the sections in a COMDAT section group of a `WasmFile`. +/// An iterator for the sections in a COMDAT section group in a [`WasmFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct WasmComdatSectionIterator<'data, 'file, R = &'data [u8]> { #[allow(unused)] @@ -794,7 +821,7 @@ impl<'data, 'file, R> Iterator for WasmComdatSectionIterator<'data, 'file, R> { } } -/// A symbol table of a `WasmFile`. +/// A symbol table in a [`WasmFile`]. #[derive(Debug)] pub struct WasmSymbolTable<'data, 'file> { symbols: &'file [WasmSymbolInternal<'data>], @@ -821,7 +848,7 @@ impl<'data, 'file> ObjectSymbolTable<'data> for WasmSymbolTable<'data, 'file> { } } -/// An iterator over the symbols of a `WasmFile`. +/// An iterator for the symbols in a [`WasmFile`]. 
#[derive(Debug)] pub struct WasmSymbolIterator<'data, 'file> { symbols: core::iter::Enumerate>>, @@ -839,7 +866,9 @@ impl<'data, 'file> Iterator for WasmSymbolIterator<'data, 'file> { } } -/// A symbol of a `WasmFile`. +/// A symbol in a [`WasmFile`]. +/// +/// Most functionality is provided by the [`ObjectSymbol`] trait implementation. #[derive(Clone, Copy, Debug)] pub struct WasmSymbol<'data, 'file> { index: SymbolIndex, @@ -901,7 +930,8 @@ impl<'data, 'file> ObjectSymbol<'data> for WasmSymbol<'data, 'file> { #[inline] fn is_definition(&self) -> bool { - self.symbol.kind == SymbolKind::Text && self.symbol.section != SymbolSection::Undefined + (self.symbol.kind == SymbolKind::Text || self.symbol.kind == SymbolKind::Data) + && self.symbol.section != SymbolSection::Undefined } #[inline] @@ -935,7 +965,9 @@ impl<'data, 'file> ObjectSymbol<'data> for WasmSymbol<'data, 'file> { } } -/// An iterator over the relocations in a `WasmSection`. +/// An iterator for the relocations for a [`WasmSection`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct WasmRelocationIterator<'data, 'file, R = &'data [u8]>( PhantomData<(&'data (), &'file (), R)>, diff --git a/third_party/rust/object/src/read/xcoff/comdat.rs b/third_party/rust/object/src/read/xcoff/comdat.rs index 2b23d1dba8b8..142dd52685c1 100644 --- a/third_party/rust/object/src/read/xcoff/comdat.rs +++ b/third_party/rust/object/src/read/xcoff/comdat.rs @@ -2,20 +2,21 @@ use core::fmt::Debug; -use crate::xcoff; - use crate::read::{self, ComdatKind, ObjectComdat, ReadRef, Result, SectionIndex, SymbolIndex}; +use crate::xcoff; use super::{FileHeader, XcoffFile}; -/// An iterator over the COMDAT section groups of a `XcoffFile32`. +/// An iterator for the COMDAT section groups in a [`XcoffFile32`](super::XcoffFile32). 
pub type XcoffComdatIterator32<'data, 'file, R = &'data [u8]> = XcoffComdatIterator<'data, 'file, xcoff::FileHeader32, R>; -/// An iterator over the COMDAT section groups of a `XcoffFile64`. +/// An iterator for the COMDAT section groups in a [`XcoffFile64`](super::XcoffFile64). pub type XcoffComdatIterator64<'data, 'file, R = &'data [u8]> = XcoffComdatIterator<'data, 'file, xcoff::FileHeader64, R>; -/// An iterator over the COMDAT section groups of a `XcoffFile`. +/// An iterator for the COMDAT section groups in a [`XcoffFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct XcoffComdatIterator<'data, 'file, Xcoff, R = &'data [u8]> where @@ -39,15 +40,17 @@ where } } -/// A COMDAT section group of a `XcoffFile32`. +/// A COMDAT section group in a [`XcoffFile32`](super::XcoffFile32). pub type XcoffComdat32<'data, 'file, R = &'data [u8]> = XcoffComdat<'data, 'file, xcoff::FileHeader32, R>; -/// A COMDAT section group of a `XcoffFile64`. +/// A COMDAT section group in a [`XcoffFile64`](super::XcoffFile64). pub type XcoffComdat64<'data, 'file, R = &'data [u8]> = XcoffComdat<'data, 'file, xcoff::FileHeader64, R>; -/// A COMDAT section group of a `XcoffFile`. +/// A COMDAT section group in a [`XcoffFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct XcoffComdat<'data, 'file, Xcoff, R = &'data [u8]> where @@ -83,12 +86,12 @@ where } #[inline] - fn name_bytes(&self) -> Result<&[u8]> { + fn name_bytes(&self) -> Result<&'data [u8]> { unreachable!(); } #[inline] - fn name(&self) -> Result<&str> { + fn name(&self) -> Result<&'data str> { unreachable!(); } @@ -98,14 +101,16 @@ where } } -/// An iterator over the sections in a COMDAT section group of a `XcoffFile32`. +/// An iterator for the sections in a COMDAT section group in a [`XcoffFile32`](super::XcoffFile32). 
pub type XcoffComdatSectionIterator32<'data, 'file, R = &'data [u8]> = XcoffComdatSectionIterator<'data, 'file, xcoff::FileHeader32, R>; -/// An iterator over the sections in a COMDAT section group of a `XcoffFile64`. +/// An iterator for the sections in a COMDAT section group in a [`XcoffFile64`](super::XcoffFile64). pub type XcoffComdatSectionIterator64<'data, 'file, R = &'data [u8]> = XcoffComdatSectionIterator<'data, 'file, xcoff::FileHeader64, R>; -/// An iterator over the sections in a COMDAT section group of a `XcoffFile`. +/// An iterator for the sections in a COMDAT section group in a [`XcoffFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct XcoffComdatSectionIterator<'data, 'file, Xcoff, R = &'data [u8]> where diff --git a/third_party/rust/object/src/read/xcoff/file.rs b/third_party/rust/object/src/read/xcoff/file.rs index bac9e7075615..6d8de8ddb2b1 100644 --- a/third_party/rust/object/src/read/xcoff/file.rs +++ b/third_party/rust/object/src/read/xcoff/file.rs @@ -3,27 +3,34 @@ use core::mem; use alloc::vec::Vec; -use crate::read::{self, Error, NoDynamicRelocationIterator, Object, ReadError, ReadRef, Result}; - -use crate::{ - xcoff, Architecture, BigEndian as BE, FileFlags, ObjectKind, ObjectSection, Pod, SectionIndex, - SymbolIndex, +use crate::endian::BigEndian as BE; +use crate::pod::Pod; +use crate::read::{ + self, Architecture, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, Object, + ObjectKind, ObjectSection, ReadError, ReadRef, Result, SectionIndex, SymbolIndex, }; +use crate::xcoff; use super::{ - CsectAux, FileAux, SectionHeader, SectionTable, Symbol, SymbolTable, XcoffComdat, + CsectAux, FileAux, Rel, SectionHeader, SectionTable, Symbol, SymbolTable, XcoffComdat, XcoffComdatIterator, XcoffSection, XcoffSectionIterator, XcoffSegment, XcoffSegmentIterator, XcoffSymbol, XcoffSymbolIterator, XcoffSymbolTable, }; /// A 32-bit XCOFF object file. 
+/// +/// This is a file that starts with [`xcoff::FileHeader32`], and corresponds +/// to [`crate::FileKind::Xcoff32`]. pub type XcoffFile32<'data, R = &'data [u8]> = XcoffFile<'data, xcoff::FileHeader32, R>; /// A 64-bit XCOFF object file. +/// +/// This is a file that starts with [`xcoff::FileHeader64`], and corresponds +/// to [`crate::FileKind::Xcoff64`]. pub type XcoffFile64<'data, R = &'data [u8]> = XcoffFile<'data, xcoff::FileHeader64, R>; /// A partially parsed XCOFF file. /// -/// Most of the functionality of this type is provided by the `Object` trait implementation. +/// Most functionality is provided by the [`Object`] trait implementation. #[derive(Debug)] pub struct XcoffFile<'data, Xcoff, R = &'data [u8]> where @@ -65,9 +72,30 @@ where } /// Returns the raw XCOFF file header. + #[deprecated(note = "Use `xcoff_header` instead")] pub fn raw_header(&self) -> &'data Xcoff { self.header } + + /// Get the raw XCOFF file header. + pub fn xcoff_header(&self) -> &'data Xcoff { + self.header + } + + /// Get the raw XCOFF auxiliary header. + pub fn xcoff_aux_header(&self) -> Option<&'data Xcoff::AuxHeader> { + self.aux_header + } + + /// Get the XCOFF section table. + pub fn xcoff_section_table(&self) -> &SectionTable<'data, Xcoff> { + &self.sections + } + + /// Get the XCOFF symbol table. 
+ pub fn xcoff_symbol_table(&self) -> &SymbolTable<'data, Xcoff, R> { + &self.symbols + } } impl<'data, Xcoff, R> read::private::Sealed for XcoffFile<'data, Xcoff, R> @@ -77,24 +105,23 @@ where { } -impl<'data, 'file, Xcoff, R> Object<'data, 'file> for XcoffFile<'data, Xcoff, R> +impl<'data, Xcoff, R> Object<'data> for XcoffFile<'data, Xcoff, R> where - 'data: 'file, Xcoff: FileHeader, - R: 'file + ReadRef<'data>, + R: ReadRef<'data>, { - type Segment = XcoffSegment<'data, 'file, Xcoff, R>; - type SegmentIterator = XcoffSegmentIterator<'data, 'file, Xcoff, R>; - type Section = XcoffSection<'data, 'file, Xcoff, R>; - type SectionIterator = XcoffSectionIterator<'data, 'file, Xcoff, R>; - type Comdat = XcoffComdat<'data, 'file, Xcoff, R>; - type ComdatIterator = XcoffComdatIterator<'data, 'file, Xcoff, R>; - type Symbol = XcoffSymbol<'data, 'file, Xcoff, R>; - type SymbolIterator = XcoffSymbolIterator<'data, 'file, Xcoff, R>; - type SymbolTable = XcoffSymbolTable<'data, 'file, Xcoff, R>; - type DynamicRelocationIterator = NoDynamicRelocationIterator; + type Segment<'file> = XcoffSegment<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type SegmentIterator<'file> = XcoffSegmentIterator<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type Section<'file> = XcoffSection<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type SectionIterator<'file> = XcoffSectionIterator<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type Comdat<'file> = XcoffComdat<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type ComdatIterator<'file> = XcoffComdatIterator<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type Symbol<'file> = XcoffSymbol<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type SymbolIterator<'file> = XcoffSymbolIterator<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type SymbolTable<'file> = XcoffSymbolTable<'data, 'file, Xcoff, R> where Self: 'file, 'data: 'file; + type 
DynamicRelocationIterator<'file> = NoDynamicRelocationIterator where Self: 'file, 'data: 'file; - fn architecture(&self) -> crate::Architecture { + fn architecture(&self) -> Architecture { if self.is_64() { Architecture::PowerPc64 } else { @@ -123,11 +150,11 @@ where } } - fn segments(&'file self) -> XcoffSegmentIterator<'data, 'file, Xcoff, R> { + fn segments(&self) -> XcoffSegmentIterator<'data, '_, Xcoff, R> { XcoffSegmentIterator { file: self } } - fn section_by_name_bytes( + fn section_by_name_bytes<'file>( &'file self, section_name: &[u8], ) -> Option> { @@ -135,10 +162,7 @@ where .find(|section| section.name_bytes() == Ok(section_name)) } - fn section_by_index( - &'file self, - index: SectionIndex, - ) -> Result> { + fn section_by_index(&self, index: SectionIndex) -> Result> { let section = self.sections.section(index)?; Ok(XcoffSection { file: self, @@ -147,18 +171,18 @@ where }) } - fn sections(&'file self) -> XcoffSectionIterator<'data, 'file, Xcoff, R> { + fn sections(&self) -> XcoffSectionIterator<'data, '_, Xcoff, R> { XcoffSectionIterator { file: self, iter: self.sections.iter().enumerate(), } } - fn comdats(&'file self) -> XcoffComdatIterator<'data, 'file, Xcoff, R> { + fn comdats(&self) -> XcoffComdatIterator<'data, '_, Xcoff, R> { XcoffComdatIterator { file: self } } - fn symbol_table(&'file self) -> Option> { + fn symbol_table(&self) -> Option> { if self.symbols.is_empty() { return None; } @@ -168,11 +192,8 @@ where }) } - fn symbol_by_index( - &'file self, - index: SymbolIndex, - ) -> Result> { - let symbol = self.symbols.symbol(index.0)?; + fn symbol_by_index(&self, index: SymbolIndex) -> Result> { + let symbol = self.symbols.symbol(index)?; Ok(XcoffSymbol { symbols: &self.symbols, index, @@ -181,39 +202,38 @@ where }) } - fn symbols(&'file self) -> XcoffSymbolIterator<'data, 'file, Xcoff, R> { + fn symbols(&self) -> XcoffSymbolIterator<'data, '_, Xcoff, R> { XcoffSymbolIterator { - symbols: &self.symbols, - index: 0, file: self, + symbols: 
self.symbols.iter(), } } - fn dynamic_symbol_table(&'file self) -> Option> { + fn dynamic_symbol_table<'file>( + &'file self, + ) -> Option> { None } - fn dynamic_symbols(&'file self) -> XcoffSymbolIterator<'data, 'file, Xcoff, R> { + fn dynamic_symbols(&self) -> XcoffSymbolIterator<'data, '_, Xcoff, R> { // TODO: return the symbols in the STYP_LOADER section. XcoffSymbolIterator { file: self, - symbols: &self.symbols, - // Hack: don't return any. - index: self.symbols.len(), + symbols: self.symbols.iter_none(), } } - fn dynamic_relocations(&'file self) -> Option { + fn dynamic_relocations(&self) -> Option> { // TODO: return the relocations in the STYP_LOADER section. None } - fn imports(&self) -> Result>> { + fn imports(&self) -> Result>> { // TODO: return the imports in the STYP_LOADER section. Ok(Vec::new()) } - fn exports(&self) -> Result>> { + fn exports(&self) -> Result>> { // TODO: return the exports in the STYP_LOADER section. Ok(Vec::new()) } @@ -222,11 +242,11 @@ where self.section_by_name(".debug").is_some() || self.section_by_name(".dwinfo").is_some() } - fn relative_address_base(&'file self) -> u64 { + fn relative_address_base(&self) -> u64 { 0 } - fn entry(&'file self) -> u64 { + fn entry(&self) -> u64 { if let Some(aux_header) = self.aux_header { aux_header.o_entry().into() } else { @@ -241,15 +261,16 @@ where } } -/// A trait for generic access to `FileHeader32` and `FileHeader64`. +/// A trait for generic access to [`xcoff::FileHeader32`] and [`xcoff::FileHeader64`]. #[allow(missing_docs)] pub trait FileHeader: Debug + Pod { type Word: Into; type AuxHeader: AuxHeader; - type SectionHeader: SectionHeader; + type SectionHeader: SectionHeader; type Symbol: Symbol; type FileAux: FileAux; type CsectAux: CsectAux; + type Rel: Rel; /// Return true if this type is a 64-bit header. 
fn is_type_64(&self) -> bool; @@ -332,6 +353,7 @@ impl FileHeader for xcoff::FileHeader32 { type Symbol = xcoff::Symbol32; type FileAux = xcoff::FileAux32; type CsectAux = xcoff::CsectAux32; + type Rel = xcoff::Rel32; fn is_type_64(&self) -> bool { false @@ -373,6 +395,7 @@ impl FileHeader for xcoff::FileHeader64 { type Symbol = xcoff::Symbol64; type FileAux = xcoff::FileAux64; type CsectAux = xcoff::CsectAux64; + type Rel = xcoff::Rel64; fn is_type_64(&self) -> bool { true @@ -407,10 +430,12 @@ impl FileHeader for xcoff::FileHeader64 { } } +/// A trait for generic access to [`xcoff::AuxHeader32`] and [`xcoff::AuxHeader64`]. #[allow(missing_docs)] pub trait AuxHeader: Debug + Pod { type Word: Into; + fn o_mflag(&self) -> u16; fn o_vstamp(&self) -> u16; fn o_tsize(&self) -> Self::Word; fn o_dsize(&self) -> Self::Word; @@ -425,20 +450,30 @@ pub trait AuxHeader: Debug + Pod { fn o_sntoc(&self) -> u16; fn o_snloader(&self) -> u16; fn o_snbss(&self) -> u16; - fn o_sntdata(&self) -> u16; - fn o_sntbss(&self) -> u16; fn o_algntext(&self) -> u16; fn o_algndata(&self) -> u16; + fn o_modtype(&self) -> u16; + fn o_cpuflag(&self) -> u8; + fn o_cputype(&self) -> u8; fn o_maxstack(&self) -> Self::Word; fn o_maxdata(&self) -> Self::Word; + fn o_debugger(&self) -> u32; fn o_textpsize(&self) -> u8; fn o_datapsize(&self) -> u8; fn o_stackpsize(&self) -> u8; + fn o_flags(&self) -> u8; + fn o_sntdata(&self) -> u16; + fn o_sntbss(&self) -> u16; + fn o_x64flags(&self) -> Option; } impl AuxHeader for xcoff::AuxHeader32 { type Word = u32; + fn o_mflag(&self) -> u16 { + self.o_mflag.get(BE) + } + fn o_vstamp(&self) -> u16 { self.o_vstamp.get(BE) } @@ -495,14 +530,6 @@ impl AuxHeader for xcoff::AuxHeader32 { self.o_snbss.get(BE) } - fn o_sntdata(&self) -> u16 { - self.o_sntdata.get(BE) - } - - fn o_sntbss(&self) -> u16 { - self.o_sntbss.get(BE) - } - fn o_algntext(&self) -> u16 { self.o_algntext.get(BE) } @@ -511,6 +538,18 @@ impl AuxHeader for xcoff::AuxHeader32 { self.o_algndata.get(BE) } 
+ fn o_modtype(&self) -> u16 { + self.o_modtype.get(BE) + } + + fn o_cpuflag(&self) -> u8 { + self.o_cpuflag + } + + fn o_cputype(&self) -> u8 { + self.o_cputype + } + fn o_maxstack(&self) -> Self::Word { self.o_maxstack.get(BE) } @@ -519,6 +558,10 @@ impl AuxHeader for xcoff::AuxHeader32 { self.o_maxdata.get(BE) } + fn o_debugger(&self) -> u32 { + self.o_debugger.get(BE) + } + fn o_textpsize(&self) -> u8 { self.o_textpsize } @@ -530,11 +573,31 @@ impl AuxHeader for xcoff::AuxHeader32 { fn o_stackpsize(&self) -> u8 { self.o_stackpsize } + + fn o_flags(&self) -> u8 { + self.o_flags + } + + fn o_sntdata(&self) -> u16 { + self.o_sntdata.get(BE) + } + + fn o_sntbss(&self) -> u16 { + self.o_sntbss.get(BE) + } + + fn o_x64flags(&self) -> Option { + None + } } impl AuxHeader for xcoff::AuxHeader64 { type Word = u64; + fn o_mflag(&self) -> u16 { + self.o_mflag.get(BE) + } + fn o_vstamp(&self) -> u16 { self.o_vstamp.get(BE) } @@ -591,14 +654,6 @@ impl AuxHeader for xcoff::AuxHeader64 { self.o_snbss.get(BE) } - fn o_sntdata(&self) -> u16 { - self.o_sntdata.get(BE) - } - - fn o_sntbss(&self) -> u16 { - self.o_sntbss.get(BE) - } - fn o_algntext(&self) -> u16 { self.o_algntext.get(BE) } @@ -607,6 +662,18 @@ impl AuxHeader for xcoff::AuxHeader64 { self.o_algndata.get(BE) } + fn o_modtype(&self) -> u16 { + self.o_modtype.get(BE) + } + + fn o_cpuflag(&self) -> u8 { + self.o_cpuflag + } + + fn o_cputype(&self) -> u8 { + self.o_cputype + } + fn o_maxstack(&self) -> Self::Word { self.o_maxstack.get(BE) } @@ -615,6 +682,10 @@ impl AuxHeader for xcoff::AuxHeader64 { self.o_maxdata.get(BE) } + fn o_debugger(&self) -> u32 { + self.o_debugger.get(BE) + } + fn o_textpsize(&self) -> u8 { self.o_textpsize } @@ -626,4 +697,20 @@ impl AuxHeader for xcoff::AuxHeader64 { fn o_stackpsize(&self) -> u8 { self.o_stackpsize } + + fn o_flags(&self) -> u8 { + self.o_flags + } + + fn o_sntdata(&self) -> u16 { + self.o_sntdata.get(BE) + } + + fn o_sntbss(&self) -> u16 { + self.o_sntbss.get(BE) + } + + fn 
o_x64flags(&self) -> Option { + Some(self.o_x64flags.get(BE)) + } } diff --git a/third_party/rust/object/src/read/xcoff/mod.rs b/third_party/rust/object/src/read/xcoff/mod.rs index 136e31073b73..b75c4da275a4 100644 --- a/third_party/rust/object/src/read/xcoff/mod.rs +++ b/third_party/rust/object/src/read/xcoff/mod.rs @@ -1,6 +1,48 @@ //! Support for reading AIX XCOFF files. //! -//! Provides `XcoffFile` and related types which implement the `Object` trait. +//! Traits are used to abstract over the difference between 32-bit and 64-bit XCOFF. +//! The primary trait for this is [`FileHeader`]. +//! +//! ## High level API +//! +//! [`XcoffFile`] implements the [`Object`](crate::read::Object) trait for XCOFF files. +//! [`XcoffFile`] is parameterised by [`FileHeader`] to allow reading both 32-bit and +//! 64-bit XCOFF. There are type aliases for these parameters ([`XcoffFile32`] and +//! [`XcoffFile64`]). +//! +//! ## Low level API +//! +//! The [`FileHeader`] trait can be directly used to parse both [`xcoff::FileHeader32`] +//! and [`xcoff::FileHeader64`]. +//! +//! ### Example for low level API +//! ```no_run +//! use object::xcoff; +//! use object::read::xcoff::{FileHeader, SectionHeader, Symbol}; +//! use std::error::Error; +//! use std::fs; +//! +//! /// Reads a file and displays the name of each section and symbol. +//! fn main() -> Result<(), Box> { +//! # #[cfg(feature = "std")] { +//! let data = fs::read("path/to/binary")?; +//! let mut offset = 0; +//! let header = xcoff::FileHeader64::parse(&*data, &mut offset)?; +//! let aux_header = header.aux_header(&*data, &mut offset)?; +//! let sections = header.sections(&*data, &mut offset)?; +//! let symbols = header.symbols(&*data)?; +//! for section in sections.iter() { +//! println!("{}", String::from_utf8_lossy(section.name())); +//! } +//! for (_index, symbol) in symbols.iter() { +//! println!("{}", String::from_utf8_lossy(symbol.name(symbols.strings())?)); +//! } +//! # } +//! Ok(()) +//! } +//! 
``` +#[cfg(doc)] +use crate::xcoff; mod file; pub use file::*; diff --git a/third_party/rust/object/src/read/xcoff/relocation.rs b/third_party/rust/object/src/read/xcoff/relocation.rs index 78c6acfc7f02..bfabf87bd809 100644 --- a/third_party/rust/object/src/read/xcoff/relocation.rs +++ b/third_party/rust/object/src/read/xcoff/relocation.rs @@ -2,21 +2,24 @@ use alloc::fmt; use core::fmt::Debug; use core::slice; +use crate::endian::BigEndian as BE; use crate::pod::Pod; -use crate::{xcoff, BigEndian as BE, Relocation}; - -use crate::read::{ReadRef, RelocationEncoding, RelocationKind, RelocationTarget, SymbolIndex}; +use crate::read::{ + ReadRef, Relocation, RelocationEncoding, RelocationFlags, RelocationKind, RelocationTarget, + SymbolIndex, +}; +use crate::xcoff; use super::{FileHeader, SectionHeader, XcoffFile}; -/// An iterator over the relocations in a `XcoffSection32`. +/// An iterator for the relocations in an [`XcoffSection32`](super::XcoffSection32). pub type XcoffRelocationIterator32<'data, 'file, R = &'data [u8]> = XcoffRelocationIterator<'data, 'file, xcoff::FileHeader32, R>; -/// An iterator over the relocations in a `XcoffSection64`. +/// An iterator for the relocations in an [`XcoffSection64`](super::XcoffSection64). pub type XcoffRelocationIterator64<'data, 'file, R = &'data [u8]> = XcoffRelocationIterator<'data, 'file, xcoff::FileHeader64, R>; -/// An iterator over the relocations in a `XcoffSection`. +/// An iterator for the relocations in an [`XcoffSection`](super::XcoffSection). 
pub struct XcoffRelocationIterator<'data, 'file, Xcoff, R = &'data [u8]> where Xcoff: FileHeader, @@ -37,8 +40,11 @@ where fn next(&mut self) -> Option { self.relocations.next().map(|relocation| { + let r_rtype = relocation.r_rtype(); + let r_rsize = relocation.r_rsize(); + let flags = RelocationFlags::Xcoff { r_rtype, r_rsize }; let encoding = RelocationEncoding::Generic; - let (kind, addend) = match relocation.r_rtype() { + let (kind, addend) = match r_rtype { xcoff::R_POS | xcoff::R_RL | xcoff::R_RLA @@ -47,10 +53,10 @@ where | xcoff::R_TLS => (RelocationKind::Absolute, 0), xcoff::R_REL | xcoff::R_BR | xcoff::R_RBR => (RelocationKind::Relative, -4), xcoff::R_TOC | xcoff::R_TOCL | xcoff::R_TOCU => (RelocationKind::Got, 0), - r_type => (RelocationKind::Xcoff(r_type), 0), + _ => (RelocationKind::Unknown, 0), }; - let size = (relocation.r_rsize() & 0x3F) + 1; - let target = RelocationTarget::Symbol(SymbolIndex(relocation.r_symndx() as usize)); + let size = (r_rsize & 0x3F) + 1; + let target = RelocationTarget::Symbol(relocation.symbol()); ( relocation.r_vaddr().into(), Relocation { @@ -60,6 +66,7 @@ where target, addend, implicit_addend: true, + flags, }, ) }) @@ -76,7 +83,7 @@ where } } -/// A trait for generic access to `Rel32` and `Rel64`. +/// A trait for generic access to [`xcoff::Rel32`] and [`xcoff::Rel64`]. 
#[allow(missing_docs)] pub trait Rel: Debug + Pod { type Word: Into; @@ -84,6 +91,10 @@ pub trait Rel: Debug + Pod { fn r_symndx(&self) -> u32; fn r_rsize(&self) -> u8; fn r_rtype(&self) -> u8; + + fn symbol(&self) -> SymbolIndex { + SymbolIndex(self.r_symndx() as usize) + } } impl Rel for xcoff::Rel32 { diff --git a/third_party/rust/object/src/read/xcoff/section.rs b/third_party/rust/object/src/read/xcoff/section.rs index 77453fcd21af..dac3c2a3675f 100644 --- a/third_party/rust/object/src/read/xcoff/section.rs +++ b/third_party/rust/object/src/read/xcoff/section.rs @@ -1,22 +1,24 @@ use core::fmt::Debug; use core::{iter, result, slice, str}; -use crate::{ - xcoff, BigEndian as BE, CompressedData, CompressedFileRange, Pod, SectionFlags, SectionKind, +use crate::endian::BigEndian as BE; +use crate::pod::Pod; +use crate::read::{ + self, CompressedData, CompressedFileRange, Error, ObjectSection, ReadError, ReadRef, + RelocationMap, Result, SectionFlags, SectionIndex, SectionKind, }; - -use crate::read::{self, Error, ObjectSection, ReadError, ReadRef, Result, SectionIndex}; +use crate::xcoff; use super::{AuxHeader, FileHeader, Rel, XcoffFile, XcoffRelocationIterator}; -/// An iterator over the sections of an `XcoffFile32`. +/// An iterator for the sections in an [`XcoffFile32`](super::XcoffFile32). pub type XcoffSectionIterator32<'data, 'file, R = &'data [u8]> = XcoffSectionIterator<'data, 'file, xcoff::FileHeader32, R>; -/// An iterator over the sections of an `XcoffFile64`. +/// An iterator for the sections in an [`XcoffFile64`](super::XcoffFile64). pub type XcoffSectionIterator64<'data, 'file, R = &'data [u8]> = XcoffSectionIterator<'data, 'file, xcoff::FileHeader64, R>; -/// An iterator over the sections of an `XcoffFile`. +/// An iterator for the sections in an [`XcoffFile`]. #[derive(Debug)] pub struct XcoffSectionIterator<'data, 'file, Xcoff, R = &'data [u8]> where @@ -43,14 +45,16 @@ where } } -/// A section of an `XcoffFile32`. 
+/// A section in an [`XcoffFile32`](super::XcoffFile32). pub type XcoffSection32<'data, 'file, R = &'data [u8]> = XcoffSection<'data, 'file, xcoff::FileHeader32, R>; -/// A section of an `XcoffFile64`. +/// A section in an [`XcoffFile64`](super::XcoffFile64). pub type XcoffSection64<'data, 'file, R = &'data [u8]> = XcoffSection<'data, 'file, xcoff::FileHeader64, R>; -/// A section of an `XcoffFile`. +/// A section in an [`XcoffFile`]. +/// +/// Most functionality is provided by the [`ObjectSection`] trait implementation. #[derive(Debug)] pub struct XcoffSection<'data, 'file, Xcoff, R = &'data [u8]> where @@ -63,6 +67,21 @@ where } impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> XcoffSection<'data, 'file, Xcoff, R> { + /// Get the XCOFF file containing this section. + pub fn xcoff_file(&self) -> &'file XcoffFile<'data, Xcoff, R> { + self.file + } + + /// Get the raw XCOFF section header. + pub fn xcoff_section(&self) -> &'data Xcoff::SectionHeader { + self.section + } + + /// Get the raw XCOFF relocation entries for this section. 
+ pub fn xcoff_relocations(&self) -> Result<&'data [Xcoff::Rel]> { + self.section.relocations(self.file.data) + } + fn bytes(&self) -> Result<&'data [u8]> { self.section .data(self.file.data) @@ -134,11 +153,11 @@ where self.data().map(CompressedData::none) } - fn name_bytes(&self) -> read::Result<&[u8]> { + fn name_bytes(&self) -> read::Result<&'data [u8]> { Ok(self.section.name()) } - fn name(&self) -> read::Result<&str> { + fn name(&self) -> read::Result<&'data str> { let name = self.name_bytes()?; str::from_utf8(name) .ok() @@ -180,13 +199,17 @@ where } fn relocations(&self) -> Self::RelocationIterator { - let rel = self.section.relocations(self.file.data).unwrap_or(&[]); + let rel = self.xcoff_relocations().unwrap_or(&[]); XcoffRelocationIterator { file: self.file, relocations: rel.iter(), } } + fn relocation_map(&self) -> read::Result { + RelocationMap::new(self.file, self) + } + fn flags(&self) -> SectionFlags { SectionFlags::Xcoff { s_flags: self.section.s_flags(), @@ -199,6 +222,8 @@ where } /// The table of section headers in an XCOFF file. +/// +/// Returned by [`FileHeader::sections`]. #[derive(Debug, Clone, Copy)] pub struct SectionTable<'data, Xcoff: FileHeader> { sections: &'data [Xcoff::SectionHeader], @@ -260,7 +285,7 @@ where } } -/// A trait for generic access to `SectionHeader32` and `SectionHeader64`. +/// A trait for generic access to [`xcoff::SectionHeader32`] and [`xcoff::SectionHeader64`]. 
#[allow(missing_docs)] pub trait SectionHeader: Debug + Pod { type Word: Into; diff --git a/third_party/rust/object/src/read/xcoff/segment.rs b/third_party/rust/object/src/read/xcoff/segment.rs index 7eca72367a98..a45c52254322 100644 --- a/third_party/rust/object/src/read/xcoff/segment.rs +++ b/third_party/rust/object/src/read/xcoff/segment.rs @@ -3,19 +3,21 @@ use core::fmt::Debug; use core::str; -use crate::read::{self, ObjectSegment, ReadRef, Result}; +use crate::read::{self, ObjectSegment, ReadRef, Result, SegmentFlags}; use crate::xcoff; use super::{FileHeader, XcoffFile}; -/// An iterator over the segments of an `XcoffFile32`. +/// An iterator for the segments in an [`XcoffFile32`](super::XcoffFile32). pub type XcoffSegmentIterator32<'data, 'file, R = &'data [u8]> = XcoffSegmentIterator<'data, 'file, xcoff::FileHeader32, R>; -/// An iterator over the segments of an `XcoffFile64`. +/// An iterator for the segments in an [`XcoffFile64`](super::XcoffFile64). pub type XcoffSegmentIterator64<'data, 'file, R = &'data [u8]> = XcoffSegmentIterator<'data, 'file, xcoff::FileHeader64, R>; -/// An iterator over the segments of an `XcoffFile`. +/// An iterator for the segments in an [`XcoffFile`]. +/// +/// This is a stub that doesn't implement any functionality. #[derive(Debug)] pub struct XcoffSegmentIterator<'data, 'file, Xcoff, R = &'data [u8]> where @@ -38,14 +40,16 @@ where } } -/// A segment of an `XcoffFile32`. +/// A segment in an [`XcoffFile32`](super::XcoffFile32). pub type XcoffSegment32<'data, 'file, R = &'data [u8]> = XcoffSegment<'data, 'file, xcoff::FileHeader32, R>; -/// A segment of an `XcoffFile64`. +/// A segment in an [`XcoffFile64`](super::XcoffFile64). pub type XcoffSegment64<'data, 'file, R = &'data [u8]> = XcoffSegment<'data, 'file, xcoff::FileHeader64, R>; -/// A loadable section of an `XcoffFile`. +/// A loadable section in an [`XcoffFile`]. +/// +/// This is a stub that doesn't implement any functionality. 
#[derive(Debug)] pub struct XcoffSegment<'data, 'file, Xcoff, R = &'data [u8]> where @@ -107,7 +111,7 @@ where unreachable!(); } - fn flags(&self) -> crate::SegmentFlags { + fn flags(&self) -> SegmentFlags { unreachable!(); } } diff --git a/third_party/rust/object/src/read/xcoff/symbol.rs b/third_party/rust/object/src/read/xcoff/symbol.rs index 7ce215fac018..248976a68bf1 100644 --- a/third_party/rust/object/src/read/xcoff/symbol.rs +++ b/third_party/rust/object/src/read/xcoff/symbol.rs @@ -6,19 +6,19 @@ use core::str; use crate::endian::{BigEndian as BE, U32Bytes}; use crate::pod::{bytes_of, Pod}; -use crate::read::util::StringTable; -use crate::xcoff; - use crate::read::{ self, Bytes, Error, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Result, SectionIndex, - SymbolFlags, SymbolIndex, SymbolKind, SymbolScope, SymbolSection, + StringTable, SymbolFlags, SymbolIndex, SymbolKind, SymbolScope, SymbolSection, }; +use crate::xcoff; use super::{FileHeader, XcoffFile}; /// A table of symbol entries in an XCOFF file. /// /// Also includes the string table used for the symbol names. +/// +/// Returned by [`FileHeader::symbols`]. #[derive(Debug)] pub struct SymbolTable<'data, Xcoff, R = &'data [u8]> where @@ -80,9 +80,36 @@ where }) } + /// Return the string table used for the symbol names. + #[inline] + pub fn strings(&self) -> StringTable<'data, R> { + self.strings + } + + /// Iterate over the symbols. + /// + /// This does not return null symbols. + #[inline] + pub fn iter<'table>(&'table self) -> SymbolIterator<'data, 'table, Xcoff, R> { + SymbolIterator { + symbols: self, + index: 0, + } + } + + /// Empty symbol iterator. + #[inline] + pub(super) fn iter_none<'table>(&'table self) -> SymbolIterator<'data, 'table, Xcoff, R> { + SymbolIterator { + symbols: self, + index: self.symbols.len(), + } + } + /// Return the symbol entry at the given index and offset. 
- pub fn get(&self, index: usize, offset: usize) -> Result<&'data T> { + pub fn get(&self, index: SymbolIndex, offset: usize) -> Result<&'data T> { let entry = index + .0 .checked_add(offset) .and_then(|x| self.symbols.get(x)) .read_error("Invalid XCOFF symbol index")?; @@ -90,13 +117,27 @@ where Bytes(bytes).read().read_error("Invalid XCOFF symbol data") } - /// Return the symbol at the given index. - pub fn symbol(&self, index: usize) -> Result<&'data Xcoff::Symbol> { + /// Get the symbol at the given index. + /// + /// This does not check if the symbol is null, but does check if the index is in bounds. + fn symbol_unchecked(&self, index: SymbolIndex) -> Result<&'data Xcoff::Symbol> { self.get::(index, 0) } + /// Get the symbol at the given index. + /// + /// Returns an error for null symbols and out of bounds indices. + /// Note that this is unable to check whether the index is an auxiliary symbol. + pub fn symbol(&self, index: SymbolIndex) -> Result<&'data Xcoff::Symbol> { + let symbol = self.symbol_unchecked(index)?; + if symbol.is_null() { + return Err(Error("Invalid XCOFF symbol index")); + } + Ok(symbol) + } + /// Return a file auxiliary symbol. - pub fn aux_file(&self, index: usize, offset: usize) -> Result<&'data Xcoff::FileAux> { + pub fn aux_file(&self, index: SymbolIndex, offset: usize) -> Result<&'data Xcoff::FileAux> { debug_assert!(self.symbol(index)?.has_aux_file()); let aux_file = self.get::(index, offset)?; if let Some(aux_type) = aux_file.x_auxtype() { @@ -108,7 +149,7 @@ where } /// Return the csect auxiliary symbol. - pub fn aux_csect(&self, index: usize, offset: usize) -> Result<&'data Xcoff::CsectAux> { + pub fn aux_csect(&self, index: SymbolIndex, offset: usize) -> Result<&'data Xcoff::CsectAux> { debug_assert!(self.symbol(index)?.has_aux_csect()); let aux_csect = self.get::(index, offset)?; if let Some(aux_type) = aux_csect.x_auxtype() { @@ -134,21 +175,51 @@ where } } -/// A symbol table of an `XcoffFile32`. 
+/// An iterator for symbol entries in an XCOFF file. +/// +/// Yields the index and symbol structure for each symbol. +#[derive(Debug)] +pub struct SymbolIterator<'data, 'table, Xcoff, R = &'data [u8]> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + symbols: &'table SymbolTable<'data, Xcoff, R>, + index: usize, +} + +impl<'data, 'table, Xcoff: FileHeader, R: ReadRef<'data>> Iterator + for SymbolIterator<'data, 'table, Xcoff, R> +{ + type Item = (SymbolIndex, &'data Xcoff::Symbol); + + fn next(&mut self) -> Option { + loop { + let index = SymbolIndex(self.index); + let symbol = self.symbols.symbol_unchecked(index).ok()?; + self.index += 1 + symbol.n_numaux() as usize; + if !symbol.is_null() { + return Some((index, symbol)); + } + } + } +} + +/// A symbol table in an [`XcoffFile32`](super::XcoffFile32). pub type XcoffSymbolTable32<'data, 'file, R = &'data [u8]> = XcoffSymbolTable<'data, 'file, xcoff::FileHeader32, R>; -/// A symbol table of an `XcoffFile64`. +/// A symbol table in an [`XcoffFile64`](super::XcoffFile64). pub type XcoffSymbolTable64<'data, 'file, R = &'data [u8]> = XcoffSymbolTable<'data, 'file, xcoff::FileHeader64, R>; -/// A symbol table of an `XcoffFile`. +/// A symbol table in an [`XcoffFile`]. 
#[derive(Debug, Clone, Copy)] pub struct XcoffSymbolTable<'data, 'file, Xcoff, R = &'data [u8]> where Xcoff: FileHeader, R: ReadRef<'data>, { - pub(crate) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, pub(super) symbols: &'file SymbolTable<'data, Xcoff, R>, } @@ -166,13 +237,12 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbolTable<'data fn symbols(&self) -> Self::SymbolIterator { XcoffSymbolIterator { file: self.file, - symbols: self.symbols, - index: 0, + symbols: self.symbols.iter(), } } fn symbol_by_index(&self, index: SymbolIndex) -> read::Result { - let symbol = self.symbols.symbol(index.0)?; + let symbol = self.symbols.symbol(index)?; Ok(XcoffSymbol { file: self.file, symbols: self.symbols, @@ -182,22 +252,21 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbolTable<'data } } -/// An iterator over the symbols of an `XcoffFile32`. +/// An iterator for the symbols in an [`XcoffFile32`](super::XcoffFile32). pub type XcoffSymbolIterator32<'data, 'file, R = &'data [u8]> = XcoffSymbolIterator<'data, 'file, xcoff::FileHeader32, R>; -/// An iterator over the symbols of an `XcoffFile64`. +/// An iterator for the symbols in an [`XcoffFile64`](super::XcoffFile64). pub type XcoffSymbolIterator64<'data, 'file, R = &'data [u8]> = XcoffSymbolIterator<'data, 'file, xcoff::FileHeader64, R>; -/// An iterator over the symbols of an `XcoffFile`. +/// An iterator for the symbols in an [`XcoffFile`]. 
pub struct XcoffSymbolIterator<'data, 'file, Xcoff, R = &'data [u8]> where Xcoff: FileHeader, R: ReadRef<'data>, { - pub(crate) file: &'file XcoffFile<'data, Xcoff, R>, - pub(super) symbols: &'file SymbolTable<'data, Xcoff, R>, - pub(super) index: usize, + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) symbols: SymbolIterator<'data, 'file, Xcoff, R>, } impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> fmt::Debug @@ -214,39 +283,54 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> Iterator type Item = XcoffSymbol<'data, 'file, Xcoff, R>; fn next(&mut self) -> Option { - let index = self.index; - let symbol = self.symbols.symbol(index).ok()?; - // TODO: skip over the auxiliary symbols for now. - self.index += 1 + symbol.n_numaux() as usize; + let (index, symbol) = self.symbols.next()?; Some(XcoffSymbol { file: self.file, - symbols: self.symbols, - index: SymbolIndex(index), + symbols: self.symbols.symbols, + index, symbol, }) } } -/// A symbol of an `XcoffFile32`. +/// A symbol in an [`XcoffFile32`](super::XcoffFile32). pub type XcoffSymbol32<'data, 'file, R = &'data [u8]> = XcoffSymbol<'data, 'file, xcoff::FileHeader32, R>; -/// A symbol of an `XcoffFile64`. +/// A symbol in an [`XcoffFile64`](super::XcoffFile64). pub type XcoffSymbol64<'data, 'file, R = &'data [u8]> = XcoffSymbol<'data, 'file, xcoff::FileHeader64, R>; -/// A symbol of an `XcoffFile`. +/// A symbol in an [`XcoffFile`]. +/// +/// Most functionality is provided by the [`ObjectSymbol`] trait implementation. 
#[derive(Debug, Clone, Copy)] pub struct XcoffSymbol<'data, 'file, Xcoff, R = &'data [u8]> where Xcoff: FileHeader, R: ReadRef<'data>, { - pub(crate) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, pub(super) symbols: &'file SymbolTable<'data, Xcoff, R>, pub(super) index: SymbolIndex, pub(super) symbol: &'data Xcoff::Symbol, } +impl<'data, 'file, Xcoff, R> XcoffSymbol<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + /// Get the XCOFF file containing this symbol. + pub fn xcoff_file(&self) -> &'file XcoffFile<'data, Xcoff, R> { + self.file + } + + /// Get the raw XCOFF symbol structure. + pub fn xcoff_symbol(&self) -> &'data Xcoff::Symbol { + self.symbol + } +} + impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> read::private::Sealed for XcoffSymbol<'data, 'file, Xcoff, R> { @@ -264,7 +348,7 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> if self.symbol.has_aux_file() { // By convention the file name is in the first auxiliary entry. self.symbols - .aux_file(self.index.0, 1)? + .aux_file(self.index, 1)? 
.fname(self.symbols.strings) } else { self.symbol.name(self.symbols.strings) @@ -301,9 +385,9 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> if let Ok(aux_csect) = self .file .symbols - .aux_csect(self.index.0, self.symbol.n_numaux() as usize) + .aux_csect(self.index, self.symbol.n_numaux() as usize) { - let sym_type = aux_csect.sym_type() & 0x07; + let sym_type = aux_csect.sym_type(); if sym_type == xcoff::XTY_SD || sym_type == xcoff::XTY_CM { return aux_csect.x_scnlen(); } @@ -317,9 +401,9 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> if let Ok(aux_csect) = self .file .symbols - .aux_csect(self.index.0, self.symbol.n_numaux() as usize) + .aux_csect(self.index, self.symbol.n_numaux() as usize) { - let sym_type = aux_csect.sym_type() & 0x07; + let sym_type = aux_csect.sym_type(); if sym_type == xcoff::XTY_SD || sym_type == xcoff::XTY_CM { return match aux_csect.x_smclas() { xcoff::XMC_PR | xcoff::XMC_GL => SymbolKind::Text, @@ -342,7 +426,6 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> } } match self.symbol.n_sclass() { - xcoff::C_NULL => SymbolKind::Null, xcoff::C_FILE => SymbolKind::File, _ => SymbolKind::Unknown, } @@ -366,16 +449,16 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> /// Return true if the symbol is a definition of a function or data object. 
#[inline] fn is_definition(&self) -> bool { + if self.symbol.n_scnum() <= 0 { + return false; + } if self.symbol.has_aux_csect() { if let Ok(aux_csect) = self .symbols - .aux_csect(self.index.0, self.symbol.n_numaux() as usize) + .aux_csect(self.index, self.symbol.n_numaux() as usize) { - let smclas = aux_csect.x_smclas(); - self.symbol.n_scnum() != xcoff::N_UNDEF - && (smclas == xcoff::XMC_PR - || smclas == xcoff::XMC_RW - || smclas == xcoff::XMC_RO) + let sym_type = aux_csect.sym_type(); + sym_type == xcoff::XTY_SD || sym_type == xcoff::XTY_LD || sym_type == xcoff::XTY_CM } else { false } @@ -399,7 +482,7 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> SymbolScope::Unknown } else { match self.symbol.n_sclass() { - xcoff::C_EXT | xcoff::C_WEAKEXT | xcoff::C_HIDEXT => { + xcoff::C_EXT | xcoff::C_WEAKEXT => { let visibility = self.symbol.n_type() & xcoff::SYM_V_MASK; if visibility == xcoff::SYM_V_HIDDEN { SymbolScope::Linkage @@ -434,11 +517,11 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> if let Ok(aux_csect) = self .file .symbols - .aux_csect(self.index.0, self.symbol.n_numaux() as usize) + .aux_csect(self.index, self.symbol.n_numaux() as usize) { x_smtyp = aux_csect.x_smtyp(); x_smclas = aux_csect.x_smclas(); - if x_smtyp == xcoff::XTY_LD { + if aux_csect.sym_type() == xcoff::XTY_LD { containing_csect = Some(SymbolIndex(aux_csect.x_scnlen() as usize)) } } @@ -452,7 +535,7 @@ impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> } } -/// A trait for generic access to `Symbol32` and `Symbol64`. +/// A trait for generic access to [`xcoff::Symbol32`] and [`xcoff::Symbol64`]. 
#[allow(missing_docs)] pub trait Symbol: Debug + Pod { type Word: Into; @@ -463,11 +546,28 @@ pub trait Symbol: Debug + Pod { fn n_sclass(&self) -> u8; fn n_numaux(&self) -> u8; + fn name_offset(&self) -> Option; fn name<'data, R: ReadRef<'data>>( &'data self, strings: StringTable<'data, R>, ) -> Result<&'data [u8]>; + /// Return the section index for the symbol. + fn section(&self) -> Option { + let index = self.n_scnum(); + if index > 0 { + Some(SectionIndex(index as usize)) + } else { + None + } + } + + /// Return true if the symbol is a null placeholder. + #[inline] + fn is_null(&self) -> bool { + self.n_sclass() == xcoff::C_NULL + } + /// Return true if the symbol is undefined. #[inline] fn is_undefined(&self) -> bool { @@ -515,6 +615,10 @@ impl Symbol for xcoff::Symbol64 { self.n_numaux } + fn name_offset(&self) -> Option { + Some(self.n_offset.get(BE)) + } + /// Parse the symbol name for XCOFF64. fn name<'data, R: ReadRef<'data>>( &'data self, @@ -549,14 +653,22 @@ impl Symbol for xcoff::Symbol32 { self.n_numaux } + fn name_offset(&self) -> Option { + if self.n_name[0] == 0 { + let offset = u32::from_be_bytes(self.n_name[4..8].try_into().unwrap()); + Some(offset) + } else { + None + } + } + /// Parse the symbol name for XCOFF32. fn name<'data, R: ReadRef<'data>>( &'data self, strings: StringTable<'data, R>, ) -> Result<&'data [u8]> { - if self.n_name[0] == 0 { + if let Some(offset) = self.name_offset() { // If the name starts with 0 then the last 4 bytes are a string table offset. - let offset = u32::from_be_bytes(self.n_name[4..8].try_into().unwrap()); strings .get(offset) .read_error("Invalid XCOFF symbol name offset") @@ -570,27 +682,35 @@ impl Symbol for xcoff::Symbol32 { } } -/// A trait for generic access to `FileAux32` and `FileAux64`. +/// A trait for generic access to [`xcoff::FileAux32`] and [`xcoff::FileAux64`]. 
#[allow(missing_docs)] pub trait FileAux: Debug + Pod { fn x_fname(&self) -> &[u8; 8]; fn x_ftype(&self) -> u8; fn x_auxtype(&self) -> Option; + fn name_offset(&self) -> Option { + let x_fname = self.x_fname(); + if x_fname[0] == 0 { + Some(u32::from_be_bytes(x_fname[4..8].try_into().unwrap())) + } else { + None + } + } + /// Parse the x_fname field, which may be an inline string or a string table offset. fn fname<'data, R: ReadRef<'data>>( &'data self, strings: StringTable<'data, R>, ) -> Result<&'data [u8]> { - let x_fname = self.x_fname(); - if x_fname[0] == 0 { + if let Some(offset) = self.name_offset() { // If the name starts with 0 then the last 4 bytes are a string table offset. - let offset = u32::from_be_bytes(x_fname[4..8].try_into().unwrap()); strings .get(offset) .read_error("Invalid XCOFF symbol name offset") } else { // The name is inline and padded with nulls. + let x_fname = self.x_fname(); Ok(match memchr::memchr(b'\0', x_fname) { Some(end) => &x_fname[..end], None => x_fname, @@ -627,7 +747,7 @@ impl FileAux for xcoff::FileAux32 { } } -/// A trait for generic access to `CsectAux32` and `CsectAux64`. +/// A trait for generic access to [`xcoff::CsectAux32`] and [`xcoff::CsectAux64`]. 
#[allow(missing_docs)] pub trait CsectAux: Debug + Pod { fn x_scnlen(&self) -> u64; @@ -635,8 +755,13 @@ pub trait CsectAux: Debug + Pod { fn x_snhash(&self) -> u16; fn x_smtyp(&self) -> u8; fn x_smclas(&self) -> u8; + fn x_stab(&self) -> Option; + fn x_snstab(&self) -> Option; fn x_auxtype(&self) -> Option; + fn alignment(&self) -> u8 { + self.x_smtyp() >> 3 + } fn sym_type(&self) -> u8 { self.x_smtyp() & 0x07 } @@ -663,6 +788,14 @@ impl CsectAux for xcoff::CsectAux64 { self.x_smclas } + fn x_stab(&self) -> Option { + None + } + + fn x_snstab(&self) -> Option { + None + } + fn x_auxtype(&self) -> Option { Some(self.x_auxtype) } @@ -689,6 +822,14 @@ impl CsectAux for xcoff::CsectAux32 { self.x_smclas } + fn x_stab(&self) -> Option { + Some(self.x_stab.get(BE)) + } + + fn x_snstab(&self) -> Option { + Some(self.x_snstab.get(BE)) + } + fn x_auxtype(&self) -> Option { None } diff --git a/third_party/rust/object/src/write/coff.rs b/third_party/rust/object/src/write/coff.rs deleted file mode 100644 index d2f7ccc64026..000000000000 --- a/third_party/rust/object/src/write/coff.rs +++ /dev/null @@ -1,725 +0,0 @@ -use alloc::vec::Vec; -use core::mem; - -use crate::endian::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; -use crate::pe as coff; -use crate::write::string::*; -use crate::write::util::*; -use crate::write::*; - -#[derive(Default, Clone, Copy)] -struct SectionOffsets { - offset: usize, - str_id: Option, - reloc_offset: usize, - selection: u8, - associative_section: u16, -} - -#[derive(Default, Clone, Copy)] -struct SymbolOffsets { - index: usize, - str_id: Option, - aux_count: u8, -} - -/// Internal format to use for the `.drectve` section containing linker -/// directives for symbol exports. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum CoffExportStyle { - /// MSVC format supported by link.exe and LLD. - Msvc, - /// Gnu format supported by GNU LD and LLD. 
- Gnu, -} - -impl<'a> Object<'a> { - pub(crate) fn coff_section_info( - &self, - section: StandardSection, - ) -> (&'static [u8], &'static [u8], SectionKind, SectionFlags) { - match section { - StandardSection::Text => (&[], &b".text"[..], SectionKind::Text, SectionFlags::None), - StandardSection::Data => (&[], &b".data"[..], SectionKind::Data, SectionFlags::None), - StandardSection::ReadOnlyData - | StandardSection::ReadOnlyDataWithRel - | StandardSection::ReadOnlyString => ( - &[], - &b".rdata"[..], - SectionKind::ReadOnlyData, - SectionFlags::None, - ), - StandardSection::UninitializedData => ( - &[], - &b".bss"[..], - SectionKind::UninitializedData, - SectionFlags::None, - ), - // TLS sections are data sections with a special name. - StandardSection::Tls => (&[], &b".tls$"[..], SectionKind::Data, SectionFlags::None), - StandardSection::UninitializedTls => { - // Unsupported section. - (&[], &[], SectionKind::UninitializedTls, SectionFlags::None) - } - StandardSection::TlsVariables => { - // Unsupported section. - (&[], &[], SectionKind::TlsVariables, SectionFlags::None) - } - StandardSection::Common => { - // Unsupported section. - (&[], &[], SectionKind::Common, SectionFlags::None) - } - StandardSection::GnuProperty => { - // Unsupported section. - (&[], &[], SectionKind::Note, SectionFlags::None) - } - } - } - - pub(crate) fn coff_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec { - let mut name = section.to_vec(); - name.push(b'$'); - name.extend_from_slice(value); - name - } - - pub(crate) fn coff_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { - if relocation.kind == RelocationKind::GotRelative { - // Use a stub symbol for the relocation instead. - // This isn't really a GOT, but it's a similar purpose. - // TODO: need to handle DLL imports differently? 
- relocation.kind = RelocationKind::Relative; - relocation.symbol = self.coff_add_stub_symbol(relocation.symbol); - } else if relocation.kind == RelocationKind::PltRelative { - // Windows doesn't need a separate relocation type for - // references to functions in import libraries. - // For convenience, treat this the same as Relative. - relocation.kind = RelocationKind::Relative; - } - - let constant = match self.architecture { - Architecture::I386 | Architecture::Arm | Architecture::Aarch64 => match relocation.kind - { - RelocationKind::Relative => { - // IMAGE_REL_I386_REL32, IMAGE_REL_ARM_REL32, IMAGE_REL_ARM64_REL32 - relocation.addend + 4 - } - _ => relocation.addend, - }, - Architecture::X86_64 => match relocation.kind { - RelocationKind::Relative => { - // IMAGE_REL_AMD64_REL32 through to IMAGE_REL_AMD64_REL32_5 - if relocation.addend <= -4 && relocation.addend >= -9 { - 0 - } else { - relocation.addend + 4 - } - } - _ => relocation.addend, - }, - _ => unimplemented!(), - }; - relocation.addend -= constant; - constant - } - - fn coff_add_stub_symbol(&mut self, symbol_id: SymbolId) -> SymbolId { - if let Some(stub_id) = self.stub_symbols.get(&symbol_id) { - return *stub_id; - } - let stub_size = self.architecture.address_size().unwrap().bytes(); - - let name = b".rdata$.refptr".to_vec(); - let section_id = self.add_section(Vec::new(), name, SectionKind::ReadOnlyData); - let section = self.section_mut(section_id); - section.set_data(vec![0; stub_size as usize], u64::from(stub_size)); - section.relocations = vec![Relocation { - offset: 0, - size: stub_size * 8, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, - symbol: symbol_id, - addend: 0, - }]; - - let mut name = b".refptr.".to_vec(); - name.extend_from_slice(&self.symbol(symbol_id).name); - let stub_id = self.add_raw_symbol(Symbol { - name, - value: 0, - size: u64::from(stub_size), - kind: SymbolKind::Data, - scope: SymbolScope::Compilation, - weak: false, - section: 
SymbolSection::Section(section_id), - flags: SymbolFlags::None, - }); - self.stub_symbols.insert(symbol_id, stub_id); - - stub_id - } - - /// Appends linker directives to the `.drectve` section to tell the linker - /// to export all symbols with `SymbolScope::Dynamic`. - /// - /// This must be called after all symbols have been defined. - pub fn add_coff_exports(&mut self, style: CoffExportStyle) { - assert_eq!(self.format, BinaryFormat::Coff); - - let mut directives = vec![]; - for symbol in &self.symbols { - if symbol.scope == SymbolScope::Dynamic { - match style { - CoffExportStyle::Msvc => directives.extend(b" /EXPORT:\""), - CoffExportStyle::Gnu => directives.extend(b" -export:\""), - } - directives.extend(&symbol.name); - directives.extend(b"\""); - if symbol.kind != SymbolKind::Text { - match style { - CoffExportStyle::Msvc => directives.extend(b",DATA"), - CoffExportStyle::Gnu => directives.extend(b",data"), - } - } - } - } - let drectve = self.add_section(vec![], b".drectve".to_vec(), SectionKind::Linker); - self.append_section_data(drectve, &directives, 1); - } - - pub(crate) fn coff_write(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { - // Calculate offsets of everything, and build strtab. - let mut offset = 0; - let mut strtab = StringTable::default(); - - // COFF header. - offset += mem::size_of::(); - - // Section headers. - offset += self.sections.len() * mem::size_of::(); - - // Calculate size of section data and add section strings to strtab. 
- let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; - for (index, section) in self.sections.iter().enumerate() { - if section.name.len() > 8 { - section_offsets[index].str_id = Some(strtab.add(§ion.name)); - } - - let len = section.data.len(); - if len != 0 { - // TODO: not sure what alignment is required here, but this seems to match LLVM - offset = align(offset, 4); - section_offsets[index].offset = offset; - offset += len; - } else { - section_offsets[index].offset = 0; - } - - // Calculate size of relocations. - let mut count = section.relocations.len(); - if count != 0 { - section_offsets[index].reloc_offset = offset; - if count > 0xffff { - count += 1; - } - offset += count * mem::size_of::(); - } - } - - // Set COMDAT flags. - for comdat in &self.comdats { - let symbol = &self.symbols[comdat.symbol.0]; - let comdat_section = match symbol.section { - SymbolSection::Section(id) => id.0, - _ => { - return Err(Error(format!( - "unsupported COMDAT symbol `{}` section {:?}", - symbol.name().unwrap_or(""), - symbol.section - ))); - } - }; - section_offsets[comdat_section].selection = match comdat.kind { - ComdatKind::NoDuplicates => coff::IMAGE_COMDAT_SELECT_NODUPLICATES, - ComdatKind::Any => coff::IMAGE_COMDAT_SELECT_ANY, - ComdatKind::SameSize => coff::IMAGE_COMDAT_SELECT_SAME_SIZE, - ComdatKind::ExactMatch => coff::IMAGE_COMDAT_SELECT_EXACT_MATCH, - ComdatKind::Largest => coff::IMAGE_COMDAT_SELECT_LARGEST, - ComdatKind::Newest => coff::IMAGE_COMDAT_SELECT_NEWEST, - ComdatKind::Unknown => { - return Err(Error(format!( - "unsupported COMDAT symbol `{}` kind {:?}", - symbol.name().unwrap_or(""), - comdat.kind - ))); - } - }; - for id in &comdat.sections { - let section = &self.sections[id.0]; - if section.symbol.is_none() { - return Err(Error(format!( - "missing symbol for COMDAT section `{}`", - section.name().unwrap_or(""), - ))); - } - if id.0 != comdat_section { - section_offsets[id.0].selection = 
coff::IMAGE_COMDAT_SELECT_ASSOCIATIVE; - section_offsets[id.0].associative_section = comdat_section as u16 + 1; - } - } - } - - // Calculate size of symbols and add symbol strings to strtab. - let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; - let mut symtab_count = 0; - for (index, symbol) in self.symbols.iter().enumerate() { - symbol_offsets[index].index = symtab_count; - symtab_count += 1; - match symbol.kind { - SymbolKind::File => { - // Name goes in auxiliary symbol records. - let aux_count = (symbol.name.len() + coff::IMAGE_SIZEOF_SYMBOL - 1) - / coff::IMAGE_SIZEOF_SYMBOL; - symbol_offsets[index].aux_count = aux_count as u8; - symtab_count += aux_count; - // Don't add name to strtab. - continue; - } - SymbolKind::Section => { - symbol_offsets[index].aux_count = 1; - symtab_count += 1; - } - _ => {} - } - if symbol.name.len() > 8 { - symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); - } - } - - // Calculate size of symtab. - let symtab_offset = offset; - let symtab_len = symtab_count * coff::IMAGE_SIZEOF_SYMBOL; - offset += symtab_len; - - // Calculate size of strtab. - let strtab_offset = offset; - let mut strtab_data = Vec::new(); - // First 4 bytes of strtab are the length. - strtab.write(4, &mut strtab_data); - let strtab_len = strtab_data.len() + 4; - offset += strtab_len; - - // Start writing. - buffer - .reserve(offset) - .map_err(|_| Error(String::from("Cannot allocate buffer")))?; - - // Write file header. 
- let header = coff::ImageFileHeader { - machine: U16::new( - LE, - match self.architecture { - Architecture::Arm => coff::IMAGE_FILE_MACHINE_ARMNT, - Architecture::Aarch64 => coff::IMAGE_FILE_MACHINE_ARM64, - Architecture::I386 => coff::IMAGE_FILE_MACHINE_I386, - Architecture::X86_64 => coff::IMAGE_FILE_MACHINE_AMD64, - _ => { - return Err(Error(format!( - "unimplemented architecture {:?}", - self.architecture - ))); - } - }, - ), - number_of_sections: U16::new(LE, self.sections.len() as u16), - time_date_stamp: U32::default(), - pointer_to_symbol_table: U32::new(LE, symtab_offset as u32), - number_of_symbols: U32::new(LE, symtab_count as u32), - size_of_optional_header: U16::default(), - characteristics: match self.flags { - FileFlags::Coff { characteristics } => U16::new(LE, characteristics), - _ => U16::default(), - }, - }; - buffer.write(&header); - - // Write section headers. - for (index, section) in self.sections.iter().enumerate() { - let mut characteristics = if let SectionFlags::Coff { - characteristics, .. 
- } = section.flags - { - characteristics - } else { - match section.kind { - SectionKind::Text => { - coff::IMAGE_SCN_CNT_CODE - | coff::IMAGE_SCN_MEM_EXECUTE - | coff::IMAGE_SCN_MEM_READ - } - SectionKind::Data => { - coff::IMAGE_SCN_CNT_INITIALIZED_DATA - | coff::IMAGE_SCN_MEM_READ - | coff::IMAGE_SCN_MEM_WRITE - } - SectionKind::UninitializedData => { - coff::IMAGE_SCN_CNT_UNINITIALIZED_DATA - | coff::IMAGE_SCN_MEM_READ - | coff::IMAGE_SCN_MEM_WRITE - } - SectionKind::ReadOnlyData - | SectionKind::ReadOnlyDataWithRel - | SectionKind::ReadOnlyString => { - coff::IMAGE_SCN_CNT_INITIALIZED_DATA | coff::IMAGE_SCN_MEM_READ - } - SectionKind::Debug | SectionKind::Other | SectionKind::OtherString => { - coff::IMAGE_SCN_CNT_INITIALIZED_DATA - | coff::IMAGE_SCN_MEM_READ - | coff::IMAGE_SCN_MEM_DISCARDABLE - } - SectionKind::Linker => coff::IMAGE_SCN_LNK_INFO | coff::IMAGE_SCN_LNK_REMOVE, - SectionKind::Common - | SectionKind::Tls - | SectionKind::UninitializedTls - | SectionKind::TlsVariables - | SectionKind::Note - | SectionKind::Unknown - | SectionKind::Metadata - | SectionKind::Elf(_) => { - return Err(Error(format!( - "unimplemented section `{}` kind {:?}", - section.name().unwrap_or(""), - section.kind - ))); - } - } - }; - if section_offsets[index].selection != 0 { - characteristics |= coff::IMAGE_SCN_LNK_COMDAT; - }; - if section.relocations.len() > 0xffff { - characteristics |= coff::IMAGE_SCN_LNK_NRELOC_OVFL; - } - characteristics |= match section.align { - 1 => coff::IMAGE_SCN_ALIGN_1BYTES, - 2 => coff::IMAGE_SCN_ALIGN_2BYTES, - 4 => coff::IMAGE_SCN_ALIGN_4BYTES, - 8 => coff::IMAGE_SCN_ALIGN_8BYTES, - 16 => coff::IMAGE_SCN_ALIGN_16BYTES, - 32 => coff::IMAGE_SCN_ALIGN_32BYTES, - 64 => coff::IMAGE_SCN_ALIGN_64BYTES, - 128 => coff::IMAGE_SCN_ALIGN_128BYTES, - 256 => coff::IMAGE_SCN_ALIGN_256BYTES, - 512 => coff::IMAGE_SCN_ALIGN_512BYTES, - 1024 => coff::IMAGE_SCN_ALIGN_1024BYTES, - 2048 => coff::IMAGE_SCN_ALIGN_2048BYTES, - 4096 => 
coff::IMAGE_SCN_ALIGN_4096BYTES, - 8192 => coff::IMAGE_SCN_ALIGN_8192BYTES, - _ => { - return Err(Error(format!( - "unimplemented section `{}` align {}", - section.name().unwrap_or(""), - section.align - ))); - } - }; - let mut coff_section = coff::ImageSectionHeader { - name: [0; 8], - virtual_size: U32::default(), - virtual_address: U32::default(), - size_of_raw_data: U32::new(LE, section.size as u32), - pointer_to_raw_data: U32::new(LE, section_offsets[index].offset as u32), - pointer_to_relocations: U32::new(LE, section_offsets[index].reloc_offset as u32), - pointer_to_linenumbers: U32::default(), - number_of_relocations: if section.relocations.len() > 0xffff { - U16::new(LE, 0xffff) - } else { - U16::new(LE, section.relocations.len() as u16) - }, - number_of_linenumbers: U16::default(), - characteristics: U32::new(LE, characteristics), - }; - if section.name.len() <= 8 { - coff_section.name[..section.name.len()].copy_from_slice(§ion.name); - } else { - let mut str_offset = strtab.get_offset(section_offsets[index].str_id.unwrap()); - if str_offset <= 9_999_999 { - let mut name = [0; 7]; - let mut len = 0; - if str_offset == 0 { - name[6] = b'0'; - len = 1; - } else { - while str_offset != 0 { - let rem = (str_offset % 10) as u8; - str_offset /= 10; - name[6 - len] = b'0' + rem; - len += 1; - } - } - coff_section.name = [0; 8]; - coff_section.name[0] = b'/'; - coff_section.name[1..][..len].copy_from_slice(&name[7 - len..]); - } else if str_offset as u64 <= 0xf_ffff_ffff { - coff_section.name[0] = b'/'; - coff_section.name[1] = b'/'; - for i in 0..6 { - let rem = (str_offset % 64) as u8; - str_offset /= 64; - let c = match rem { - 0..=25 => b'A' + rem, - 26..=51 => b'a' + rem - 26, - 52..=61 => b'0' + rem - 52, - 62 => b'+', - 63 => b'/', - _ => unreachable!(), - }; - coff_section.name[7 - i] = c; - } - } else { - return Err(Error(format!("invalid section name offset {}", str_offset))); - } - } - buffer.write(&coff_section); - } - - // Write section data and 
relocations. - for (index, section) in self.sections.iter().enumerate() { - let len = section.data.len(); - if len != 0 { - write_align(buffer, 4); - debug_assert_eq!(section_offsets[index].offset, buffer.len()); - buffer.write_bytes(§ion.data); - } - - if !section.relocations.is_empty() { - debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); - if section.relocations.len() > 0xffff { - let coff_relocation = coff::ImageRelocation { - virtual_address: U32Bytes::new(LE, section.relocations.len() as u32 + 1), - symbol_table_index: U32Bytes::new(LE, 0), - typ: U16Bytes::new(LE, 0), - }; - buffer.write(&coff_relocation); - } - for reloc in §ion.relocations { - //assert!(reloc.implicit_addend); - let typ = match self.architecture { - Architecture::I386 => match (reloc.kind, reloc.size, reloc.addend) { - (RelocationKind::Absolute, 16, 0) => coff::IMAGE_REL_I386_DIR16, - (RelocationKind::Relative, 16, 0) => coff::IMAGE_REL_I386_REL16, - (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_I386_DIR32, - (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_I386_DIR32NB, - (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_I386_SECTION, - (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_I386_SECREL, - (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_I386_SECREL7, - (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_I386_REL32, - (RelocationKind::Coff(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::X86_64 => match (reloc.kind, reloc.size, reloc.addend) { - (RelocationKind::Absolute, 64, 0) => coff::IMAGE_REL_AMD64_ADDR64, - (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32, - (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32NB, - (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_AMD64_REL32, - (RelocationKind::Relative, 32, -5) => coff::IMAGE_REL_AMD64_REL32_1, - (RelocationKind::Relative, 32, -6) => 
coff::IMAGE_REL_AMD64_REL32_2, - (RelocationKind::Relative, 32, -7) => coff::IMAGE_REL_AMD64_REL32_3, - (RelocationKind::Relative, 32, -8) => coff::IMAGE_REL_AMD64_REL32_4, - (RelocationKind::Relative, 32, -9) => coff::IMAGE_REL_AMD64_REL32_5, - (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_AMD64_SECTION, - (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_AMD64_SECREL, - (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_AMD64_SECREL7, - (RelocationKind::Coff(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Arm => match (reloc.kind, reloc.size, reloc.addend) { - (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_ARM_ADDR32, - (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_ARM_ADDR32NB, - (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_ARM_REL32, - (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_ARM_SECTION, - (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_ARM_SECREL, - (RelocationKind::Coff(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Aarch64 => match (reloc.kind, reloc.size, reloc.addend) { - (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_ARM64_ADDR32, - (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_ARM64_ADDR32NB, - (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_ARM64_SECTION, - (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_ARM64_SECREL, - (RelocationKind::Absolute, 64, 0) => coff::IMAGE_REL_ARM64_ADDR64, - (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_ARM64_REL32, - (RelocationKind::Coff(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - _ => { - return Err(Error(format!( - "unimplemented architecture {:?}", - self.architecture - ))); - } - }; - let coff_relocation = coff::ImageRelocation { - virtual_address: U32Bytes::new(LE, reloc.offset as u32), - 
symbol_table_index: U32Bytes::new( - LE, - symbol_offsets[reloc.symbol.0].index as u32, - ), - typ: U16Bytes::new(LE, typ), - }; - buffer.write(&coff_relocation); - } - } - } - - // Write symbols. - debug_assert_eq!(symtab_offset, buffer.len()); - for (index, symbol) in self.symbols.iter().enumerate() { - let mut name = &symbol.name[..]; - let section_number = match symbol.section { - SymbolSection::None => { - debug_assert_eq!(symbol.kind, SymbolKind::File); - coff::IMAGE_SYM_DEBUG as u16 - } - SymbolSection::Undefined => coff::IMAGE_SYM_UNDEFINED as u16, - SymbolSection::Absolute => coff::IMAGE_SYM_ABSOLUTE as u16, - SymbolSection::Common => coff::IMAGE_SYM_UNDEFINED as u16, - SymbolSection::Section(id) => id.0 as u16 + 1, - }; - let typ = if symbol.kind == SymbolKind::Text { - coff::IMAGE_SYM_DTYPE_FUNCTION << coff::IMAGE_SYM_DTYPE_SHIFT - } else { - coff::IMAGE_SYM_TYPE_NULL - }; - let storage_class = match symbol.kind { - SymbolKind::File => { - // Name goes in auxiliary symbol records. - name = b".file"; - coff::IMAGE_SYM_CLASS_FILE - } - SymbolKind::Section => coff::IMAGE_SYM_CLASS_STATIC, - SymbolKind::Label => coff::IMAGE_SYM_CLASS_LABEL, - SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => { - match symbol.section { - SymbolSection::None => { - return Err(Error(format!( - "missing section for symbol `{}`", - symbol.name().unwrap_or("") - ))); - } - SymbolSection::Undefined | SymbolSection::Common => { - coff::IMAGE_SYM_CLASS_EXTERNAL - } - SymbolSection::Absolute | SymbolSection::Section(_) => { - match symbol.scope { - // TODO: does this need aux symbol records too? 
- _ if symbol.weak => coff::IMAGE_SYM_CLASS_WEAK_EXTERNAL, - SymbolScope::Unknown => { - return Err(Error(format!( - "unimplemented symbol `{}` scope {:?}", - symbol.name().unwrap_or(""), - symbol.scope - ))); - } - SymbolScope::Compilation => coff::IMAGE_SYM_CLASS_STATIC, - SymbolScope::Linkage | SymbolScope::Dynamic => { - coff::IMAGE_SYM_CLASS_EXTERNAL - } - } - } - } - } - SymbolKind::Unknown | SymbolKind::Null => { - return Err(Error(format!( - "unimplemented symbol `{}` kind {:?}", - symbol.name().unwrap_or(""), - symbol.kind - ))); - } - }; - let number_of_aux_symbols = symbol_offsets[index].aux_count; - let value = if symbol.section == SymbolSection::Common { - symbol.size as u32 - } else { - symbol.value as u32 - }; - let mut coff_symbol = coff::ImageSymbol { - name: [0; 8], - value: U32Bytes::new(LE, value), - section_number: U16Bytes::new(LE, section_number), - typ: U16Bytes::new(LE, typ), - storage_class, - number_of_aux_symbols, - }; - if name.len() <= 8 { - coff_symbol.name[..name.len()].copy_from_slice(name); - } else { - let str_offset = strtab.get_offset(symbol_offsets[index].str_id.unwrap()); - coff_symbol.name[4..8].copy_from_slice(&u32::to_le_bytes(str_offset as u32)); - } - buffer.write(&coff_symbol); - - // Write auxiliary symbols. 
- match symbol.kind { - SymbolKind::File => { - let aux_len = number_of_aux_symbols as usize * coff::IMAGE_SIZEOF_SYMBOL; - debug_assert!(aux_len >= symbol.name.len()); - let old_len = buffer.len(); - buffer.write_bytes(&symbol.name); - buffer.resize(old_len + aux_len); - } - SymbolKind::Section => { - debug_assert_eq!(number_of_aux_symbols, 1); - let section_index = symbol.section.id().unwrap().0; - let section = &self.sections[section_index]; - let aux = coff::ImageAuxSymbolSection { - length: U32Bytes::new(LE, section.size as u32), - number_of_relocations: if section.relocations.len() > 0xffff { - U16Bytes::new(LE, 0xffff) - } else { - U16Bytes::new(LE, section.relocations.len() as u16) - }, - number_of_linenumbers: U16Bytes::default(), - check_sum: U32Bytes::new(LE, checksum(section.data())), - number: U16Bytes::new( - LE, - section_offsets[section_index].associative_section, - ), - selection: section_offsets[section_index].selection, - reserved: 0, - // TODO: bigobj - high_number: U16Bytes::default(), - }; - buffer.write(&aux); - } - _ => { - debug_assert_eq!(number_of_aux_symbols, 0); - } - } - } - - // Write strtab section. - debug_assert_eq!(strtab_offset, buffer.len()); - buffer.write_bytes(&u32::to_le_bytes(strtab_len as u32)); - buffer.write_bytes(&strtab_data); - - debug_assert_eq!(offset, buffer.len()); - - Ok(()) - } -} - -// JamCRC -fn checksum(data: &[u8]) -> u32 { - let mut hasher = crc32fast::Hasher::new_with_initial(0xffff_ffff); - hasher.update(data); - !hasher.finalize() -} diff --git a/third_party/rust/object/src/write/coff/mod.rs b/third_party/rust/object/src/write/coff/mod.rs new file mode 100644 index 000000000000..6e0f5edd4977 --- /dev/null +++ b/third_party/rust/object/src/write/coff/mod.rs @@ -0,0 +1,10 @@ +//! Support for writing COFF files. +//! +//! Provides [`Writer`] for low level writing of COFF files. +//! This is also used to provide COFF support for [`write::Object`](crate::write::Object). 
+ +mod object; +pub use self::object::*; + +mod writer; +pub use writer::*; diff --git a/third_party/rust/object/src/write/coff/object.rs b/third_party/rust/object/src/write/coff/object.rs new file mode 100644 index 000000000000..f70465f35c01 --- /dev/null +++ b/third_party/rust/object/src/write/coff/object.rs @@ -0,0 +1,681 @@ +use alloc::vec::Vec; + +use crate::pe as coff; +use crate::write::coff::writer; +use crate::write::util::*; +use crate::write::*; + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + name: writer::Name, + offset: u32, + reloc_offset: u32, + selection: u8, + associative_section: u32, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + name: writer::Name, + index: u32, + aux_count: u8, +} + +/// Internal format to use for the `.drectve` section containing linker +/// directives for symbol exports. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CoffExportStyle { + /// MSVC format supported by link.exe and LLD. + Msvc, + /// Gnu format supported by GNU LD and LLD. + Gnu, +} + +impl<'a> Object<'a> { + pub(crate) fn coff_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind, SectionFlags) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text, SectionFlags::None), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data, SectionFlags::None), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => ( + &[], + &b".rdata"[..], + SectionKind::ReadOnlyData, + SectionFlags::None, + ), + StandardSection::UninitializedData => ( + &[], + &b".bss"[..], + SectionKind::UninitializedData, + SectionFlags::None, + ), + // TLS sections are data sections with a special name. + StandardSection::Tls => (&[], &b".tls$"[..], SectionKind::Data, SectionFlags::None), + StandardSection::UninitializedTls => { + // Unsupported section. 
+ (&[], &[], SectionKind::UninitializedTls, SectionFlags::None) + } + StandardSection::TlsVariables => { + // Unsupported section. + (&[], &[], SectionKind::TlsVariables, SectionFlags::None) + } + StandardSection::Common => { + // Unsupported section. + (&[], &[], SectionKind::Common, SectionFlags::None) + } + StandardSection::GnuProperty => { + // Unsupported section. + (&[], &[], SectionKind::Note, SectionFlags::None) + } + } + } + + pub(crate) fn coff_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec { + let mut name = section.to_vec(); + name.push(b'$'); + name.extend_from_slice(value); + name + } + + pub(crate) fn coff_translate_relocation(&mut self, reloc: &mut Relocation) -> Result<()> { + let (mut kind, _encoding, size) = if let RelocationFlags::Generic { + kind, + encoding, + size, + } = reloc.flags + { + (kind, encoding, size) + } else { + return Ok(()); + }; + if kind == RelocationKind::GotRelative { + // Use a stub symbol for the relocation instead. + // This isn't really a GOT, but it's a similar purpose. + // TODO: need to handle DLL imports differently? + kind = RelocationKind::Relative; + reloc.symbol = self.coff_add_stub_symbol(reloc.symbol)?; + } else if kind == RelocationKind::PltRelative { + // Windows doesn't need a separate relocation type for + // references to functions in import libraries. + // For convenience, treat this the same as Relative. 
+ kind = RelocationKind::Relative; + } + + let typ = match self.architecture { + Architecture::I386 => match (kind, size) { + (RelocationKind::Absolute, 16) => coff::IMAGE_REL_I386_DIR16, + (RelocationKind::Relative, 16) => coff::IMAGE_REL_I386_REL16, + (RelocationKind::Absolute, 32) => coff::IMAGE_REL_I386_DIR32, + (RelocationKind::ImageOffset, 32) => coff::IMAGE_REL_I386_DIR32NB, + (RelocationKind::SectionIndex, 16) => coff::IMAGE_REL_I386_SECTION, + (RelocationKind::SectionOffset, 32) => coff::IMAGE_REL_I386_SECREL, + (RelocationKind::SectionOffset, 7) => coff::IMAGE_REL_I386_SECREL7, + (RelocationKind::Relative, 32) => coff::IMAGE_REL_I386_REL32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::X86_64 => match (kind, size) { + (RelocationKind::Absolute, 64) => coff::IMAGE_REL_AMD64_ADDR64, + (RelocationKind::Absolute, 32) => coff::IMAGE_REL_AMD64_ADDR32, + (RelocationKind::ImageOffset, 32) => coff::IMAGE_REL_AMD64_ADDR32NB, + (RelocationKind::Relative, 32) => match reloc.addend { + -5 => coff::IMAGE_REL_AMD64_REL32_1, + -6 => coff::IMAGE_REL_AMD64_REL32_2, + -7 => coff::IMAGE_REL_AMD64_REL32_3, + -8 => coff::IMAGE_REL_AMD64_REL32_4, + -9 => coff::IMAGE_REL_AMD64_REL32_5, + _ => coff::IMAGE_REL_AMD64_REL32, + }, + (RelocationKind::SectionIndex, 16) => coff::IMAGE_REL_AMD64_SECTION, + (RelocationKind::SectionOffset, 32) => coff::IMAGE_REL_AMD64_SECREL, + (RelocationKind::SectionOffset, 7) => coff::IMAGE_REL_AMD64_SECREL7, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Arm => match (kind, size) { + (RelocationKind::Absolute, 32) => coff::IMAGE_REL_ARM_ADDR32, + (RelocationKind::ImageOffset, 32) => coff::IMAGE_REL_ARM_ADDR32NB, + (RelocationKind::Relative, 32) => coff::IMAGE_REL_ARM_REL32, + (RelocationKind::SectionIndex, 16) => coff::IMAGE_REL_ARM_SECTION, + (RelocationKind::SectionOffset, 32) => coff::IMAGE_REL_ARM_SECREL, + _ => { + return 
Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Aarch64 => match (kind, size) { + (RelocationKind::Absolute, 32) => coff::IMAGE_REL_ARM64_ADDR32, + (RelocationKind::ImageOffset, 32) => coff::IMAGE_REL_ARM64_ADDR32NB, + (RelocationKind::SectionIndex, 16) => coff::IMAGE_REL_ARM64_SECTION, + (RelocationKind::SectionOffset, 32) => coff::IMAGE_REL_ARM64_SECREL, + (RelocationKind::Absolute, 64) => coff::IMAGE_REL_ARM64_ADDR64, + (RelocationKind::Relative, 32) => coff::IMAGE_REL_ARM64_REL32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + _ => { + return Err(Error(format!( + "unimplemented architecture {:?}", + self.architecture + ))); + } + }; + reloc.flags = RelocationFlags::Coff { typ }; + Ok(()) + } + + pub(crate) fn coff_adjust_addend(&self, relocation: &mut Relocation) -> Result { + let typ = if let RelocationFlags::Coff { typ } = relocation.flags { + typ + } else { + return Err(Error(format!("invalid relocation flags {:?}", relocation))); + }; + let offset = match self.architecture { + Architecture::Arm => { + if typ == coff::IMAGE_REL_ARM_REL32 { + 4 + } else { + 0 + } + } + Architecture::Aarch64 => { + if typ == coff::IMAGE_REL_ARM64_REL32 { + 4 + } else { + 0 + } + } + Architecture::I386 => { + if typ == coff::IMAGE_REL_I386_REL32 { + 4 + } else { + 0 + } + } + Architecture::X86_64 => match typ { + coff::IMAGE_REL_AMD64_REL32 => 4, + coff::IMAGE_REL_AMD64_REL32_1 => 5, + coff::IMAGE_REL_AMD64_REL32_2 => 6, + coff::IMAGE_REL_AMD64_REL32_3 => 7, + coff::IMAGE_REL_AMD64_REL32_4 => 8, + coff::IMAGE_REL_AMD64_REL32_5 => 9, + _ => 0, + }, + _ => return Err(Error(format!("unimplemented relocation {:?}", relocation))), + }; + relocation.addend += offset; + Ok(true) + } + + pub(crate) fn coff_relocation_size(&self, reloc: &Relocation) -> Result { + let typ = if let RelocationFlags::Coff { typ } = reloc.flags { + typ + } else { + return Err(Error(format!("unexpected relocation for size {:?}", 
reloc))); + }; + let size = match self.architecture { + Architecture::I386 => match typ { + coff::IMAGE_REL_I386_DIR16 + | coff::IMAGE_REL_I386_REL16 + | coff::IMAGE_REL_I386_SECTION => Some(16), + coff::IMAGE_REL_I386_DIR32 + | coff::IMAGE_REL_I386_DIR32NB + | coff::IMAGE_REL_I386_SECREL + | coff::IMAGE_REL_I386_TOKEN + | coff::IMAGE_REL_I386_REL32 => Some(32), + _ => None, + }, + Architecture::X86_64 => match typ { + coff::IMAGE_REL_AMD64_SECTION => Some(16), + coff::IMAGE_REL_AMD64_ADDR32 + | coff::IMAGE_REL_AMD64_ADDR32NB + | coff::IMAGE_REL_AMD64_REL32 + | coff::IMAGE_REL_AMD64_REL32_1 + | coff::IMAGE_REL_AMD64_REL32_2 + | coff::IMAGE_REL_AMD64_REL32_3 + | coff::IMAGE_REL_AMD64_REL32_4 + | coff::IMAGE_REL_AMD64_REL32_5 + | coff::IMAGE_REL_AMD64_SECREL + | coff::IMAGE_REL_AMD64_TOKEN => Some(32), + coff::IMAGE_REL_AMD64_ADDR64 => Some(64), + _ => None, + }, + Architecture::Arm => match typ { + coff::IMAGE_REL_ARM_SECTION => Some(16), + coff::IMAGE_REL_ARM_ADDR32 + | coff::IMAGE_REL_ARM_ADDR32NB + | coff::IMAGE_REL_ARM_TOKEN + | coff::IMAGE_REL_ARM_REL32 + | coff::IMAGE_REL_ARM_SECREL => Some(32), + _ => None, + }, + Architecture::Aarch64 => match typ { + coff::IMAGE_REL_ARM64_SECTION => Some(16), + coff::IMAGE_REL_ARM64_ADDR32 + | coff::IMAGE_REL_ARM64_ADDR32NB + | coff::IMAGE_REL_ARM64_SECREL + | coff::IMAGE_REL_ARM64_TOKEN + | coff::IMAGE_REL_ARM64_REL32 => Some(32), + coff::IMAGE_REL_ARM64_ADDR64 => Some(64), + _ => None, + }, + _ => None, + }; + size.ok_or_else(|| Error(format!("unsupported relocation for size {:?}", reloc))) + } + + fn coff_add_stub_symbol(&mut self, symbol_id: SymbolId) -> Result { + if let Some(stub_id) = self.stub_symbols.get(&symbol_id) { + return Ok(*stub_id); + } + let stub_size = self.architecture.address_size().unwrap().bytes(); + + let name = b".rdata$.refptr".to_vec(); + let section_id = self.add_section(Vec::new(), name, SectionKind::ReadOnlyData); + let section = self.section_mut(section_id); + section.set_data(vec![0; 
stub_size as usize], u64::from(stub_size)); + self.add_relocation( + section_id, + Relocation { + offset: 0, + symbol: symbol_id, + addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: stub_size * 8, + }, + }, + )?; + + let mut name = b".refptr.".to_vec(); + name.extend_from_slice(&self.symbol(symbol_id).name); + let stub_id = self.add_raw_symbol(Symbol { + name, + value: 0, + size: u64::from(stub_size), + kind: SymbolKind::Data, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Section(section_id), + flags: SymbolFlags::None, + }); + self.stub_symbols.insert(symbol_id, stub_id); + + Ok(stub_id) + } + + /// Appends linker directives to the `.drectve` section to tell the linker + /// to export all symbols with `SymbolScope::Dynamic`. + /// + /// This must be called after all symbols have been defined. + pub fn add_coff_exports(&mut self, style: CoffExportStyle) { + assert_eq!(self.format, BinaryFormat::Coff); + + let mut directives = vec![]; + for symbol in &self.symbols { + if symbol.scope == SymbolScope::Dynamic { + match style { + CoffExportStyle::Msvc => directives.extend(b" /EXPORT:\""), + CoffExportStyle::Gnu => directives.extend(b" -export:\""), + } + directives.extend(&symbol.name); + directives.extend(b"\""); + if symbol.kind != SymbolKind::Text { + match style { + CoffExportStyle::Msvc => directives.extend(b",DATA"), + CoffExportStyle::Gnu => directives.extend(b",data"), + } + } + } + } + let drectve = self.add_section(vec![], b".drectve".to_vec(), SectionKind::Linker); + self.append_section_data(drectve, &directives, 1); + } + + pub(crate) fn coff_write(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { + let mut writer = writer::Writer::new(buffer); + + // Add section strings to strtab. 
+ let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].name = writer.add_name(§ion.name); + } + + // Set COMDAT flags. + for comdat in &self.comdats { + let symbol = &self.symbols[comdat.symbol.0]; + let comdat_section = match symbol.section { + SymbolSection::Section(id) => id.0, + _ => { + return Err(Error(format!( + "unsupported COMDAT symbol `{}` section {:?}", + symbol.name().unwrap_or(""), + symbol.section + ))); + } + }; + section_offsets[comdat_section].selection = match comdat.kind { + ComdatKind::NoDuplicates => coff::IMAGE_COMDAT_SELECT_NODUPLICATES, + ComdatKind::Any => coff::IMAGE_COMDAT_SELECT_ANY, + ComdatKind::SameSize => coff::IMAGE_COMDAT_SELECT_SAME_SIZE, + ComdatKind::ExactMatch => coff::IMAGE_COMDAT_SELECT_EXACT_MATCH, + ComdatKind::Largest => coff::IMAGE_COMDAT_SELECT_LARGEST, + ComdatKind::Newest => coff::IMAGE_COMDAT_SELECT_NEWEST, + ComdatKind::Unknown => { + return Err(Error(format!( + "unsupported COMDAT symbol `{}` kind {:?}", + symbol.name().unwrap_or(""), + comdat.kind + ))); + } + }; + for id in &comdat.sections { + let section = &self.sections[id.0]; + if section.symbol.is_none() { + return Err(Error(format!( + "missing symbol for COMDAT section `{}`", + section.name().unwrap_or(""), + ))); + } + if id.0 != comdat_section { + section_offsets[id.0].selection = coff::IMAGE_COMDAT_SELECT_ASSOCIATIVE; + section_offsets[id.0].associative_section = comdat_section as u32 + 1; + } + } + } + + // Reserve symbol indices and add symbol strings to strtab. + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + for (index, symbol) in self.symbols.iter().enumerate() { + symbol_offsets[index].index = writer.reserve_symbol_index(); + let mut name = &*symbol.name; + match symbol.kind { + SymbolKind::File => { + // Name goes in auxiliary symbol records. 
+ symbol_offsets[index].aux_count = writer.reserve_aux_file_name(&symbol.name); + name = b".file"; + } + SymbolKind::Section if symbol.section.id().is_some() => { + symbol_offsets[index].aux_count = writer.reserve_aux_section(); + } + _ => {} + }; + symbol_offsets[index].name = writer.add_name(name); + } + + // Reserve file ranges. + writer.reserve_file_header(); + writer.reserve_section_headers(self.sections.len() as u16); + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].offset = writer.reserve_section(section.data.len()); + section_offsets[index].reloc_offset = + writer.reserve_relocations(section.relocations.len()); + } + writer.reserve_symtab_strtab(); + + // Start writing. + writer.write_file_header(writer::FileHeader { + machine: match (self.architecture, self.sub_architecture) { + (Architecture::Arm, None) => coff::IMAGE_FILE_MACHINE_ARMNT, + (Architecture::Aarch64, None) => coff::IMAGE_FILE_MACHINE_ARM64, + (Architecture::Aarch64, Some(SubArchitecture::Arm64EC)) => { + coff::IMAGE_FILE_MACHINE_ARM64EC + } + (Architecture::I386, None) => coff::IMAGE_FILE_MACHINE_I386, + (Architecture::X86_64, None) => coff::IMAGE_FILE_MACHINE_AMD64, + _ => { + return Err(Error(format!( + "unimplemented architecture {:?} with sub-architecture {:?}", + self.architecture, self.sub_architecture + ))); + } + }, + time_date_stamp: 0, + characteristics: match self.flags { + FileFlags::Coff { characteristics } => characteristics, + _ => 0, + }, + })?; + + // Write section headers. + for (index, section) in self.sections.iter().enumerate() { + let mut characteristics = if let SectionFlags::Coff { + characteristics, .. 
+ } = section.flags + { + characteristics + } else { + match section.kind { + SectionKind::Text => { + coff::IMAGE_SCN_CNT_CODE + | coff::IMAGE_SCN_MEM_EXECUTE + | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Data => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::UninitializedData => { + coff::IMAGE_SCN_CNT_UNINITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::ReadOnlyData + | SectionKind::ReadOnlyDataWithRel + | SectionKind::ReadOnlyString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Debug + | SectionKind::DebugString + | SectionKind::Other + | SectionKind::OtherString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_DISCARDABLE + } + SectionKind::Linker => coff::IMAGE_SCN_LNK_INFO | coff::IMAGE_SCN_LNK_REMOVE, + SectionKind::Common + | SectionKind::Tls + | SectionKind::UninitializedTls + | SectionKind::TlsVariables + | SectionKind::Note + | SectionKind::Unknown + | SectionKind::Metadata + | SectionKind::Elf(_) => { + return Err(Error(format!( + "unimplemented section `{}` kind {:?}", + section.name().unwrap_or(""), + section.kind + ))); + } + } + }; + if section_offsets[index].selection != 0 { + characteristics |= coff::IMAGE_SCN_LNK_COMDAT; + }; + if section.relocations.len() > 0xffff { + characteristics |= coff::IMAGE_SCN_LNK_NRELOC_OVFL; + } + characteristics |= match section.align { + 1 => coff::IMAGE_SCN_ALIGN_1BYTES, + 2 => coff::IMAGE_SCN_ALIGN_2BYTES, + 4 => coff::IMAGE_SCN_ALIGN_4BYTES, + 8 => coff::IMAGE_SCN_ALIGN_8BYTES, + 16 => coff::IMAGE_SCN_ALIGN_16BYTES, + 32 => coff::IMAGE_SCN_ALIGN_32BYTES, + 64 => coff::IMAGE_SCN_ALIGN_64BYTES, + 128 => coff::IMAGE_SCN_ALIGN_128BYTES, + 256 => coff::IMAGE_SCN_ALIGN_256BYTES, + 512 => coff::IMAGE_SCN_ALIGN_512BYTES, + 1024 => coff::IMAGE_SCN_ALIGN_1024BYTES, + 2048 => coff::IMAGE_SCN_ALIGN_2048BYTES, + 4096 
=> coff::IMAGE_SCN_ALIGN_4096BYTES, + 8192 => coff::IMAGE_SCN_ALIGN_8192BYTES, + _ => { + return Err(Error(format!( + "unimplemented section `{}` align {}", + section.name().unwrap_or(""), + section.align + ))); + } + }; + writer.write_section_header(writer::SectionHeader { + name: section_offsets[index].name, + size_of_raw_data: section.size as u32, + pointer_to_raw_data: section_offsets[index].offset, + pointer_to_relocations: section_offsets[index].reloc_offset, + pointer_to_linenumbers: 0, + number_of_relocations: section.relocations.len() as u32, + number_of_linenumbers: 0, + characteristics, + }); + } + + // Write section data and relocations. + for section in &self.sections { + writer.write_section(§ion.data); + + if !section.relocations.is_empty() { + //debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + writer.write_relocations_count(section.relocations.len()); + for reloc in §ion.relocations { + let typ = if let RelocationFlags::Coff { typ } = reloc.flags { + typ + } else { + return Err(Error("invalid relocation flags".into())); + }; + writer.write_relocation(writer::Relocation { + virtual_address: reloc.offset as u32, + symbol: symbol_offsets[reloc.symbol.0].index, + typ, + }); + } + } + } + + // Write symbols. 
+ for (index, symbol) in self.symbols.iter().enumerate() { + let section_number = match symbol.section { + SymbolSection::None => { + debug_assert_eq!(symbol.kind, SymbolKind::File); + coff::IMAGE_SYM_DEBUG as u16 + } + SymbolSection::Undefined => coff::IMAGE_SYM_UNDEFINED as u16, + SymbolSection::Absolute => coff::IMAGE_SYM_ABSOLUTE as u16, + SymbolSection::Common => coff::IMAGE_SYM_UNDEFINED as u16, + SymbolSection::Section(id) => id.0 as u16 + 1, + }; + let typ = if symbol.kind == SymbolKind::Text { + coff::IMAGE_SYM_DTYPE_FUNCTION << coff::IMAGE_SYM_DTYPE_SHIFT + } else { + coff::IMAGE_SYM_TYPE_NULL + }; + let storage_class = match symbol.kind { + SymbolKind::File => coff::IMAGE_SYM_CLASS_FILE, + SymbolKind::Section => { + if symbol.section.id().is_some() { + coff::IMAGE_SYM_CLASS_STATIC + } else { + coff::IMAGE_SYM_CLASS_SECTION + } + } + SymbolKind::Label => coff::IMAGE_SYM_CLASS_LABEL, + SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => { + match symbol.section { + SymbolSection::None => { + return Err(Error(format!( + "missing section for symbol `{}`", + symbol.name().unwrap_or("") + ))); + } + SymbolSection::Undefined | SymbolSection::Common => { + coff::IMAGE_SYM_CLASS_EXTERNAL + } + SymbolSection::Absolute | SymbolSection::Section(_) => { + match symbol.scope { + // TODO: does this need aux symbol records too? 
+ _ if symbol.weak => coff::IMAGE_SYM_CLASS_WEAK_EXTERNAL, + SymbolScope::Unknown => { + return Err(Error(format!( + "unimplemented symbol `{}` scope {:?}", + symbol.name().unwrap_or(""), + symbol.scope + ))); + } + SymbolScope::Compilation => coff::IMAGE_SYM_CLASS_STATIC, + SymbolScope::Linkage | SymbolScope::Dynamic => { + coff::IMAGE_SYM_CLASS_EXTERNAL + } + } + } + } + } + SymbolKind::Unknown => { + return Err(Error(format!( + "unimplemented symbol `{}` kind {:?}", + symbol.name().unwrap_or(""), + symbol.kind + ))); + } + }; + let number_of_aux_symbols = symbol_offsets[index].aux_count; + let value = if symbol.section == SymbolSection::Common { + symbol.size as u32 + } else { + symbol.value as u32 + }; + writer.write_symbol(writer::Symbol { + name: symbol_offsets[index].name, + value, + section_number, + typ, + storage_class, + number_of_aux_symbols, + }); + + // Write auxiliary symbols. + match symbol.kind { + SymbolKind::File => { + writer.write_aux_file_name(&symbol.name, number_of_aux_symbols); + } + SymbolKind::Section if symbol.section.id().is_some() => { + debug_assert_eq!(number_of_aux_symbols, 1); + let section_index = symbol.section.id().unwrap().0; + let section = &self.sections[section_index]; + writer.write_aux_section(writer::AuxSymbolSection { + length: section.size as u32, + number_of_relocations: section.relocations.len() as u32, + number_of_linenumbers: 0, + check_sum: checksum(section.data()), + number: section_offsets[section_index].associative_section, + selection: section_offsets[section_index].selection, + }); + } + _ => { + debug_assert_eq!(number_of_aux_symbols, 0); + } + } + } + + writer.write_strtab(); + + debug_assert_eq!(writer.reserved_len(), writer.len()); + + Ok(()) + } +} + +// JamCRC +fn checksum(data: &[u8]) -> u32 { + let mut hasher = crc32fast::Hasher::new_with_initial(0xffff_ffff); + hasher.update(data); + !hasher.finalize() +} diff --git a/third_party/rust/object/src/write/coff/writer.rs 
b/third_party/rust/object/src/write/coff/writer.rs new file mode 100644 index 000000000000..9d06dfaeadf7 --- /dev/null +++ b/third_party/rust/object/src/write/coff/writer.rs @@ -0,0 +1,520 @@ +//! Helper for writing COFF files. +use alloc::string::String; +use alloc::vec::Vec; +use core::mem; + +use crate::endian::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; +use crate::pe; +use crate::write::string::{StringId, StringTable}; +use crate::write::util; +use crate::write::{Error, Result, WritableBuffer}; + +/// A helper for writing COFF files. +/// +/// Writing uses a two phase approach. The first phase builds up all of the information +/// that may need to be known ahead of time: +/// - build string table +/// - reserve section indices +/// - reserve symbol indices +/// - reserve file ranges for headers and sections +/// +/// Some of the information has ordering requirements. For example, strings must be added +/// to the string table before reserving the file range for the string table. There are debug +/// asserts to check some of these requirements. +/// +/// The second phase writes everything out in order. Thus the caller must ensure writing +/// is in the same order that file ranges were reserved. There are debug asserts to assist +/// with checking this. +#[allow(missing_debug_implementations)] +pub struct Writer<'a> { + buffer: &'a mut dyn WritableBuffer, + len: usize, + + section_num: u16, + + symtab_offset: u32, + symtab_num: u32, + + strtab: StringTable<'a>, + strtab_len: usize, + strtab_offset: u32, + strtab_data: Vec, +} + +impl<'a> Writer<'a> { + /// Create a new `Writer`. + pub fn new(buffer: &'a mut dyn WritableBuffer) -> Self { + Writer { + buffer, + len: 0, + + section_num: 0, + + symtab_offset: 0, + symtab_num: 0, + + strtab: StringTable::default(), + strtab_len: 0, + strtab_offset: 0, + strtab_data: Vec::new(), + } + } + + /// Return the current file length that has been reserved. 
+ pub fn reserved_len(&self) -> usize { + self.len + } + + /// Return the current file length that has been written. + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + self.buffer.len() + } + + /// Reserve a file range with the given size and starting alignment. + /// + /// Returns the aligned offset of the start of the range. + /// + /// `align_start` must be a power of two. + pub fn reserve(&mut self, len: usize, align_start: usize) -> u32 { + if align_start > 1 { + self.len = util::align(self.len, align_start); + } + let offset = self.len; + self.len += len; + offset as u32 + } + + /// Write alignment padding bytes. + pub fn write_align(&mut self, align_start: usize) { + if align_start > 1 { + util::write_align(self.buffer, align_start); + } + } + + /// Write data. + pub fn write(&mut self, data: &[u8]) { + self.buffer.write_bytes(data); + } + + /// Reserve the file range up to the given file offset. + pub fn reserve_until(&mut self, offset: usize) { + debug_assert!(self.len <= offset); + self.len = offset; + } + + /// Write padding up to the given file offset. + pub fn pad_until(&mut self, offset: usize) { + debug_assert!(self.buffer.len() <= offset); + self.buffer.resize(offset); + } + + /// Reserve the range for the file header. + /// + /// This must be at the start of the file. + pub fn reserve_file_header(&mut self) { + debug_assert_eq!(self.len, 0); + self.reserve(mem::size_of::(), 1); + } + + /// Write the file header. + /// + /// This must be at the start of the file. + /// + /// Fields that can be derived from known information are automatically set by this function. + pub fn write_file_header(&mut self, header: FileHeader) -> Result<()> { + debug_assert_eq!(self.buffer.len(), 0); + + // Start writing. + self.buffer + .reserve(self.len) + .map_err(|_| Error(String::from("Cannot allocate buffer")))?; + + // Write file header. 
+ let header = pe::ImageFileHeader { + machine: U16::new(LE, header.machine), + number_of_sections: U16::new(LE, self.section_num), + time_date_stamp: U32::new(LE, header.time_date_stamp), + pointer_to_symbol_table: U32::new(LE, self.symtab_offset), + number_of_symbols: U32::new(LE, self.symtab_num), + size_of_optional_header: U16::default(), + characteristics: U16::new(LE, header.characteristics), + }; + self.buffer.write(&header); + + Ok(()) + } + + /// Reserve the range for the section headers. + pub fn reserve_section_headers(&mut self, section_num: u16) { + debug_assert_eq!(self.section_num, 0); + self.section_num = section_num; + self.reserve( + section_num as usize * mem::size_of::(), + 1, + ); + } + + /// Write a section header. + pub fn write_section_header(&mut self, section: SectionHeader) { + let mut coff_section = pe::ImageSectionHeader { + name: [0; 8], + virtual_size: U32::default(), + virtual_address: U32::default(), + size_of_raw_data: U32::new(LE, section.size_of_raw_data), + pointer_to_raw_data: U32::new(LE, section.pointer_to_raw_data), + pointer_to_relocations: U32::new(LE, section.pointer_to_relocations), + pointer_to_linenumbers: U32::new(LE, section.pointer_to_linenumbers), + number_of_relocations: if section.number_of_relocations > 0xffff { + U16::new(LE, 0xffff) + } else { + U16::new(LE, section.number_of_relocations as u16) + }, + number_of_linenumbers: U16::default(), + characteristics: U32::new(LE, section.characteristics), + }; + match section.name { + Name::Short(name) => coff_section.name = name, + Name::Long(str_id) => { + let mut str_offset = self.strtab.get_offset(str_id); + if str_offset <= 9_999_999 { + let mut name = [0; 7]; + let mut len = 0; + if str_offset == 0 { + name[6] = b'0'; + len = 1; + } else { + while str_offset != 0 { + let rem = (str_offset % 10) as u8; + str_offset /= 10; + name[6 - len] = b'0' + rem; + len += 1; + } + } + coff_section.name = [0; 8]; + coff_section.name[0] = b'/'; + 
coff_section.name[1..][..len].copy_from_slice(&name[7 - len..]); + } else { + debug_assert!(str_offset as u64 <= 0xf_ffff_ffff); + coff_section.name[0] = b'/'; + coff_section.name[1] = b'/'; + for i in 0..6 { + let rem = (str_offset % 64) as u8; + str_offset /= 64; + let c = match rem { + 0..=25 => b'A' + rem, + 26..=51 => b'a' + rem - 26, + 52..=61 => b'0' + rem - 52, + 62 => b'+', + 63 => b'/', + _ => unreachable!(), + }; + coff_section.name[7 - i] = c; + } + } + } + } + self.buffer.write(&coff_section); + } + + /// Reserve the range for the section data. + /// + /// Returns the aligned offset of the start of the range. + /// Does nothing and returns 0 if the length is zero. + pub fn reserve_section(&mut self, len: usize) -> u32 { + if len == 0 { + return 0; + } + // TODO: not sure what alignment is required here, but this seems to match LLVM + self.reserve(len, 4) + } + + /// Write the alignment bytes prior to section data. + /// + /// This is unneeded if you are using `write_section` or `write_section_zeroes` + /// for the data. + pub fn write_section_align(&mut self) { + util::write_align(self.buffer, 4); + } + + /// Write the section data. + /// + /// Writes alignment bytes prior to the data. + /// Does nothing if the data is empty. + pub fn write_section(&mut self, data: &[u8]) { + if data.is_empty() { + return; + } + self.write_section_align(); + self.buffer.write_bytes(data); + } + + /// Write the section data using zero bytes. + /// + /// Writes alignment bytes prior to the data. + /// Does nothing if the length is zero. + pub fn write_section_zeroes(&mut self, len: usize) { + if len == 0 { + return; + } + self.write_section_align(); + self.buffer.resize(self.buffer.len() + len); + } + + /// Reserve a file range for the given number of relocations. + /// + /// This will automatically reserve an extra relocation if there are more than 0xffff. + /// + /// Returns the offset of the range. + /// Does nothing and returns 0 if the count is zero. 
+ pub fn reserve_relocations(&mut self, mut count: usize) -> u32 { + if count == 0 { + return 0; + } + if count > 0xffff { + count += 1; + } + self.reserve(count * mem::size_of::(), 1) + } + + /// Write a relocation containing the count if required. + /// + /// This should be called before writing the first relocation for a section. + pub fn write_relocations_count(&mut self, count: usize) { + if count > 0xffff { + let coff_relocation = pe::ImageRelocation { + virtual_address: U32Bytes::new(LE, count as u32 + 1), + symbol_table_index: U32Bytes::new(LE, 0), + typ: U16Bytes::new(LE, 0), + }; + self.buffer.write(&coff_relocation); + } + } + + /// Write a relocation. + pub fn write_relocation(&mut self, reloc: Relocation) { + let coff_relocation = pe::ImageRelocation { + virtual_address: U32Bytes::new(LE, reloc.virtual_address), + symbol_table_index: U32Bytes::new(LE, reloc.symbol), + typ: U16Bytes::new(LE, reloc.typ), + }; + self.buffer.write(&coff_relocation); + } + + /// Reserve a symbol table entry. + /// + /// This must be called before [`Self::reserve_symtab_strtab`]. + pub fn reserve_symbol_index(&mut self) -> u32 { + debug_assert_eq!(self.symtab_offset, 0); + let index = self.symtab_num; + self.symtab_num += 1; + index + } + + /// Reserve a number of symbol table entries. + pub fn reserve_symbol_indices(&mut self, count: u32) { + debug_assert_eq!(self.symtab_offset, 0); + self.symtab_num += count; + } + + /// Write a symbol table entry. 
+ pub fn write_symbol(&mut self, symbol: Symbol) { + let mut coff_symbol = pe::ImageSymbol { + name: [0; 8], + value: U32Bytes::new(LE, symbol.value), + section_number: U16Bytes::new(LE, symbol.section_number), + typ: U16Bytes::new(LE, symbol.typ), + storage_class: symbol.storage_class, + number_of_aux_symbols: symbol.number_of_aux_symbols, + }; + match symbol.name { + Name::Short(name) => coff_symbol.name = name, + Name::Long(str_id) => { + let str_offset = self.strtab.get_offset(str_id); + coff_symbol.name[4..8].copy_from_slice(&u32::to_le_bytes(str_offset as u32)); + } + } + self.buffer.write(&coff_symbol); + } + + /// Reserve auxiliary symbols for a file name. + /// + /// Returns the number of auxiliary symbols required. + /// + /// This must be called before [`Self::reserve_symtab_strtab`]. + pub fn reserve_aux_file_name(&mut self, name: &[u8]) -> u8 { + debug_assert_eq!(self.symtab_offset, 0); + let aux_count = (name.len() + pe::IMAGE_SIZEOF_SYMBOL - 1) / pe::IMAGE_SIZEOF_SYMBOL; + self.symtab_num += aux_count as u32; + aux_count as u8 + } + + /// Write auxiliary symbols for a file name. + pub fn write_aux_file_name(&mut self, name: &[u8], aux_count: u8) { + let aux_len = aux_count as usize * pe::IMAGE_SIZEOF_SYMBOL; + debug_assert!(aux_len >= name.len()); + let old_len = self.buffer.len(); + self.buffer.write_bytes(name); + self.buffer.resize(old_len + aux_len); + } + + /// Reserve an auxiliary symbol for a section. + /// + /// Returns the number of auxiliary symbols required. + /// + /// This must be called before [`Self::reserve_symtab_strtab`]. + pub fn reserve_aux_section(&mut self) -> u8 { + debug_assert_eq!(self.symtab_offset, 0); + self.symtab_num += 1; + 1 + } + + /// Write an auxiliary symbol for a section. 
+ pub fn write_aux_section(&mut self, section: AuxSymbolSection) { + let aux = pe::ImageAuxSymbolSection { + length: U32Bytes::new(LE, section.length), + number_of_relocations: if section.number_of_relocations > 0xffff { + U16Bytes::new(LE, 0xffff) + } else { + U16Bytes::new(LE, section.number_of_relocations as u16) + }, + number_of_linenumbers: U16Bytes::new(LE, section.number_of_linenumbers), + check_sum: U32Bytes::new(LE, section.check_sum), + number: U16Bytes::new(LE, section.number as u16), + selection: section.selection, + reserved: 0, + high_number: U16Bytes::new(LE, (section.number >> 16) as u16), + }; + self.buffer.write(&aux); + } + + /// Return the number of reserved symbol table entries. + pub fn symbol_count(&self) -> u32 { + self.symtab_num + } + + /// Add a string to the string table. + /// + /// This must be called before [`Self::reserve_symtab_strtab`]. + pub fn add_string(&mut self, name: &'a [u8]) -> StringId { + debug_assert_eq!(self.strtab_offset, 0); + self.strtab.add(name) + } + + /// Add a section or symbol name to the string table if required. + /// + /// This must be called before [`Self::reserve_symtab_strtab`]. + pub fn add_name(&mut self, name: &'a [u8]) -> Name { + if name.len() > 8 { + Name::Long(self.add_string(name)) + } else { + let mut short_name = [0; 8]; + short_name[..name.len()].copy_from_slice(name); + Name::Short(short_name) + } + } + + /// Reserve the range for the symbol table and string table. + /// + /// This must be called after functions that reserve symbol + /// indices or add strings. + pub fn reserve_symtab_strtab(&mut self) { + debug_assert_eq!(self.symtab_offset, 0); + self.symtab_offset = self.reserve(self.symtab_num as usize * pe::IMAGE_SIZEOF_SYMBOL, 1); + + debug_assert_eq!(self.strtab_offset, 0); + // First 4 bytes of strtab are the length. 
+ self.strtab.write(4, &mut self.strtab_data); + self.strtab_len = self.strtab_data.len() + 4; + self.strtab_offset = self.reserve(self.strtab_len, 1); + } + + /// Write the string table. + pub fn write_strtab(&mut self) { + debug_assert_eq!(self.strtab_offset, self.buffer.len() as u32); + self.buffer + .write_bytes(&u32::to_le_bytes(self.strtab_len as u32)); + self.buffer.write_bytes(&self.strtab_data); + } +} + +/// Shortened and native endian version of [`pe::ImageFileHeader`]. +#[allow(missing_docs)] +#[derive(Debug, Default, Clone)] +pub struct FileHeader { + pub machine: u16, + pub time_date_stamp: u32, + pub characteristics: u16, +} + +/// A section or symbol name. +#[derive(Debug, Clone, Copy)] +pub enum Name { + /// An inline name. + Short([u8; 8]), + /// An id of a string table entry. + Long(StringId), +} + +impl Default for Name { + fn default() -> Name { + Name::Short([0; 8]) + } +} + +// From isn't useful. +#[allow(clippy::from_over_into)] +impl<'a> Into for &'a [u8; 8] { + fn into(self) -> Name { + Name::Short(*self) + } +} + +/// Native endian version of [`pe::ImageSectionHeader`]. +#[allow(missing_docs)] +#[derive(Debug, Default, Clone)] +pub struct SectionHeader { + pub name: Name, + pub size_of_raw_data: u32, + pub pointer_to_raw_data: u32, + pub pointer_to_relocations: u32, + pub pointer_to_linenumbers: u32, + /// This will automatically be clamped if there are more than 0xffff. + pub number_of_relocations: u32, + pub number_of_linenumbers: u16, + pub characteristics: u32, +} + +/// Native endian version of [`pe::ImageSymbol`]. +#[allow(missing_docs)] +#[derive(Debug, Default, Clone)] +pub struct Symbol { + pub name: Name, + pub value: u32, + pub section_number: u16, + pub typ: u16, + pub storage_class: u8, + pub number_of_aux_symbols: u8, +} + +/// Native endian version of [`pe::ImageAuxSymbolSection`]. 
+#[allow(missing_docs)] +#[derive(Debug, Default, Clone)] +pub struct AuxSymbolSection { + pub length: u32, + /// This will automatically be clamped if there are more than 0xffff. + pub number_of_relocations: u32, + pub number_of_linenumbers: u16, + pub check_sum: u32, + pub number: u32, + pub selection: u8, +} + +/// Native endian version of [`pe::ImageRelocation`]. +#[allow(missing_docs)] +#[derive(Debug, Default, Clone)] +pub struct Relocation { + pub virtual_address: u32, + pub symbol: u32, + pub typ: u16, +} diff --git a/third_party/rust/object/src/write/elf/object.rs b/third_party/rust/object/src/write/elf/object.rs index acc820c9ecb9..0920a0dd193a 100644 --- a/third_party/rust/object/src/write/elf/object.rs +++ b/third_party/rust/object/src/write/elf/object.rs @@ -3,7 +3,6 @@ use alloc::vec::Vec; use crate::write::elf::writer::*; use crate::write::string::StringId; use crate::write::*; -use crate::AddressSize; use crate::{elf, pod}; #[derive(Clone, Copy)] @@ -141,6 +140,9 @@ impl<'a> Object<'a> { Architecture::Riscv32 => true, Architecture::S390x => true, Architecture::Sbf => false, + Architecture::Sharc => true, + Architecture::Sparc => true, + Architecture::Sparc32Plus => true, Architecture::Sparc64 => true, Architecture::Xtensa => true, _ => { @@ -152,59 +154,351 @@ impl<'a> Object<'a> { }) } - pub(crate) fn elf_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> Result { - // Return true if we should use a section symbol to avoid preemption. - fn want_section_symbol(relocation: &Relocation, symbol: &Symbol) -> bool { - if symbol.scope != SymbolScope::Dynamic { - // Only dynamic symbols can be preemptible. - return false; - } - match symbol.kind { - SymbolKind::Text | SymbolKind::Data => {} - _ => return false, - } - match relocation.kind { - // Anything using GOT or PLT is preemptible. - // We also require that `Other` relocations must already be correct. 
- RelocationKind::Got - | RelocationKind::GotRelative - | RelocationKind::GotBaseRelative - | RelocationKind::PltRelative - | RelocationKind::Elf(_) => return false, - // Absolute relocations are preemptible for non-local data. - // TODO: not sure if this rule is exactly correct - // This rule was added to handle global data references in debuginfo. - // Maybe this should be a new relocation kind so that the caller can decide. - RelocationKind::Absolute => { - if symbol.kind == SymbolKind::Data { - return false; - } - } - _ => {} - } - true - } - - // Use section symbols for relocations where required to avoid preemption. - // Otherwise, the linker will fail with: - // relocation R_X86_64_PC32 against symbol `SomeSymbolName' can not be used when - // making a shared object; recompile with -fPIC - let symbol = &self.symbols[relocation.symbol.0]; - if want_section_symbol(relocation, symbol) { - if let Some(section) = symbol.section.id() { - relocation.addend += symbol.value as i64; - relocation.symbol = self.section_symbol(section); - } - } - - // Determine whether the addend is stored in the relocation or the data. - if self.elf_has_relocation_addend()? 
{ - Ok(0) + pub(crate) fn elf_translate_relocation(&mut self, reloc: &mut Relocation) -> Result<()> { + let (kind, encoding, size) = if let RelocationFlags::Generic { + kind, + encoding, + size, + } = reloc.flags + { + (kind, encoding, size) } else { - let constant = relocation.addend; - relocation.addend = 0; - Ok(constant) - } + return Ok(()); + }; + + let r_type = match self.architecture { + Architecture::Aarch64 => match (kind, encoding, size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => elf::R_AARCH64_ABS64, + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => elf::R_AARCH64_ABS32, + (RelocationKind::Absolute, RelocationEncoding::Generic, 16) => elf::R_AARCH64_ABS16, + (RelocationKind::Relative, RelocationEncoding::Generic, 64) => { + elf::R_AARCH64_PREL64 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_AARCH64_PREL32 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 16) => { + elf::R_AARCH64_PREL16 + } + (RelocationKind::Relative, RelocationEncoding::AArch64Call, 26) + | (RelocationKind::PltRelative, RelocationEncoding::AArch64Call, 26) => { + elf::R_AARCH64_CALL26 + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Aarch64_Ilp32 => match (kind, encoding, size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { + elf::R_AARCH64_P32_ABS32 + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Arm => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_ARM_ABS32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Avr => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_AVR_32, + (RelocationKind::Absolute, _, 16) => elf::R_AVR_16, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Bpf => match (kind, 
encoding, size) { + (RelocationKind::Absolute, _, 64) => elf::R_BPF_64_64, + (RelocationKind::Absolute, _, 32) => elf::R_BPF_64_32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Csky => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_CKCORE_ADDR32, + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_CKCORE_PCREL32 + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::I386 => match (kind, size) { + (RelocationKind::Absolute, 32) => elf::R_386_32, + (RelocationKind::Relative, 32) => elf::R_386_PC32, + (RelocationKind::Got, 32) => elf::R_386_GOT32, + (RelocationKind::PltRelative, 32) => elf::R_386_PLT32, + (RelocationKind::GotBaseOffset, 32) => elf::R_386_GOTOFF, + (RelocationKind::GotBaseRelative, 32) => elf::R_386_GOTPC, + (RelocationKind::Absolute, 16) => elf::R_386_16, + (RelocationKind::Relative, 16) => elf::R_386_PC16, + (RelocationKind::Absolute, 8) => elf::R_386_8, + (RelocationKind::Relative, 8) => elf::R_386_PC8, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::X86_64 | Architecture::X86_64_X32 => match (kind, encoding, size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => elf::R_X86_64_64, + (RelocationKind::Relative, RelocationEncoding::X86Branch, 32) => { + elf::R_X86_64_PLT32 + } + (RelocationKind::Relative, _, 32) => elf::R_X86_64_PC32, + (RelocationKind::Got, _, 32) => elf::R_X86_64_GOT32, + (RelocationKind::PltRelative, _, 32) => elf::R_X86_64_PLT32, + (RelocationKind::GotRelative, _, 32) => elf::R_X86_64_GOTPCREL, + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => elf::R_X86_64_32, + (RelocationKind::Absolute, RelocationEncoding::X86Signed, 32) => elf::R_X86_64_32S, + (RelocationKind::Absolute, _, 16) => elf::R_X86_64_16, + (RelocationKind::Relative, _, 16) => elf::R_X86_64_PC16, + 
(RelocationKind::Absolute, _, 8) => elf::R_X86_64_8, + (RelocationKind::Relative, _, 8) => elf::R_X86_64_PC8, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Hexagon => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_HEX_32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::LoongArch64 => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_LARCH_32, + (RelocationKind::Absolute, _, 64) => elf::R_LARCH_64, + (RelocationKind::Relative, _, 32) => elf::R_LARCH_32_PCREL, + (RelocationKind::Relative, _, 64) => elf::R_LARCH_64_PCREL, + (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 16) + | (RelocationKind::PltRelative, RelocationEncoding::LoongArchBranch, 16) => { + elf::R_LARCH_B16 + } + (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 21) + | (RelocationKind::PltRelative, RelocationEncoding::LoongArchBranch, 21) => { + elf::R_LARCH_B21 + } + (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 26) + | (RelocationKind::PltRelative, RelocationEncoding::LoongArchBranch, 26) => { + elf::R_LARCH_B26 + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Mips | Architecture::Mips64 => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 16) => elf::R_MIPS_16, + (RelocationKind::Absolute, _, 32) => elf::R_MIPS_32, + (RelocationKind::Absolute, _, 64) => elf::R_MIPS_64, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Msp430 => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_MSP430_32, + (RelocationKind::Absolute, _, 16) => elf::R_MSP430_16_BYTE, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::PowerPc => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => 
elf::R_PPC_ADDR32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::PowerPc64 => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_PPC64_ADDR32, + (RelocationKind::Absolute, _, 64) => elf::R_PPC64_ADDR64, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Riscv32 | Architecture::Riscv64 => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_RISCV_32, + (RelocationKind::Absolute, _, 64) => elf::R_RISCV_64, + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_RISCV_32_PCREL + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::S390x => match (kind, encoding, size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 8) => elf::R_390_8, + (RelocationKind::Absolute, RelocationEncoding::Generic, 16) => elf::R_390_16, + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => elf::R_390_32, + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => elf::R_390_64, + (RelocationKind::Relative, RelocationEncoding::Generic, 16) => elf::R_390_PC16, + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => elf::R_390_PC32, + (RelocationKind::Relative, RelocationEncoding::Generic, 64) => elf::R_390_PC64, + (RelocationKind::Relative, RelocationEncoding::S390xDbl, 16) => elf::R_390_PC16DBL, + (RelocationKind::Relative, RelocationEncoding::S390xDbl, 32) => elf::R_390_PC32DBL, + (RelocationKind::PltRelative, RelocationEncoding::S390xDbl, 16) => { + elf::R_390_PLT16DBL + } + (RelocationKind::PltRelative, RelocationEncoding::S390xDbl, 32) => { + elf::R_390_PLT32DBL + } + (RelocationKind::Got, RelocationEncoding::Generic, 16) => elf::R_390_GOT16, + (RelocationKind::Got, RelocationEncoding::Generic, 32) => elf::R_390_GOT32, + (RelocationKind::Got, RelocationEncoding::Generic, 64) => elf::R_390_GOT64, + 
(RelocationKind::GotRelative, RelocationEncoding::S390xDbl, 32) => { + elf::R_390_GOTENT + } + (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 16) => { + elf::R_390_GOTOFF16 + } + (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 32) => { + elf::R_390_GOTOFF32 + } + (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 64) => { + elf::R_390_GOTOFF64 + } + (RelocationKind::GotBaseRelative, RelocationEncoding::Generic, 64) => { + elf::R_390_GOTPC + } + (RelocationKind::GotBaseRelative, RelocationEncoding::S390xDbl, 32) => { + elf::R_390_GOTPCDBL + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Sbf => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 64) => elf::R_SBF_64_64, + (RelocationKind::Absolute, _, 32) => elf::R_SBF_64_32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Sharc => match (kind, encoding, size) { + (RelocationKind::Absolute, RelocationEncoding::SharcTypeA, 32) => { + elf::R_SHARC_ADDR32_V3 + } + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { + elf::R_SHARC_ADDR_VAR_V3 + } + (RelocationKind::Relative, RelocationEncoding::SharcTypeA, 24) => { + elf::R_SHARC_PCRLONG_V3 + } + (RelocationKind::Relative, RelocationEncoding::SharcTypeA, 6) => { + elf::R_SHARC_PCRSHORT_V3 + } + (RelocationKind::Relative, RelocationEncoding::SharcTypeB, 6) => { + elf::R_SHARC_PCRSHORT_V3 + } + (RelocationKind::Absolute, RelocationEncoding::Generic, 16) => { + elf::R_SHARC_ADDR_VAR16_V3 + } + (RelocationKind::Absolute, RelocationEncoding::SharcTypeA, 16) => { + elf::R_SHARC_DATA16_V3 + } + (RelocationKind::Absolute, RelocationEncoding::SharcTypeB, 16) => { + elf::R_SHARC_DATA16_VISA_V3 + } + (RelocationKind::Absolute, RelocationEncoding::SharcTypeA, 24) => { + elf::R_SHARC_ADDR24_V3 + } + (RelocationKind::Absolute, RelocationEncoding::SharcTypeA, 6) => { + elf::R_SHARC_DATA6_V3 + } + 
(RelocationKind::Absolute, RelocationEncoding::SharcTypeB, 6) => { + elf::R_SHARC_DATA6_VISA_V3 + } + (RelocationKind::Absolute, RelocationEncoding::SharcTypeB, 7) => { + elf::R_SHARC_DATA7_VISA_V3 + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Sparc | Architecture::Sparc32Plus => match (kind, encoding, size) { + // TODO: use R_SPARC_32 if aligned. + (RelocationKind::Absolute, _, 32) => elf::R_SPARC_UA32, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Sparc64 => match (kind, encoding, size) { + // TODO: use R_SPARC_32/R_SPARC_64 if aligned. + (RelocationKind::Absolute, _, 32) => elf::R_SPARC_UA32, + (RelocationKind::Absolute, _, 64) => elf::R_SPARC_UA64, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Xtensa => match (kind, encoding, size) { + (RelocationKind::Absolute, _, 32) => elf::R_XTENSA_32, + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_XTENSA_32_PCREL + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }; + reloc.flags = RelocationFlags::Elf { r_type }; + Ok(()) + } + + pub(crate) fn elf_adjust_addend(&mut self, _relocation: &mut Relocation) -> Result { + // Determine whether the addend is stored in the relocation or the data. + let implicit = !self.elf_has_relocation_addend()?; + Ok(implicit) + } + + pub(crate) fn elf_relocation_size(&self, reloc: &Relocation) -> Result { + let r_type = if let RelocationFlags::Elf { r_type } = reloc.flags { + r_type + } else { + return Err(Error("invalid relocation flags".into())); + }; + // This only needs to support architectures that use implicit addends. 
+ let size = match self.architecture { + Architecture::Arm => match r_type { + elf::R_ARM_ABS16 => Some(16), + elf::R_ARM_ABS32 | elf::R_ARM_REL32 => Some(32), + _ => None, + }, + Architecture::Bpf => match r_type { + elf::R_BPF_64_32 => Some(32), + elf::R_BPF_64_64 => Some(64), + _ => None, + }, + Architecture::I386 => match r_type { + elf::R_386_8 | elf::R_386_PC8 => Some(8), + elf::R_386_16 | elf::R_386_PC16 => Some(16), + elf::R_386_32 + | elf::R_386_PC32 + | elf::R_386_GOT32 + | elf::R_386_PLT32 + | elf::R_386_GOTOFF + | elf::R_386_GOTPC => Some(32), + _ => None, + }, + Architecture::Mips => match r_type { + elf::R_MIPS_16 => Some(16), + elf::R_MIPS_32 => Some(32), + elf::R_MIPS_64 => Some(64), + _ => None, + }, + Architecture::Sbf => match r_type { + elf::R_SBF_64_32 => Some(32), + elf::R_SBF_64_64 => Some(64), + _ => None, + }, + _ => { + return Err(Error(format!( + "unimplemented architecture {:?}", + self.architecture + ))); + } + }; + size.ok_or_else(|| Error(format!("unsupported relocation for size {:?}", reloc))) } pub(crate) fn elf_is_64(&self) -> bool { @@ -324,33 +618,36 @@ impl<'a> Object<'a> { // Start writing. 
let e_type = elf::ET_REL; - let e_machine = match self.architecture { - Architecture::Aarch64 => elf::EM_AARCH64, - Architecture::Aarch64_Ilp32 => elf::EM_AARCH64, - Architecture::Arm => elf::EM_ARM, - Architecture::Avr => elf::EM_AVR, - Architecture::Bpf => elf::EM_BPF, - Architecture::Csky => elf::EM_CSKY, - Architecture::I386 => elf::EM_386, - Architecture::X86_64 => elf::EM_X86_64, - Architecture::X86_64_X32 => elf::EM_X86_64, - Architecture::Hexagon => elf::EM_HEXAGON, - Architecture::LoongArch64 => elf::EM_LOONGARCH, - Architecture::Mips => elf::EM_MIPS, - Architecture::Mips64 => elf::EM_MIPS, - Architecture::Msp430 => elf::EM_MSP430, - Architecture::PowerPc => elf::EM_PPC, - Architecture::PowerPc64 => elf::EM_PPC64, - Architecture::Riscv32 => elf::EM_RISCV, - Architecture::Riscv64 => elf::EM_RISCV, - Architecture::S390x => elf::EM_S390, - Architecture::Sbf => elf::EM_SBF, - Architecture::Sparc64 => elf::EM_SPARCV9, - Architecture::Xtensa => elf::EM_XTENSA, + let e_machine = match (self.architecture, self.sub_architecture) { + (Architecture::Aarch64, None) => elf::EM_AARCH64, + (Architecture::Aarch64_Ilp32, None) => elf::EM_AARCH64, + (Architecture::Arm, None) => elf::EM_ARM, + (Architecture::Avr, None) => elf::EM_AVR, + (Architecture::Bpf, None) => elf::EM_BPF, + (Architecture::Csky, None) => elf::EM_CSKY, + (Architecture::I386, None) => elf::EM_386, + (Architecture::X86_64, None) => elf::EM_X86_64, + (Architecture::X86_64_X32, None) => elf::EM_X86_64, + (Architecture::Hexagon, None) => elf::EM_HEXAGON, + (Architecture::LoongArch64, None) => elf::EM_LOONGARCH, + (Architecture::Mips, None) => elf::EM_MIPS, + (Architecture::Mips64, None) => elf::EM_MIPS, + (Architecture::Msp430, None) => elf::EM_MSP430, + (Architecture::PowerPc, None) => elf::EM_PPC, + (Architecture::PowerPc64, None) => elf::EM_PPC64, + (Architecture::Riscv32, None) => elf::EM_RISCV, + (Architecture::Riscv64, None) => elf::EM_RISCV, + (Architecture::S390x, None) => elf::EM_S390, + 
(Architecture::Sbf, None) => elf::EM_SBF, + (Architecture::Sharc, None) => elf::EM_SHARC, + (Architecture::Sparc, None) => elf::EM_SPARC, + (Architecture::Sparc32Plus, None) => elf::EM_SPARC32PLUS, + (Architecture::Sparc64, None) => elf::EM_SPARCV9, + (Architecture::Xtensa, None) => elf::EM_XTENSA, _ => { return Err(Error(format!( - "unimplemented architecture {:?}", - self.architecture + "unimplemented architecture {:?} with sub-architecture {:?}", + self.architecture, self.sub_architecture ))); } }; @@ -393,7 +690,6 @@ impl<'a> Object<'a> { st_info } else { let st_type = match symbol.kind { - SymbolKind::Null => elf::STT_NOTYPE, SymbolKind::Text => { if symbol.is_undefined() { elf::STT_NOTYPE @@ -484,314 +780,10 @@ impl<'a> Object<'a> { writer.write_align_relocation(); debug_assert_eq!(section_offsets[index].reloc_offset, writer.len()); for reloc in §ion.relocations { - let r_type = match self.architecture { - Architecture::Aarch64 => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { - elf::R_AARCH64_ABS64 - } - (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { - elf::R_AARCH64_ABS32 - } - (RelocationKind::Absolute, RelocationEncoding::Generic, 16) => { - elf::R_AARCH64_ABS16 - } - (RelocationKind::Relative, RelocationEncoding::Generic, 64) => { - elf::R_AARCH64_PREL64 - } - (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { - elf::R_AARCH64_PREL32 - } - (RelocationKind::Relative, RelocationEncoding::Generic, 16) => { - elf::R_AARCH64_PREL16 - } - (RelocationKind::Relative, RelocationEncoding::AArch64Call, 26) - | (RelocationKind::PltRelative, RelocationEncoding::AArch64Call, 26) => { - elf::R_AARCH64_CALL26 - } - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Aarch64_Ilp32 => { - match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, 
RelocationEncoding::Generic, 32) => { - elf::R_AARCH64_P32_ABS32 - } - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!( - "unimplemented relocation {:?}", - reloc - ))); - } - } - } - Architecture::Arm => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_ARM_ABS32, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Avr => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_AVR_32, - (RelocationKind::Absolute, _, 16) => elf::R_AVR_16, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Bpf => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 64) => elf::R_BPF_64_64, - (RelocationKind::Absolute, _, 32) => elf::R_BPF_64_32, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Csky => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_CKCORE_ADDR32, - (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { - elf::R_CKCORE_PCREL32 - } - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::I386 => match (reloc.kind, reloc.size) { - (RelocationKind::Absolute, 32) => elf::R_386_32, - (RelocationKind::Relative, 32) => elf::R_386_PC32, - (RelocationKind::Got, 32) => elf::R_386_GOT32, - (RelocationKind::PltRelative, 32) => elf::R_386_PLT32, - (RelocationKind::GotBaseOffset, 32) => elf::R_386_GOTOFF, - (RelocationKind::GotBaseRelative, 32) => elf::R_386_GOTPC, - (RelocationKind::Absolute, 16) => elf::R_386_16, - (RelocationKind::Relative, 16) => elf::R_386_PC16, - (RelocationKind::Absolute, 8) => elf::R_386_8, - 
(RelocationKind::Relative, 8) => elf::R_386_PC8, - (RelocationKind::Elf(x), _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::X86_64 | Architecture::X86_64_X32 => { - match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { - elf::R_X86_64_64 - } - (RelocationKind::Relative, _, 32) => elf::R_X86_64_PC32, - (RelocationKind::Got, _, 32) => elf::R_X86_64_GOT32, - (RelocationKind::PltRelative, _, 32) => elf::R_X86_64_PLT32, - (RelocationKind::GotRelative, _, 32) => elf::R_X86_64_GOTPCREL, - (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { - elf::R_X86_64_32 - } - (RelocationKind::Absolute, RelocationEncoding::X86Signed, 32) => { - elf::R_X86_64_32S - } - (RelocationKind::Absolute, _, 16) => elf::R_X86_64_16, - (RelocationKind::Relative, _, 16) => elf::R_X86_64_PC16, - (RelocationKind::Absolute, _, 8) => elf::R_X86_64_8, - (RelocationKind::Relative, _, 8) => elf::R_X86_64_PC8, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!( - "unimplemented relocation {:?}", - reloc - ))); - } - } - } - Architecture::Hexagon => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_HEX_32, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::LoongArch64 => match (reloc.kind, reloc.encoding, reloc.size) - { - (RelocationKind::Absolute, _, 32) => elf::R_LARCH_32, - (RelocationKind::Absolute, _, 64) => elf::R_LARCH_64, - (RelocationKind::Relative, _, 32) => elf::R_LARCH_32_PCREL, - (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 16) - | ( - RelocationKind::PltRelative, - RelocationEncoding::LoongArchBranch, - 16, - ) => elf::R_LARCH_B16, - (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 21) - | ( - RelocationKind::PltRelative, - RelocationEncoding::LoongArchBranch, - 
21, - ) => elf::R_LARCH_B21, - (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 26) - | ( - RelocationKind::PltRelative, - RelocationEncoding::LoongArchBranch, - 26, - ) => elf::R_LARCH_B26, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Mips | Architecture::Mips64 => { - match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 16) => elf::R_MIPS_16, - (RelocationKind::Absolute, _, 32) => elf::R_MIPS_32, - (RelocationKind::Absolute, _, 64) => elf::R_MIPS_64, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!( - "unimplemented relocation {:?}", - reloc - ))); - } - } - } - Architecture::Msp430 => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_MSP430_32, - (RelocationKind::Absolute, _, 16) => elf::R_MSP430_16_BYTE, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::PowerPc => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_PPC_ADDR32, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::PowerPc64 => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_PPC64_ADDR32, - (RelocationKind::Absolute, _, 64) => elf::R_PPC64_ADDR64, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Riscv32 | Architecture::Riscv64 => { - match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_RISCV_32, - (RelocationKind::Absolute, _, 64) => elf::R_RISCV_64, - (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { - elf::R_RISCV_32_PCREL - } - (RelocationKind::Elf(x), _, _) => x, - _ => 
{ - return Err(Error(format!( - "unimplemented relocation {:?}", - reloc - ))); - } - } - } - Architecture::S390x => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, RelocationEncoding::Generic, 8) => { - elf::R_390_8 - } - (RelocationKind::Absolute, RelocationEncoding::Generic, 16) => { - elf::R_390_16 - } - (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { - elf::R_390_32 - } - (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { - elf::R_390_64 - } - (RelocationKind::Relative, RelocationEncoding::Generic, 16) => { - elf::R_390_PC16 - } - (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { - elf::R_390_PC32 - } - (RelocationKind::Relative, RelocationEncoding::Generic, 64) => { - elf::R_390_PC64 - } - (RelocationKind::Relative, RelocationEncoding::S390xDbl, 16) => { - elf::R_390_PC16DBL - } - (RelocationKind::Relative, RelocationEncoding::S390xDbl, 32) => { - elf::R_390_PC32DBL - } - (RelocationKind::PltRelative, RelocationEncoding::S390xDbl, 16) => { - elf::R_390_PLT16DBL - } - (RelocationKind::PltRelative, RelocationEncoding::S390xDbl, 32) => { - elf::R_390_PLT32DBL - } - (RelocationKind::Got, RelocationEncoding::Generic, 16) => { - elf::R_390_GOT16 - } - (RelocationKind::Got, RelocationEncoding::Generic, 32) => { - elf::R_390_GOT32 - } - (RelocationKind::Got, RelocationEncoding::Generic, 64) => { - elf::R_390_GOT64 - } - (RelocationKind::GotRelative, RelocationEncoding::S390xDbl, 32) => { - elf::R_390_GOTENT - } - (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 16) => { - elf::R_390_GOTOFF16 - } - (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 32) => { - elf::R_390_GOTOFF32 - } - (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 64) => { - elf::R_390_GOTOFF64 - } - (RelocationKind::GotBaseRelative, RelocationEncoding::Generic, 64) => { - elf::R_390_GOTPC - } - (RelocationKind::GotBaseRelative, RelocationEncoding::S390xDbl, 32) => { - elf::R_390_GOTPCDBL - 
} - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Sbf => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 64) => elf::R_SBF_64_64, - (RelocationKind::Absolute, _, 32) => elf::R_SBF_64_32, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Sparc64 => match (reloc.kind, reloc.encoding, reloc.size) { - // TODO: use R_SPARC_32/R_SPARC_64 if aligned. - (RelocationKind::Absolute, _, 32) => elf::R_SPARC_UA32, - (RelocationKind::Absolute, _, 64) => elf::R_SPARC_UA64, - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::Xtensa => match (reloc.kind, reloc.encoding, reloc.size) { - (RelocationKind::Absolute, _, 32) => elf::R_XTENSA_32, - (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { - elf::R_XTENSA_32_PCREL - } - (RelocationKind::Elf(x), _, _) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - _ => { - if let RelocationKind::Elf(x) = reloc.kind { - x - } else { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - } + let r_type = if let RelocationFlags::Elf { r_type } = reloc.flags { + r_type + } else { + return Err(Error("invalid relocation flags".into())); }; let r_sym = symbol_offsets[reloc.symbol.0].index.0; writer.write_relocation( @@ -844,7 +836,9 @@ impl<'a> Object<'a> { SectionKind::ReadOnlyString => { elf::SHF_ALLOC | elf::SHF_STRINGS | elf::SHF_MERGE } - SectionKind::OtherString => elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::OtherString | SectionKind::DebugString => { + elf::SHF_STRINGS | elf::SHF_MERGE + } SectionKind::Other | SectionKind::Debug | SectionKind::Metadata diff --git a/third_party/rust/object/src/write/elf/writer.rs 
b/third_party/rust/object/src/write/elf/writer.rs index 9750924969e3..420a9ea5c284 100644 --- a/third_party/rust/object/src/write/elf/writer.rs +++ b/third_party/rust/object/src/write/elf/writer.rs @@ -210,6 +210,11 @@ impl<'a> Writer<'a> { } } + /// Get the file class that will be written. + fn class(&self) -> Class { + Class { is_64: self.is_64 } + } + /// Return the current file length that has been reserved. pub fn reserved_len(&self) -> usize { self.len @@ -224,6 +229,8 @@ impl<'a> Writer<'a> { /// Reserve a file range with the given size and starting alignment. /// /// Returns the aligned offset of the start of the range. + /// + /// `align_start` must be a power of two. pub fn reserve(&mut self, len: usize, align_start: usize) -> usize { if align_start > 1 { self.len = util::align(self.len, align_start); @@ -259,20 +266,12 @@ impl<'a> Writer<'a> { self.buffer.resize(offset); } - fn file_header_size(&self) -> usize { - if self.is_64 { - mem::size_of::>() - } else { - mem::size_of::>() - } - } - /// Reserve the range for the file header. /// /// This must be at the start of the file. pub fn reserve_file_header(&mut self) { debug_assert_eq!(self.len, 0); - self.reserve(self.file_header_size(), 1); + self.reserve(self.class().file_header_size(), 1); } /// Write the file header. 
@@ -310,13 +309,13 @@ impl<'a> Writer<'a> { padding: [0; 7], }; - let e_ehsize = self.file_header_size() as u16; + let e_ehsize = self.class().file_header_size() as u16; let e_phoff = self.segment_offset as u64; let e_phentsize = if self.segment_num == 0 { 0 } else { - self.program_header_size() as u16 + self.class().program_header_size() as u16 }; // TODO: overflow let e_phnum = self.segment_num as u16; @@ -325,7 +324,7 @@ impl<'a> Writer<'a> { let e_shentsize = if self.section_num == 0 { 0 } else { - self.section_header_size() as u16 + self.class().section_header_size() as u16 }; let e_shnum = if self.section_num >= elf::SHN_LORESERVE.into() { 0 @@ -380,14 +379,6 @@ impl<'a> Writer<'a> { Ok(()) } - fn program_header_size(&self) -> usize { - if self.is_64 { - mem::size_of::>() - } else { - mem::size_of::>() - } - } - /// Reserve the range for the program headers. pub fn reserve_program_headers(&mut self, num: u32) { debug_assert_eq!(self.segment_offset, 0); @@ -395,8 +386,10 @@ impl<'a> Writer<'a> { return; } self.segment_num = num; - self.segment_offset = - self.reserve(num as usize * self.program_header_size(), self.elf_align); + self.segment_offset = self.reserve( + num as usize * self.class().program_header_size(), + self.elf_align, + ); } /// Write alignment padding bytes prior to the program headers. @@ -467,14 +460,6 @@ impl<'a> Writer<'a> { SectionIndex(index) } - fn section_header_size(&self) -> usize { - if self.is_64 { - mem::size_of::>() - } else { - mem::size_of::>() - } - } - /// Reserve the range for the section headers. /// /// This function does nothing if no sections were reserved. 
@@ -486,7 +471,7 @@ impl<'a> Writer<'a> { return; } self.section_offset = self.reserve( - self.section_num as usize * self.section_header_size(), + self.section_num as usize * self.class().section_header_size(), self.elf_align, ); } @@ -607,8 +592,16 @@ impl<'a> Writer<'a> { /// This must be called before [`Self::reserve_shstrtab`] /// and [`Self::reserve_section_headers`]. pub fn reserve_shstrtab_section_index(&mut self) -> SectionIndex { + self.reserve_shstrtab_section_index_with_name(&b".shstrtab"[..]) + } + + /// Reserve the section index for the section header string table. + /// + /// This must be called before [`Self::reserve_shstrtab`] + /// and [`Self::reserve_section_headers`]. + pub fn reserve_shstrtab_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert_eq!(self.shstrtab_index, SectionIndex(0)); - self.shstrtab_str_id = Some(self.add_section_name(&b".shstrtab"[..])); + self.shstrtab_str_id = Some(self.add_section_name(name)); self.shstrtab_index = self.reserve_section_index(); self.shstrtab_index } @@ -650,11 +643,16 @@ impl<'a> Writer<'a> { self.need_strtab } + /// Require the string table even if no strings were added. + pub fn require_strtab(&mut self) { + self.need_strtab = true; + } + /// Reserve the range for the string table. /// /// This range is used for a section named `.strtab`. /// - /// This function does nothing if no strings or symbols were defined. + /// This function does nothing if no strings were defined. /// This must be called after [`Self::add_string`]. pub fn reserve_strtab(&mut self) { debug_assert_eq!(self.strtab_offset, 0); @@ -682,8 +680,15 @@ impl<'a> Writer<'a> { /// /// This must be called before [`Self::reserve_section_headers`]. pub fn reserve_strtab_section_index(&mut self) -> SectionIndex { + self.reserve_strtab_section_index_with_name(&b".strtab"[..]) + } + + /// Reserve the section index for the string table. + /// + /// This must be called before [`Self::reserve_section_headers`]. 
+ pub fn reserve_strtab_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert_eq!(self.strtab_index, SectionIndex(0)); - self.strtab_str_id = Some(self.add_section_name(&b".strtab"[..])); + self.strtab_str_id = Some(self.add_section_name(name)); self.strtab_index = self.reserve_section_index(); self.strtab_index } @@ -721,8 +726,6 @@ impl<'a> Writer<'a> { debug_assert_eq!(self.symtab_offset, 0); debug_assert_eq!(self.symtab_num, 0); self.symtab_num = 1; - // The symtab must link to a strtab. - self.need_strtab = true; SymbolIndex(0) } @@ -743,8 +746,6 @@ impl<'a> Writer<'a> { debug_assert_eq!(self.symtab_shndx_offset, 0); if self.symtab_num == 0 { self.symtab_num = 1; - // The symtab must link to a strtab. - self.need_strtab = true; } let index = self.symtab_num; self.symtab_num += 1; @@ -763,14 +764,6 @@ impl<'a> Writer<'a> { self.symtab_num } - fn symbol_size(&self) -> usize { - if self.is_64 { - mem::size_of::>() - } else { - mem::size_of::>() - } - } - /// Reserve the range for the symbol table. /// /// This range is used for a section named `.symtab`. @@ -782,7 +775,7 @@ impl<'a> Writer<'a> { return; } self.symtab_offset = self.reserve( - self.symtab_num as usize * self.symbol_size(), + self.symtab_num as usize * self.class().sym_size(), self.elf_align, ); } @@ -859,8 +852,15 @@ impl<'a> Writer<'a> { /// /// This must be called before [`Self::reserve_section_headers`]. pub fn reserve_symtab_section_index(&mut self) -> SectionIndex { + self.reserve_symtab_section_index_with_name(&b".symtab"[..]) + } + + /// Reserve the section index for the symbol table. + /// + /// This must be called before [`Self::reserve_section_headers`]. 
+ pub fn reserve_symtab_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert_eq!(self.symtab_index, SectionIndex(0)); - self.symtab_str_id = Some(self.add_section_name(&b".symtab"[..])); + self.symtab_str_id = Some(self.add_section_name(name)); self.symtab_index = self.reserve_section_index(); self.symtab_index } @@ -883,11 +883,11 @@ impl<'a> Writer<'a> { sh_flags: 0, sh_addr: 0, sh_offset: self.symtab_offset as u64, - sh_size: self.symtab_num as u64 * self.symbol_size() as u64, + sh_size: self.symtab_num as u64 * self.class().sym_size() as u64, sh_link: self.strtab_index.0, sh_info: num_local, sh_addralign: self.elf_align as u64, - sh_entsize: self.symbol_size() as u64, + sh_entsize: self.class().sym_size() as u64, }); } @@ -896,6 +896,12 @@ impl<'a> Writer<'a> { self.need_symtab_shndx } + /// Require the extended section indices for the symbol table even + /// if no section indices are too large. + pub fn require_symtab_shndx(&mut self) { + self.need_symtab_shndx = true; + } + /// Reserve the range for the extended section indices for the symbol table. /// /// This range is used for a section named `.symtab_shndx`. @@ -931,8 +937,18 @@ impl<'a> Writer<'a> { /// /// This must be called before [`Self::reserve_section_headers`]. pub fn reserve_symtab_shndx_section_index(&mut self) -> SectionIndex { + self.reserve_symtab_shndx_section_index_with_name(&b".symtab_shndx"[..]) + } + + /// Reserve the section index for the extended section indices symbol table. + /// + /// You should check [`Self::symtab_shndx_needed`] before calling this + /// unless you have other means of knowing if this section is needed. + /// + /// This must be called before [`Self::reserve_section_headers`]. 
+ pub fn reserve_symtab_shndx_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert!(self.symtab_shndx_str_id.is_none()); - self.symtab_shndx_str_id = Some(self.add_section_name(&b".symtab_shndx"[..])); + self.symtab_shndx_str_id = Some(self.add_section_name(name)); self.reserve_section_index() } @@ -985,21 +1001,35 @@ impl<'a> Writer<'a> { self.need_dynstr } + /// Require the dynamic string table even if no strings were added. + pub fn require_dynstr(&mut self) { + self.need_dynstr = true; + } + /// Reserve the range for the dynamic string table. /// /// This range is used for a section named `.dynstr`. /// - /// This function does nothing if no dynamic strings or symbols were defined. + /// This function does nothing if no dynamic strings were defined. /// This must be called after [`Self::add_dynamic_string`]. - pub fn reserve_dynstr(&mut self) { + pub fn reserve_dynstr(&mut self) -> usize { debug_assert_eq!(self.dynstr_offset, 0); if !self.need_dynstr { - return; + return 0; } // Start with null string. self.dynstr_data = vec![0]; self.dynstr.write(1, &mut self.dynstr_data); self.dynstr_offset = self.reserve(self.dynstr_data.len(), 1); + self.dynstr_offset + } + + /// Return the size of the dynamic string table. + /// + /// This must be called after [`Self::reserve_dynstr`]. + pub fn dynstr_len(&mut self) -> usize { + debug_assert_ne!(self.dynstr_offset, 0); + self.dynstr_data.len() } /// Write the dynamic string table. @@ -1017,8 +1047,15 @@ impl<'a> Writer<'a> { /// /// This must be called before [`Self::reserve_section_headers`]. pub fn reserve_dynstr_section_index(&mut self) -> SectionIndex { + self.reserve_dynstr_section_index_with_name(&b".dynstr"[..]) + } + + /// Reserve the section index for the dynamic string table. + /// + /// This must be called before [`Self::reserve_section_headers`]. 
+ pub fn reserve_dynstr_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert_eq!(self.dynstr_index, SectionIndex(0)); - self.dynstr_str_id = Some(self.add_section_name(&b".dynstr"[..])); + self.dynstr_str_id = Some(self.add_section_name(name)); self.dynstr_index = self.reserve_section_index(); self.dynstr_index } @@ -1061,8 +1098,6 @@ impl<'a> Writer<'a> { debug_assert_eq!(self.dynsym_offset, 0); debug_assert_eq!(self.dynsym_num, 0); self.dynsym_num = 1; - // The symtab must link to a strtab. - self.need_dynstr = true; SymbolIndex(0) } @@ -1079,8 +1114,6 @@ impl<'a> Writer<'a> { debug_assert_eq!(self.dynsym_offset, 0); if self.dynsym_num == 0 { self.dynsym_num = 1; - // The symtab must link to a strtab. - self.need_dynstr = true; } let index = self.dynsym_num; self.dynsym_num += 1; @@ -1100,15 +1133,16 @@ impl<'a> Writer<'a> { /// /// This function does nothing if no dynamic symbols were reserved. /// This must be called after [`Self::reserve_dynamic_symbol_index`]. - pub fn reserve_dynsym(&mut self) { + pub fn reserve_dynsym(&mut self) -> usize { debug_assert_eq!(self.dynsym_offset, 0); if self.dynsym_num == 0 { - return; + return 0; } self.dynsym_offset = self.reserve( - self.dynsym_num as usize * self.symbol_size(), + self.dynsym_num as usize * self.class().sym_size(), self.elf_align, ); + self.dynsym_offset } /// Write the null dynamic symbol. @@ -1176,8 +1210,15 @@ impl<'a> Writer<'a> { /// /// This must be called before [`Self::reserve_section_headers`]. pub fn reserve_dynsym_section_index(&mut self) -> SectionIndex { + self.reserve_dynsym_section_index_with_name(&b".dynsym"[..]) + } + + /// Reserve the section index for the dynamic symbol table. + /// + /// This must be called before [`Self::reserve_section_headers`]. 
+ pub fn reserve_dynsym_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert_eq!(self.dynsym_index, SectionIndex(0)); - self.dynsym_str_id = Some(self.add_section_name(&b".dynsym"[..])); + self.dynsym_str_id = Some(self.add_section_name(name)); self.dynsym_index = self.reserve_section_index(); self.dynsym_index } @@ -1200,32 +1241,25 @@ impl<'a> Writer<'a> { sh_flags: elf::SHF_ALLOC.into(), sh_addr, sh_offset: self.dynsym_offset as u64, - sh_size: self.dynsym_num as u64 * self.symbol_size() as u64, + sh_size: self.dynsym_num as u64 * self.class().sym_size() as u64, sh_link: self.dynstr_index.0, sh_info: num_local, sh_addralign: self.elf_align as u64, - sh_entsize: self.symbol_size() as u64, + sh_entsize: self.class().sym_size() as u64, }); } - fn dyn_size(&self) -> usize { - if self.is_64 { - mem::size_of::>() - } else { - mem::size_of::>() - } - } - /// Reserve the range for the `.dynamic` section. /// /// This function does nothing if `dynamic_num` is zero. - pub fn reserve_dynamic(&mut self, dynamic_num: usize) { + pub fn reserve_dynamic(&mut self, dynamic_num: usize) -> usize { debug_assert_eq!(self.dynamic_offset, 0); if dynamic_num == 0 { - return; + return 0; } self.dynamic_num = dynamic_num; - self.dynamic_offset = self.reserve(dynamic_num * self.dyn_size(), self.elf_align); + self.dynamic_offset = self.reserve_dynamics(dynamic_num); + self.dynamic_offset } /// Write alignment padding bytes prior to the `.dynamic` section. @@ -1239,6 +1273,13 @@ impl<'a> Writer<'a> { debug_assert_eq!(self.dynamic_offset, self.buffer.len()); } + /// Reserve a file range for the given number of dynamic entries. + /// + /// Returns the offset of the range. + pub fn reserve_dynamics(&mut self, dynamic_num: usize) -> usize { + self.reserve(dynamic_num * self.class().dyn_size(), self.elf_align) + } + /// Write a dynamic string entry. 
pub fn write_dynamic_string(&mut self, tag: u32, id: StringId) { self.write_dynamic(tag, self.dynstr.get_offset(id) as u64); @@ -1246,7 +1287,6 @@ impl<'a> Writer<'a> { /// Write a dynamic value entry. pub fn write_dynamic(&mut self, d_tag: u32, d_val: u64) { - debug_assert!(self.dynamic_offset <= self.buffer.len()); let endian = self.endian; if self.is_64 { let d = elf::Dyn64 { @@ -1261,9 +1301,6 @@ impl<'a> Writer<'a> { }; self.buffer.write(&d); } - debug_assert!( - self.dynamic_offset + self.dynamic_num * self.dyn_size() >= self.buffer.len() - ); } /// Reserve the section index for the dynamic table. @@ -1286,39 +1323,22 @@ impl<'a> Writer<'a> { sh_flags: (elf::SHF_WRITE | elf::SHF_ALLOC).into(), sh_addr, sh_offset: self.dynamic_offset as u64, - sh_size: (self.dynamic_num * self.dyn_size()) as u64, + sh_size: (self.dynamic_num * self.class().dyn_size()) as u64, sh_link: self.dynstr_index.0, sh_info: 0, sh_addralign: self.elf_align as u64, - sh_entsize: self.dyn_size() as u64, + sh_entsize: self.class().dyn_size() as u64, }); } - fn rel_size(&self, is_rela: bool) -> usize { - if self.is_64 { - if is_rela { - mem::size_of::>() - } else { - mem::size_of::>() - } - } else { - if is_rela { - mem::size_of::>() - } else { - mem::size_of::>() - } - } - } - /// Reserve a file range for a SysV hash section. /// /// `symbol_count` is the number of symbols in the hash, /// not the total number of symbols. - pub fn reserve_hash(&mut self, bucket_count: u32, chain_count: u32) { - self.hash_size = mem::size_of::>() - + bucket_count as usize * 4 - + chain_count as usize * 4; + pub fn reserve_hash(&mut self, bucket_count: u32, chain_count: u32) -> usize { + self.hash_size = self.class().hash_size(bucket_count, chain_count); self.hash_offset = self.reserve(self.hash_size, self.elf_align); + self.hash_offset } /// Write a SysV hash section. @@ -1351,8 +1371,13 @@ impl<'a> Writer<'a> { /// Reserve the section index for the SysV hash table. 
pub fn reserve_hash_section_index(&mut self) -> SectionIndex { + self.reserve_hash_section_index_with_name(&b".hash"[..]) + } + + /// Reserve the section index for the SysV hash table. + pub fn reserve_hash_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert!(self.hash_str_id.is_none()); - self.hash_str_id = Some(self.add_section_name(&b".hash"[..])); + self.hash_str_id = Some(self.add_section_name(name)); self.reserve_section_index() } @@ -1381,12 +1406,17 @@ impl<'a> Writer<'a> { /// /// `symbol_count` is the number of symbols in the hash, /// not the total number of symbols. - pub fn reserve_gnu_hash(&mut self, bloom_count: u32, bucket_count: u32, symbol_count: u32) { - self.gnu_hash_size = mem::size_of::>() - + bloom_count as usize * self.elf_align - + bucket_count as usize * 4 - + symbol_count as usize * 4; + pub fn reserve_gnu_hash( + &mut self, + bloom_count: u32, + bucket_count: u32, + symbol_count: u32, + ) -> usize { + self.gnu_hash_size = self + .class() + .gnu_hash_size(bloom_count, bucket_count, symbol_count); self.gnu_hash_offset = self.reserve(self.gnu_hash_size, self.elf_align); + self.gnu_hash_offset } /// Write a GNU hash section. @@ -1472,8 +1502,13 @@ impl<'a> Writer<'a> { /// Reserve the section index for the GNU hash table. pub fn reserve_gnu_hash_section_index(&mut self) -> SectionIndex { + self.reserve_gnu_hash_section_index_with_name(&b".gnu.hash"[..]) + } + + /// Reserve the section index for the GNU hash table. 
+ pub fn reserve_gnu_hash_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert!(self.gnu_hash_str_id.is_none()); - self.gnu_hash_str_id = Some(self.add_section_name(&b".gnu.hash"[..])); + self.gnu_hash_str_id = Some(self.add_section_name(name)); self.reserve_section_index() } @@ -1494,19 +1529,20 @@ impl<'a> Writer<'a> { sh_link: self.dynsym_index.0, sh_info: 0, sh_addralign: self.elf_align as u64, - sh_entsize: 0, + sh_entsize: if self.is_64 { 0 } else { 4 }, }); } /// Reserve the range for the `.gnu.version` section. /// /// This function does nothing if no dynamic symbols were reserved. - pub fn reserve_gnu_versym(&mut self) { + pub fn reserve_gnu_versym(&mut self) -> usize { debug_assert_eq!(self.gnu_versym_offset, 0); if self.dynsym_num == 0 { - return; + return 0; } self.gnu_versym_offset = self.reserve(self.dynsym_num as usize * 2, 2); + self.gnu_versym_offset } /// Write the null symbol version entry. @@ -1529,8 +1565,13 @@ impl<'a> Writer<'a> { /// Reserve the section index for the `.gnu.version` section. pub fn reserve_gnu_versym_section_index(&mut self) -> SectionIndex { + self.reserve_gnu_versym_section_index_with_name(&b".gnu.version"[..]) + } + + /// Reserve the section index for the `.gnu.version` section. + pub fn reserve_gnu_versym_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert!(self.gnu_versym_str_id.is_none()); - self.gnu_versym_str_id = Some(self.add_section_name(&b".gnu.version"[..])); + self.gnu_versym_str_id = Some(self.add_section_name(name)); self.reserve_section_index() } @@ -1547,7 +1588,7 @@ impl<'a> Writer<'a> { sh_flags: elf::SHF_ALLOC.into(), sh_addr, sh_offset: self.gnu_versym_offset as u64, - sh_size: self.dynsym_num as u64 * 2, + sh_size: self.class().gnu_versym_size(self.dynsym_num as usize) as u64, sh_link: self.dynsym_index.0, sh_info: 0, sh_addralign: 2, @@ -1556,16 +1597,16 @@ impl<'a> Writer<'a> { } /// Reserve the range for the `.gnu.version_d` section. 
- pub fn reserve_gnu_verdef(&mut self, verdef_count: usize, verdaux_count: usize) { + pub fn reserve_gnu_verdef(&mut self, verdef_count: usize, verdaux_count: usize) -> usize { debug_assert_eq!(self.gnu_verdef_offset, 0); if verdef_count == 0 { - return; + return 0; } - self.gnu_verdef_size = verdef_count * mem::size_of::>() - + verdaux_count * mem::size_of::>(); + self.gnu_verdef_size = self.class().gnu_verdef_size(verdef_count, verdaux_count); self.gnu_verdef_offset = self.reserve(self.gnu_verdef_size, self.elf_align); self.gnu_verdef_count = verdef_count as u16; self.gnu_verdef_remaining = self.gnu_verdef_count; + self.gnu_verdef_offset } /// Write alignment padding bytes prior to a `.gnu.version_d` section. @@ -1624,8 +1665,13 @@ impl<'a> Writer<'a> { /// Reserve the section index for the `.gnu.version_d` section. pub fn reserve_gnu_verdef_section_index(&mut self) -> SectionIndex { + self.reserve_gnu_verdef_section_index_with_name(&b".gnu.version_d"[..]) + } + + /// Reserve the section index for the `.gnu.version_d` section. + pub fn reserve_gnu_verdef_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert!(self.gnu_verdef_str_id.is_none()); - self.gnu_verdef_str_id = Some(self.add_section_name(&b".gnu.version_d"[..])); + self.gnu_verdef_str_id = Some(self.add_section_name(name)); self.reserve_section_index() } @@ -1651,16 +1697,16 @@ impl<'a> Writer<'a> { } /// Reserve the range for the `.gnu.version_r` section. 
- pub fn reserve_gnu_verneed(&mut self, verneed_count: usize, vernaux_count: usize) { + pub fn reserve_gnu_verneed(&mut self, verneed_count: usize, vernaux_count: usize) -> usize { debug_assert_eq!(self.gnu_verneed_offset, 0); if verneed_count == 0 { - return; + return 0; } - self.gnu_verneed_size = verneed_count * mem::size_of::>() - + vernaux_count * mem::size_of::>(); + self.gnu_verneed_size = self.class().gnu_verneed_size(verneed_count, vernaux_count); self.gnu_verneed_offset = self.reserve(self.gnu_verneed_size, self.elf_align); self.gnu_verneed_count = verneed_count as u16; self.gnu_verneed_remaining = self.gnu_verneed_count; + self.gnu_verneed_offset } /// Write alignment padding bytes prior to a `.gnu.version_r` section. @@ -1719,8 +1765,13 @@ impl<'a> Writer<'a> { /// Reserve the section index for the `.gnu.version_r` section. pub fn reserve_gnu_verneed_section_index(&mut self) -> SectionIndex { + self.reserve_gnu_verneed_section_index_with_name(&b".gnu.version_r"[..]) + } + + /// Reserve the section index for the `.gnu.version_r` section. + pub fn reserve_gnu_verneed_section_index_with_name(&mut self, name: &'a [u8]) -> SectionIndex { debug_assert!(self.gnu_verneed_str_id.is_none()); - self.gnu_verneed_str_id = Some(self.add_section_name(&b".gnu.version_r"[..])); + self.gnu_verneed_str_id = Some(self.add_section_name(name)); self.reserve_section_index() } @@ -1747,19 +1798,28 @@ impl<'a> Writer<'a> { /// Reserve the section index for the `.gnu.attributes` section. pub fn reserve_gnu_attributes_section_index(&mut self) -> SectionIndex { + self.reserve_gnu_attributes_section_index_with_name(&b".gnu.attributes"[..]) + } + + /// Reserve the section index for the `.gnu.attributes` section. 
+ pub fn reserve_gnu_attributes_section_index_with_name( + &mut self, + name: &'a [u8], + ) -> SectionIndex { debug_assert!(self.gnu_attributes_str_id.is_none()); - self.gnu_attributes_str_id = Some(self.add_section_name(&b".gnu.attributes"[..])); + self.gnu_attributes_str_id = Some(self.add_section_name(name)); self.reserve_section_index() } /// Reserve the range for the `.gnu.attributes` section. - pub fn reserve_gnu_attributes(&mut self, gnu_attributes_size: usize) { + pub fn reserve_gnu_attributes(&mut self, gnu_attributes_size: usize) -> usize { debug_assert_eq!(self.gnu_attributes_offset, 0); if gnu_attributes_size == 0 { - return; + return 0; } self.gnu_attributes_size = gnu_attributes_size; self.gnu_attributes_offset = self.reserve(self.gnu_attributes_size, self.elf_align); + self.gnu_attributes_offset } /// Write the section header for the `.gnu.attributes` section. @@ -1797,7 +1857,7 @@ impl<'a> Writer<'a> { /// /// Returns the offset of the range. pub fn reserve_relocations(&mut self, count: usize, is_rela: bool) -> usize { - self.reserve(count * self.rel_size(is_rela), self.elf_align) + self.reserve(count * self.class().rel_size(is_rela), self.elf_align) } /// Write alignment padding bytes prior to a relocation section. @@ -1865,11 +1925,11 @@ impl<'a> Writer<'a> { sh_flags: elf::SHF_INFO_LINK.into(), sh_addr: 0, sh_offset: offset as u64, - sh_size: (count * self.rel_size(is_rela)) as u64, + sh_size: (count * self.class().rel_size(is_rela)) as u64, sh_link: symtab.0, sh_info: section.0, sh_addralign: self.elf_align as u64, - sh_entsize: self.rel_size(is_rela) as u64, + sh_entsize: self.class().rel_size(is_rela) as u64, }); } @@ -2047,6 +2107,119 @@ impl AttributesWriter { } } +/// An ELF file class. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct Class { + /// Whether the file is 64-bit. + pub is_64: bool, +} + +impl Class { + /// Return the alignment size. 
+ pub fn align(self) -> usize { + if self.is_64 { + 8 + } else { + 4 + } + } + + /// Return the size of the file header. + pub fn file_header_size(self) -> usize { + if self.is_64 { + mem::size_of::>() + } else { + mem::size_of::>() + } + } + + /// Return the size of a program header. + pub fn program_header_size(self) -> usize { + if self.is_64 { + mem::size_of::>() + } else { + mem::size_of::>() + } + } + + /// Return the size of a section header. + pub fn section_header_size(self) -> usize { + if self.is_64 { + mem::size_of::>() + } else { + mem::size_of::>() + } + } + + /// Return the size of a symbol. + pub fn sym_size(self) -> usize { + if self.is_64 { + mem::size_of::>() + } else { + mem::size_of::>() + } + } + + /// Return the size of a relocation entry. + pub fn rel_size(self, is_rela: bool) -> usize { + if self.is_64 { + if is_rela { + mem::size_of::>() + } else { + mem::size_of::>() + } + } else { + if is_rela { + mem::size_of::>() + } else { + mem::size_of::>() + } + } + } + + /// Return the size of a dynamic entry. + pub fn dyn_size(self) -> usize { + if self.is_64 { + mem::size_of::>() + } else { + mem::size_of::>() + } + } + + /// Return the size of a hash table. + pub fn hash_size(self, bucket_count: u32, chain_count: u32) -> usize { + mem::size_of::>() + + bucket_count as usize * 4 + + chain_count as usize * 4 + } + + /// Return the size of a GNU hash table. + pub fn gnu_hash_size(self, bloom_count: u32, bucket_count: u32, symbol_count: u32) -> usize { + let bloom_size = if self.is_64 { 8 } else { 4 }; + mem::size_of::>() + + bloom_count as usize * bloom_size + + bucket_count as usize * 4 + + symbol_count as usize * 4 + } + + /// Return the size of a GNU symbol version section. + pub fn gnu_versym_size(self, symbol_count: usize) -> usize { + symbol_count * 2 + } + + /// Return the size of a GNU version definition section. 
+ pub fn gnu_verdef_size(self, verdef_count: usize, verdaux_count: usize) -> usize { + verdef_count * mem::size_of::>() + + verdaux_count * mem::size_of::>() + } + + /// Return the size of a GNU version dependency section. + pub fn gnu_verneed_size(self, verneed_count: usize, vernaux_count: usize) -> usize { + verneed_count * mem::size_of::>() + + vernaux_count * mem::size_of::>() + } +} + /// Native endian version of [`elf::FileHeader64`]. #[allow(missing_docs)] #[derive(Debug, Clone)] diff --git a/third_party/rust/object/src/write/macho.rs b/third_party/rust/object/src/write/macho.rs index e3ce55bb4ec4..359476982fce 100644 --- a/third_party/rust/object/src/write/macho.rs +++ b/third_party/rust/object/src/write/macho.rs @@ -5,7 +5,6 @@ use crate::macho; use crate::write::string::*; use crate::write::util::*; use crate::write::*; -use crate::AddressSize; #[derive(Default, Clone, Copy)] struct SectionOffsets { @@ -13,11 +12,11 @@ struct SectionOffsets { offset: usize, address: u64, reloc_offset: usize, + reloc_count: usize, } #[derive(Default, Clone, Copy)] struct SymbolOffsets { - emit: bool, index: usize, str_id: Option, } @@ -48,6 +47,14 @@ impl MachOBuildVersion { // Public methods. impl<'a> Object<'a> { + /// Specify the Mach-O CPU subtype. + /// + /// Requires `feature = "macho"`. + #[inline] + pub fn set_macho_cpu_subtype(&mut self, cpu_subtype: u32) { + self.macho_cpu_subtype = Some(cpu_subtype); + } + /// Specify information for a Mach-O `LC_BUILD_VERSION` command. /// /// Requires `feature = "macho"`. @@ -59,16 +66,6 @@ impl<'a> Object<'a> { // Private methods. 
impl<'a> Object<'a> { - pub(crate) fn macho_set_subsections_via_symbols(&mut self) { - let flags = match self.flags { - FileFlags::MachO { flags } => flags, - _ => 0, - }; - self.flags = FileFlags::MachO { - flags: flags | macho::MH_SUBSECTIONS_VIA_SYMBOLS, - }; - } - pub(crate) fn macho_segment_name(&self, segment: StandardSegment) -> &'static [u8] { match segment { StandardSegment::Text => &b"__TEXT"[..], @@ -213,11 +210,13 @@ impl<'a> Object<'a> { section, Relocation { offset, - size: address_size * 8, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol: tlv_bootstrap, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: address_size * 8, + }, }, ) .unwrap(); @@ -225,11 +224,13 @@ impl<'a> Object<'a> { section, Relocation { offset: offset + u64::from(address_size) * 2, - size: address_size * 8, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol: init_symbol_id, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: address_size * 8, + }, }, ) .unwrap(); @@ -243,21 +244,132 @@ impl<'a> Object<'a> { init_symbol_id } - pub(crate) fn macho_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { - let constant = match relocation.kind { - // AArch64Call relocations have special handling for the addend, so don't adjust it - RelocationKind::Relative if relocation.encoding == RelocationEncoding::AArch64Call => 0, - RelocationKind::Relative - | RelocationKind::GotRelative - | RelocationKind::PltRelative => relocation.addend + 4, - _ => relocation.addend, + pub(crate) fn macho_translate_relocation(&mut self, reloc: &mut Relocation) -> Result<()> { + let (kind, encoding, mut size) = if let RelocationFlags::Generic { + kind, + encoding, + size, + } = reloc.flags + { + (kind, encoding, size) + } else { + return Ok(()); }; // Aarch64 relocs of these 
sizes act as if they are double-word length - if self.architecture == Architecture::Aarch64 && matches!(relocation.size, 12 | 21 | 26) { - relocation.size = 32; + if self.architecture == Architecture::Aarch64 && matches!(size, 12 | 21 | 26) { + size = 32; + } + let r_length = match size { + 8 => 0, + 16 => 1, + 32 => 2, + 64 => 3, + _ => return Err(Error(format!("unimplemented reloc size {:?}", reloc))), + }; + let (r_pcrel, r_type) = match self.architecture { + Architecture::I386 => match kind { + RelocationKind::Absolute => (false, macho::GENERIC_RELOC_VANILLA), + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::X86_64 => match (kind, encoding) { + (RelocationKind::Absolute, RelocationEncoding::Generic) => { + (false, macho::X86_64_RELOC_UNSIGNED) + } + (RelocationKind::Relative, RelocationEncoding::Generic) => { + (true, macho::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86RipRelative) => { + (true, macho::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86Branch) => { + (true, macho::X86_64_RELOC_BRANCH) + } + (RelocationKind::PltRelative, RelocationEncoding::X86Branch) => { + (true, macho::X86_64_RELOC_BRANCH) + } + (RelocationKind::GotRelative, RelocationEncoding::Generic) => { + (true, macho::X86_64_RELOC_GOT) + } + (RelocationKind::GotRelative, RelocationEncoding::X86RipRelativeMovq) => { + (true, macho::X86_64_RELOC_GOT_LOAD) + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Aarch64 | Architecture::Aarch64_Ilp32 => match (kind, encoding) { + (RelocationKind::Absolute, RelocationEncoding::Generic) => { + (false, macho::ARM64_RELOC_UNSIGNED) + } + (RelocationKind::Relative, RelocationEncoding::AArch64Call) => { + (true, macho::ARM64_RELOC_BRANCH26) + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + _ => { + return Err(Error(format!("unimplemented 
relocation {:?}", reloc))); + } + }; + reloc.flags = RelocationFlags::MachO { + r_type, + r_pcrel, + r_length, + }; + Ok(()) + } + + pub(crate) fn macho_adjust_addend(&mut self, relocation: &mut Relocation) -> Result { + let (r_type, r_pcrel) = if let RelocationFlags::MachO { + r_type, r_pcrel, .. + } = relocation.flags + { + (r_type, r_pcrel) + } else { + return Err(Error(format!("invalid relocation flags {:?}", relocation))); + }; + if r_pcrel { + // For PC relative relocations on some architectures, the + // addend does not include the offset required due to the + // PC being different from the place of the relocation. + // This differs from other file formats, so adjust the + // addend here to account for this. + let pcrel_offset = match self.architecture { + Architecture::I386 => 4, + Architecture::X86_64 => match r_type { + macho::X86_64_RELOC_SIGNED_1 => 5, + macho::X86_64_RELOC_SIGNED_2 => 6, + macho::X86_64_RELOC_SIGNED_4 => 8, + _ => 4, + }, + // TODO: maybe missing support for some architectures and relocations + _ => 0, + }; + relocation.addend += pcrel_offset; + } + // Determine if addend is implicit. + let implicit = if self.architecture == Architecture::Aarch64 { + match r_type { + macho::ARM64_RELOC_BRANCH26 + | macho::ARM64_RELOC_PAGE21 + | macho::ARM64_RELOC_PAGEOFF12 => false, + _ => true, + } + } else { + true + }; + Ok(implicit) + } + + pub(crate) fn macho_relocation_size(&self, reloc: &Relocation) -> Result { + if let RelocationFlags::MachO { r_length, .. 
} = reloc.flags { + Ok(8 << r_length) + } else { + Err(Error("invalid relocation flags".into())) } - relocation.addend -= constant; - constant } pub(crate) fn macho_write(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { @@ -281,12 +393,6 @@ impl<'a> Object<'a> { let mut ncmds = 0; let command_offset = offset; - let build_version_offset = offset; - if let Some(version) = &self.macho_build_version { - offset += version.cmdsize() as usize; - ncmds += 1; - } - // Calculate size of segment command and section headers. let segment_command_offset = offset; let segment_command_len = @@ -294,12 +400,25 @@ impl<'a> Object<'a> { offset += segment_command_len; ncmds += 1; + // Calculate size of build version. + let build_version_offset = offset; + if let Some(version) = &self.macho_build_version { + offset += version.cmdsize() as usize; + ncmds += 1; + } + // Calculate size of symtab command. let symtab_command_offset = offset; let symtab_command_len = mem::size_of::>(); offset += symtab_command_len; ncmds += 1; + // Calculate size of dysymtab command. + let dysymtab_command_offset = offset; + let dysymtab_command_len = mem::size_of::>(); + offset += dysymtab_command_len; + ncmds += 1; + let sizeofcmds = offset - command_offset; // Calculate size of section data. @@ -327,10 +446,12 @@ impl<'a> Object<'a> { } } - // Count symbols and add symbol strings to strtab. + // Partition symbols and add symbol strings to strtab. let mut strtab = StringTable::default(); let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; - let mut nsyms = 0; + let mut local_symbols = vec![]; + let mut external_symbols = vec![]; + let mut undefined_symbols = vec![]; for (index, symbol) in self.symbols.iter().enumerate() { // The unified API allows creating symbols that we don't emit, so filter // them out here. 
@@ -339,7 +460,7 @@ impl<'a> Object<'a> { match symbol.kind { SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls | SymbolKind::Unknown => {} SymbolKind::File | SymbolKind::Section => continue, - SymbolKind::Null | SymbolKind::Label => { + SymbolKind::Label => { return Err(Error(format!( "unimplemented symbol `{}` kind {:?}", symbol.name().unwrap_or(""), @@ -347,12 +468,47 @@ impl<'a> Object<'a> { ))); } } - symbol_offsets[index].emit = true; - symbol_offsets[index].index = nsyms; - nsyms += 1; if !symbol.name.is_empty() { symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); } + if symbol.is_undefined() { + undefined_symbols.push(index); + } else if symbol.is_local() { + local_symbols.push(index); + } else { + external_symbols.push(index); + } + } + + external_symbols.sort_by_key(|index| &*self.symbols[*index].name); + undefined_symbols.sort_by_key(|index| &*self.symbols[*index].name); + + // Count symbols. + let mut nsyms = 0; + for index in local_symbols + .iter() + .copied() + .chain(external_symbols.iter().copied()) + .chain(undefined_symbols.iter().copied()) + { + symbol_offsets[index].index = nsyms; + nsyms += 1; + } + + // Calculate size of relocations. + for (index, section) in self.sections.iter().enumerate() { + let count: usize = section + .relocations + .iter() + .map(|reloc| 1 + usize::from(reloc.addend != 0)) + .sum(); + if count != 0 { + offset = align(offset, pointer_align); + section_offsets[index].reloc_offset = offset; + section_offsets[index].reloc_count = count; + let len = count * mem::size_of::>(); + offset += len; + } } // Calculate size of symtab. @@ -366,47 +522,51 @@ impl<'a> Object<'a> { // Start with null name. let mut strtab_data = vec![0]; strtab.write(1, &mut strtab_data); + write_align(&mut strtab_data, pointer_align); offset += strtab_data.len(); - // Calculate size of relocations. 
- for (index, section) in self.sections.iter().enumerate() { - let count = section.relocations.len(); - if count != 0 { - offset = align(offset, 4); - section_offsets[index].reloc_offset = offset; - let len = count * mem::size_of::>(); - offset += len; - } - } - // Start writing. buffer .reserve(offset) .map_err(|_| Error(String::from("Cannot allocate buffer")))?; // Write file header. - let (cputype, cpusubtype) = match self.architecture { - Architecture::Arm => (macho::CPU_TYPE_ARM, macho::CPU_SUBTYPE_ARM_ALL), - Architecture::Aarch64 => (macho::CPU_TYPE_ARM64, macho::CPU_SUBTYPE_ARM64_ALL), - Architecture::Aarch64_Ilp32 => { + let (cputype, mut cpusubtype) = match (self.architecture, self.sub_architecture) { + (Architecture::Arm, None) => (macho::CPU_TYPE_ARM, macho::CPU_SUBTYPE_ARM_ALL), + (Architecture::Aarch64, None) => (macho::CPU_TYPE_ARM64, macho::CPU_SUBTYPE_ARM64_ALL), + (Architecture::Aarch64, Some(SubArchitecture::Arm64E)) => { + (macho::CPU_TYPE_ARM64, macho::CPU_SUBTYPE_ARM64E) + } + (Architecture::Aarch64_Ilp32, None) => { (macho::CPU_TYPE_ARM64_32, macho::CPU_SUBTYPE_ARM64_32_V8) } - Architecture::I386 => (macho::CPU_TYPE_X86, macho::CPU_SUBTYPE_I386_ALL), - Architecture::X86_64 => (macho::CPU_TYPE_X86_64, macho::CPU_SUBTYPE_X86_64_ALL), - Architecture::PowerPc => (macho::CPU_TYPE_POWERPC, macho::CPU_SUBTYPE_POWERPC_ALL), - Architecture::PowerPc64 => (macho::CPU_TYPE_POWERPC64, macho::CPU_SUBTYPE_POWERPC_ALL), + (Architecture::I386, None) => (macho::CPU_TYPE_X86, macho::CPU_SUBTYPE_I386_ALL), + (Architecture::X86_64, None) => (macho::CPU_TYPE_X86_64, macho::CPU_SUBTYPE_X86_64_ALL), + (Architecture::PowerPc, None) => { + (macho::CPU_TYPE_POWERPC, macho::CPU_SUBTYPE_POWERPC_ALL) + } + (Architecture::PowerPc64, None) => { + (macho::CPU_TYPE_POWERPC64, macho::CPU_SUBTYPE_POWERPC_ALL) + } _ => { return Err(Error(format!( - "unimplemented architecture {:?}", - self.architecture + "unimplemented architecture {:?} with sub-architecture {:?}", + 
self.architecture, self.sub_architecture ))); } }; - let flags = match self.flags { + if let Some(cpu_subtype) = self.macho_cpu_subtype { + cpusubtype = cpu_subtype; + } + + let mut flags = match self.flags { FileFlags::MachO { flags } => flags, _ => 0, }; + if self.macho_subsections_via_symbols { + flags |= macho::MH_SUBSECTIONS_VIA_SYMBOLS; + } macho.write_mach_header( buffer, MachHeader { @@ -419,18 +579,6 @@ impl<'a> Object<'a> { }, ); - if let Some(version) = &self.macho_build_version { - debug_assert_eq!(build_version_offset, buffer.len()); - buffer.write(&macho::BuildVersionCommand { - cmd: U32::new(endian, macho::LC_BUILD_VERSION), - cmdsize: U32::new(endian, version.cmdsize()), - platform: U32::new(endian, version.platform), - minos: U32::new(endian, version.minos), - sdk: U32::new(endian, version.sdk), - ntools: U32::new(endian, 0), - }); - } - // Write segment command. debug_assert_eq!(segment_command_offset, buffer.len()); macho.write_segment_command( @@ -485,7 +633,7 @@ impl<'a> Object<'a> { SectionKind::Tls => macho::S_THREAD_LOCAL_REGULAR, SectionKind::UninitializedTls => macho::S_THREAD_LOCAL_ZEROFILL, SectionKind::TlsVariables => macho::S_THREAD_LOCAL_VARIABLES, - SectionKind::Debug => macho::S_ATTR_DEBUG, + SectionKind::Debug | SectionKind::DebugString => macho::S_ATTR_DEBUG, SectionKind::OtherString => macho::S_CSTRING_LITERALS, SectionKind::Other | SectionKind::Linker | SectionKind::Metadata => 0, SectionKind::Note | SectionKind::Unknown | SectionKind::Elf(_) => { @@ -507,12 +655,25 @@ impl<'a> Object<'a> { offset: section_offsets[index].offset as u32, align: section.align.trailing_zeros(), reloff: section_offsets[index].reloc_offset as u32, - nreloc: section.relocations.len() as u32, + nreloc: section_offsets[index].reloc_count as u32, flags, }, ); } + // Write build version. 
+ if let Some(version) = &self.macho_build_version { + debug_assert_eq!(build_version_offset, buffer.len()); + buffer.write(&macho::BuildVersionCommand { + cmd: U32::new(endian, macho::LC_BUILD_VERSION), + cmdsize: U32::new(endian, version.cmdsize()), + platform: U32::new(endian, version.platform), + minos: U32::new(endian, version.minos), + sdk: U32::new(endian, version.sdk), + ntools: U32::new(endian, 0), + }); + } + // Write symtab command. debug_assert_eq!(symtab_command_offset, buffer.len()); let symtab_command = macho::SymtabCommand { @@ -525,6 +686,35 @@ impl<'a> Object<'a> { }; buffer.write(&symtab_command); + // Write dysymtab command. + debug_assert_eq!(dysymtab_command_offset, buffer.len()); + let dysymtab_command = macho::DysymtabCommand { + cmd: U32::new(endian, macho::LC_DYSYMTAB), + cmdsize: U32::new(endian, dysymtab_command_len as u32), + ilocalsym: U32::new(endian, 0), + nlocalsym: U32::new(endian, local_symbols.len() as u32), + iextdefsym: U32::new(endian, local_symbols.len() as u32), + nextdefsym: U32::new(endian, external_symbols.len() as u32), + iundefsym: U32::new( + endian, + local_symbols.len() as u32 + external_symbols.len() as u32, + ), + nundefsym: U32::new(endian, undefined_symbols.len() as u32), + tocoff: U32::default(), + ntoc: U32::default(), + modtaboff: U32::default(), + nmodtab: U32::default(), + extrefsymoff: U32::default(), + nextrefsyms: U32::default(), + indirectsymoff: U32::default(), + nindirectsyms: U32::default(), + extreloff: U32::default(), + nextrel: U32::default(), + locreloff: U32::default(), + nlocrel: U32::default(), + }; + buffer.write(&dysymtab_command); + // Write section data. for (index, section) in self.sections.iter().enumerate() { if !section.is_bss() { @@ -534,13 +724,103 @@ impl<'a> Object<'a> { } debug_assert_eq!(segment_file_offset + segment_file_size, buffer.len()); + // Write relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(buffer, pointer_align); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + + let mut write_reloc = |reloc: &Relocation| { + let (r_type, r_pcrel, r_length) = if let RelocationFlags::MachO { + r_type, + r_pcrel, + r_length, + } = reloc.flags + { + (r_type, r_pcrel, r_length) + } else { + return Err(Error("invalid relocation flags".into())); + }; + + // Write explicit addend. + if reloc.addend != 0 { + let r_type = match self.architecture { + Architecture::Aarch64 | Architecture::Aarch64_Ilp32 => { + macho::ARM64_RELOC_ADDEND + } + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))) + } + }; + + let reloc_info = macho::RelocationInfo { + r_address: reloc.offset as u32, + r_symbolnum: reloc.addend as u32, + r_pcrel: false, + r_length, + r_extern: false, + r_type, + }; + buffer.write(&reloc_info.relocation(endian)); + } + + let r_extern; + let r_symbolnum; + let symbol = &self.symbols[reloc.symbol.0]; + if symbol.kind == SymbolKind::Section { + r_symbolnum = section_offsets[symbol.section.id().unwrap().0].index as u32; + r_extern = false; + } else { + r_symbolnum = symbol_offsets[reloc.symbol.0].index as u32; + r_extern = true; + } + + let reloc_info = macho::RelocationInfo { + r_address: reloc.offset as u32, + r_symbolnum, + r_pcrel, + r_length, + r_extern, + r_type, + }; + buffer.write(&reloc_info.relocation(endian)); + Ok(()) + }; + + // Relocations are emitted in descending order as otherwise Apple's + // new linker crashes. 
This matches LLVM's behavior too: + // https://github.com/llvm/llvm-project/blob/e9b8cd0c8/llvm/lib/MC/MachObjectWriter.cpp#L1001-L1002 + let need_reverse = |relocs: &[Relocation]| { + let Some(first) = relocs.first() else { + return false; + }; + let Some(last) = relocs.last() else { + return false; + }; + first.offset < last.offset + }; + if need_reverse(§ion.relocations) { + for reloc in section.relocations.iter().rev() { + write_reloc(reloc)?; + } + } else { + for reloc in §ion.relocations { + write_reloc(reloc)?; + } + } + } + } + // Write symtab. write_align(buffer, pointer_align); debug_assert_eq!(symtab_offset, buffer.len()); - for (index, symbol) in self.symbols.iter().enumerate() { - if !symbol_offsets[index].emit { - continue; - } + for index in local_symbols + .iter() + .copied() + .chain(external_symbols.iter().copied()) + .chain(undefined_symbols.iter().copied()) + { + let symbol = &self.symbols[index]; // TODO: N_STAB let (mut n_type, n_sect) = match symbol.section { SymbolSection::Undefined => (macho::N_UNDF | macho::N_EXT, 0), @@ -604,128 +884,6 @@ impl<'a> Object<'a> { debug_assert_eq!(strtab_offset, buffer.len()); buffer.write_bytes(&strtab_data); - // Write relocations. 
- for (index, section) in self.sections.iter().enumerate() { - if !section.relocations.is_empty() { - write_align(buffer, 4); - debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); - for reloc in §ion.relocations { - let r_extern; - let mut r_symbolnum; - let symbol = &self.symbols[reloc.symbol.0]; - if symbol.kind == SymbolKind::Section { - r_symbolnum = section_offsets[symbol.section.id().unwrap().0].index as u32; - r_extern = false; - } else { - r_symbolnum = symbol_offsets[reloc.symbol.0].index as u32; - r_extern = true; - } - let r_length = match reloc.size { - 8 => 0, - 16 => 1, - 32 => 2, - 64 => 3, - _ => return Err(Error(format!("unimplemented reloc size {:?}", reloc))), - }; - let (r_pcrel, r_type) = match self.architecture { - Architecture::I386 => match reloc.kind { - RelocationKind::Absolute => (false, macho::GENERIC_RELOC_VANILLA), - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.addend) { - (RelocationKind::Absolute, RelocationEncoding::Generic, 0) => { - (false, macho::X86_64_RELOC_UNSIGNED) - } - (RelocationKind::Relative, RelocationEncoding::Generic, -4) => { - (true, macho::X86_64_RELOC_SIGNED) - } - (RelocationKind::Relative, RelocationEncoding::X86RipRelative, -4) => { - (true, macho::X86_64_RELOC_SIGNED) - } - (RelocationKind::Relative, RelocationEncoding::X86Branch, -4) => { - (true, macho::X86_64_RELOC_BRANCH) - } - (RelocationKind::PltRelative, RelocationEncoding::X86Branch, -4) => { - (true, macho::X86_64_RELOC_BRANCH) - } - (RelocationKind::GotRelative, RelocationEncoding::Generic, -4) => { - (true, macho::X86_64_RELOC_GOT) - } - ( - RelocationKind::GotRelative, - RelocationEncoding::X86RipRelativeMovq, - -4, - ) => (true, macho::X86_64_RELOC_GOT_LOAD), - (RelocationKind::MachO { value, relative }, _, _) => (relative, value), - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }, - 
Architecture::Aarch64 | Architecture::Aarch64_Ilp32 => { - match (reloc.kind, reloc.encoding, reloc.addend) { - (RelocationKind::Absolute, RelocationEncoding::Generic, 0) => { - (false, macho::ARM64_RELOC_UNSIGNED) - } - (RelocationKind::Relative, RelocationEncoding::AArch64Call, 0) => { - (true, macho::ARM64_RELOC_BRANCH26) - } - // Non-zero addend, so we have to encode the addend separately - ( - RelocationKind::Relative, - RelocationEncoding::AArch64Call, - value, - ) => { - // first emit the BR26 relocation - let reloc_info = macho::RelocationInfo { - r_address: reloc.offset as u32, - r_symbolnum, - r_pcrel: true, - r_length, - r_extern: true, - r_type: macho::ARM64_RELOC_BRANCH26, - }; - buffer.write(&reloc_info.relocation(endian)); - - // set up a separate relocation for the addend - r_symbolnum = value as u32; - (false, macho::ARM64_RELOC_ADDEND) - } - ( - RelocationKind::MachO { value, relative }, - RelocationEncoding::Generic, - 0, - ) => (relative, value), - _ => { - return Err(Error(format!( - "unimplemented relocation {:?}", - reloc - ))); - } - } - } - _ => { - if let RelocationKind::MachO { value, relative } = reloc.kind { - (relative, value) - } else { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - } - }; - let reloc_info = macho::RelocationInfo { - r_address: reloc.offset as u32, - r_symbolnum, - r_pcrel, - r_length, - r_extern, - r_type, - }; - buffer.write(&reloc_info.relocation(endian)); - } - } - } - debug_assert_eq!(offset, buffer.len()); Ok(()) diff --git a/third_party/rust/object/src/write/mod.rs b/third_party/rust/object/src/write/mod.rs index 711ff16d2abd..c16ed2e56d80 100644 --- a/third_party/rust/object/src/write/mod.rs +++ b/third_party/rust/object/src/write/mod.rs @@ -1,4 +1,13 @@ //! Interface for writing object files. +//! +//! This module provides a unified write API for relocatable object files +//! using [`Object`]. This does not support writing executable files. +//! 
This supports the following file formats: COFF, ELF, Mach-O, and XCOFF. +//! +//! The submodules define helpers for writing the raw structs. These support +//! writing both relocatable and executable files. There are writers for +//! the following file formats: [COFF](coff::Writer), [ELF](elf::Writer), +//! and [PE](pe::Writer). use alloc::borrow::Cow; use alloc::string::String; @@ -10,13 +19,11 @@ use hashbrown::HashMap; use std::{boxed::Box, collections::HashMap, error, io}; use crate::endian::{Endianness, U32, U64}; -use crate::{ - Architecture, BinaryFormat, ComdatKind, FileFlags, RelocationEncoding, RelocationKind, - SectionFlags, SectionKind, SymbolFlags, SymbolKind, SymbolScope, -}; + +pub use crate::common::*; #[cfg(feature = "coff")] -mod coff; +pub mod coff; #[cfg(feature = "coff")] pub use coff::CoffExportStyle; @@ -34,7 +41,7 @@ pub mod pe; #[cfg(feature = "xcoff")] mod xcoff; -mod string; +pub(crate) mod string; pub use string::StringId; mod util; @@ -42,7 +49,7 @@ pub use util::*; /// The error type used within the write module. #[derive(Debug, Clone, PartialEq, Eq)] -pub struct Error(String); +pub struct Error(pub(crate) String); impl fmt::Display for Error { #[inline] @@ -62,21 +69,30 @@ pub type Result = result::Result; pub struct Object<'a> { format: BinaryFormat, architecture: Architecture, + sub_architecture: Option, endian: Endianness, sections: Vec>, standard_sections: HashMap, symbols: Vec, symbol_map: HashMap, SymbolId>, - stub_symbols: HashMap, comdats: Vec, /// File flags that are specific to each file format. pub flags: FileFlags, /// The symbol name mangling scheme. pub mangling: Mangling, + #[cfg(feature = "coff")] + stub_symbols: HashMap, /// Mach-O "_tlv_bootstrap" symbol. + #[cfg(feature = "macho")] tlv_bootstrap: Option, + /// Mach-O CPU subtype. + #[cfg(feature = "macho")] + macho_cpu_subtype: Option, #[cfg(feature = "macho")] macho_build_version: Option, + /// Mach-O MH_SUBSECTIONS_VIA_SYMBOLS flag. 
Only ever set if format is Mach-O. + #[cfg(feature = "macho")] + macho_subsections_via_symbols: bool, } impl<'a> Object<'a> { @@ -85,18 +101,25 @@ impl<'a> Object<'a> { Object { format, architecture, + sub_architecture: None, endian, sections: Vec::new(), standard_sections: HashMap::new(), symbols: Vec::new(), symbol_map: HashMap::new(), - stub_symbols: HashMap::new(), comdats: Vec::new(), flags: FileFlags::None, mangling: Mangling::default(format, architecture), + #[cfg(feature = "coff")] + stub_symbols: HashMap::new(), + #[cfg(feature = "macho")] tlv_bootstrap: None, #[cfg(feature = "macho")] + macho_cpu_subtype: None, + #[cfg(feature = "macho")] macho_build_version: None, + #[cfg(feature = "macho")] + macho_subsections_via_symbols: false, } } @@ -112,6 +135,17 @@ impl<'a> Object<'a> { self.architecture } + /// Return the sub-architecture. + #[inline] + pub fn sub_architecture(&self) -> Option { + self.sub_architecture + } + + /// Specify the sub-architecture. + pub fn set_sub_architecture(&mut self, sub_architecture: Option) { + self.sub_architecture = sub_architecture; + } + /// Return the current mangling setting. #[inline] pub fn mangling(&self) -> Mangling { @@ -155,6 +189,7 @@ impl<'a> Object<'a> { /// Set the data for an existing section. /// /// Must not be called for sections that already have data, or that contain uninitialized data. + /// `align` must be a power of two. pub fn set_section_data(&mut self, section: SectionId, data: T, align: u64) where T: Into>, @@ -163,11 +198,17 @@ impl<'a> Object<'a> { } /// Append data to an existing section. Returns the section offset of the data. + /// + /// Must not be called for sections that contain uninitialized data. + /// `align` must be a power of two. pub fn append_section_data(&mut self, section: SectionId, data: &[u8], align: u64) -> u64 { self.sections[section.0].append_data(data, align) } /// Append zero-initialized data to an existing section. Returns the section offset of the data. 
+ /// + /// Must not be called for sections that contain initialized data. + /// `align` must be a power of two. pub fn append_section_bss(&mut self, section: SectionId, size: u64, align: u64) -> u64 { self.sections[section.0].append_bss(size, align) } @@ -236,39 +277,35 @@ impl<'a> Object<'a> { } /// Add a subsection. Returns the `SectionId` and section offset of the data. - pub fn add_subsection( - &mut self, - section: StandardSection, - name: &[u8], - data: &[u8], - align: u64, - ) -> (SectionId, u64) { - let section_id = if self.has_subsections_via_symbols() { - self.set_subsections_via_symbols(); + /// + /// For Mach-O, this does not create a subsection, and instead uses the + /// section from [`Self::section_id`]. Use [`Self::set_subsections_via_symbols`] + /// to enable subsections via symbols. + pub fn add_subsection(&mut self, section: StandardSection, name: &[u8]) -> SectionId { + if self.has_subsections_via_symbols() { self.section_id(section) } else { let (segment, name, kind, flags) = self.subsection_info(section, name); let id = self.add_section(segment.to_vec(), name, kind); self.section_mut(id).flags = flags; id - }; - let offset = self.append_section_data(section_id, data, align); - (section_id, offset) - } - - fn has_subsections_via_symbols(&self) -> bool { - match self.format { - BinaryFormat::Coff | BinaryFormat::Elf | BinaryFormat::Xcoff => false, - BinaryFormat::MachO => true, - _ => unimplemented!(), } } - fn set_subsections_via_symbols(&mut self) { - match self.format { - #[cfg(feature = "macho")] - BinaryFormat::MachO => self.macho_set_subsections_via_symbols(), - _ => unimplemented!(), + fn has_subsections_via_symbols(&self) -> bool { + self.format == BinaryFormat::MachO + } + + /// Enable subsections via symbols if supported. + /// + /// This should be called before adding any subsections or symbols. + /// + /// For Mach-O, this sets the `MH_SUBSECTIONS_VIA_SYMBOLS` flag. + /// For other formats, this does nothing. 
+ pub fn set_subsections_via_symbols(&mut self) { + #[cfg(feature = "macho")] + if self.format == BinaryFormat::MachO { + self.macho_subsections_via_symbols = true; } } @@ -381,6 +418,8 @@ impl<'a> Object<'a> { /// Add a new common symbol and return its `SymbolId`. /// /// For Mach-O, this appends the symbol to the `__common` section. + /// + /// `align` must be a power of two. pub fn add_common_symbol(&mut self, mut symbol: Symbol, size: u64, align: u64) -> SymbolId { if self.has_common() { let symbol_id = self.add_symbol(symbol); @@ -439,14 +478,24 @@ impl<'a> Object<'a> { /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the /// symbol will indirectly point to the added data via the `__thread_vars` entry. /// + /// For Mach-O, if [`Self::set_subsections_via_symbols`] is enabled, this will + /// automatically ensure the data size is at least 1. + /// /// Returns the section offset of the data. + /// + /// Must not be called for sections that contain uninitialized data. + /// `align` must be a power of two. pub fn add_symbol_data( &mut self, symbol_id: SymbolId, section: SectionId, - data: &[u8], + mut data: &[u8], align: u64, ) -> u64 { + #[cfg(feature = "macho")] + if data.is_empty() && self.macho_subsections_via_symbols { + data = &[0]; + } let offset = self.append_section_data(section, data, align); self.set_symbol_data(symbol_id, section, offset, data.len() as u64); offset @@ -457,14 +506,24 @@ impl<'a> Object<'a> { /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the /// symbol will indirectly point to the added data via the `__thread_vars` entry. /// + /// For Mach-O, if [`Self::set_subsections_via_symbols`] is enabled, this will + /// automatically ensure the data size is at least 1. + /// /// Returns the section offset of the data. + /// + /// Must not be called for sections that contain initialized data. + /// `align` must be a power of two. 
pub fn add_symbol_bss( &mut self, symbol_id: SymbolId, section: SectionId, - size: u64, + mut size: u64, align: u64, ) -> u64 { + #[cfg(feature = "macho")] + if size == 0 && self.macho_subsections_via_symbols { + size = 1; + } let offset = self.append_section_bss(section, size, align); self.set_symbol_data(symbol_id, section, offset, size); offset @@ -514,19 +573,31 @@ impl<'a> Object<'a> { /// Relocations must only be added after the referenced symbols have been added /// and defined (if applicable). pub fn add_relocation(&mut self, section: SectionId, mut relocation: Relocation) -> Result<()> { - let addend = match self.format { + match self.format { #[cfg(feature = "coff")] - BinaryFormat::Coff => self.coff_fixup_relocation(&mut relocation), + BinaryFormat::Coff => self.coff_translate_relocation(&mut relocation)?, #[cfg(feature = "elf")] - BinaryFormat::Elf => self.elf_fixup_relocation(&mut relocation)?, + BinaryFormat::Elf => self.elf_translate_relocation(&mut relocation)?, #[cfg(feature = "macho")] - BinaryFormat::MachO => self.macho_fixup_relocation(&mut relocation), + BinaryFormat::MachO => self.macho_translate_relocation(&mut relocation)?, #[cfg(feature = "xcoff")] - BinaryFormat::Xcoff => self.xcoff_fixup_relocation(&mut relocation), + BinaryFormat::Xcoff => self.xcoff_translate_relocation(&mut relocation)?, + _ => unimplemented!(), + } + let implicit = match self.format { + #[cfg(feature = "coff")] + BinaryFormat::Coff => self.coff_adjust_addend(&mut relocation)?, + #[cfg(feature = "elf")] + BinaryFormat::Elf => self.elf_adjust_addend(&mut relocation)?, + #[cfg(feature = "macho")] + BinaryFormat::MachO => self.macho_adjust_addend(&mut relocation)?, + #[cfg(feature = "xcoff")] + BinaryFormat::Xcoff => self.xcoff_adjust_addend(&mut relocation)?, _ => unimplemented!(), }; - if addend != 0 { - self.write_relocation_addend(section, &relocation, addend)?; + if implicit && relocation.addend != 0 { + self.write_relocation_addend(section, &relocation)?; + 
relocation.addend = 0; } self.sections[section.0].relocations.push(relocation); Ok(()) @@ -536,13 +607,23 @@ impl<'a> Object<'a> { &mut self, section: SectionId, relocation: &Relocation, - addend: i64, ) -> Result<()> { + let size = match self.format { + #[cfg(feature = "coff")] + BinaryFormat::Coff => self.coff_relocation_size(relocation)?, + #[cfg(feature = "elf")] + BinaryFormat::Elf => self.elf_relocation_size(relocation)?, + #[cfg(feature = "macho")] + BinaryFormat::MachO => self.macho_relocation_size(relocation)?, + #[cfg(feature = "xcoff")] + BinaryFormat::Xcoff => self.xcoff_relocation_size(relocation)?, + _ => unimplemented!(), + }; let data = self.sections[section.0].data_mut(); let offset = relocation.offset as usize; - match relocation.size { - 32 => data.write_at(offset, &U32::new(self.endian, addend as u32)), - 64 => data.write_at(offset, &U64::new(self.endian, addend as u64)), + match size { + 32 => data.write_at(offset, &U32::new(self.endian, relocation.addend as u32)), + 64 => data.write_at(offset, &U64::new(self.endian, relocation.addend as u64)), _ => { return Err(Error(format!( "unimplemented relocation addend {:?}", @@ -554,7 +635,7 @@ impl<'a> Object<'a> { Error(format!( "invalid relocation offset {}+{} (max {})", relocation.offset, - relocation.size, + size, data.len() )) }) @@ -707,6 +788,7 @@ impl<'a> Section<'a> { /// Set the data for a section. /// /// Must not be called for sections that already have data, or that contain uninitialized data. + /// `align` must be a power of two. pub fn set_data(&mut self, data: T, align: u64) where T: Into>, @@ -722,6 +804,7 @@ impl<'a> Section<'a> { /// Append data to a section. /// /// Must not be called for sections that contain uninitialized data. + /// `align` must be a power of two. 
pub fn append_data(&mut self, append_data: &[u8], align: u64) -> u64 { debug_assert!(!self.is_bss()); debug_assert_eq!(align & (align - 1), 0); @@ -743,6 +826,7 @@ impl<'a> Section<'a> { /// Append uninitialized data to a section. /// /// Must not be called for sections that contain initialized data. + /// `align` must be a power of two. pub fn append_bss(&mut self, size: u64, align: u64) -> u64 { debug_assert!(self.is_bss()); debug_assert_eq!(align & (align - 1), 0); @@ -865,12 +949,6 @@ impl Symbol { pub struct Relocation { /// The section offset of the place of the relocation. pub offset: u64, - /// The size in bits of the place of relocation. - pub size: u8, - /// The operation used to calculate the result of the relocation. - pub kind: RelocationKind, - /// Information about how the result of the relocation operation is encoded in the place. - pub encoding: RelocationEncoding, /// The symbol referred to by the relocation. /// /// This may be a section symbol. @@ -879,6 +957,8 @@ pub struct Relocation { /// /// This may be in addition to an implicit addend stored at the place of the relocation. pub addend: i64, + /// The fields that define the relocation type. + pub flags: RelocationFlags, } /// An identifier used to reference a COMDAT section group. diff --git a/third_party/rust/object/src/write/pe.rs b/third_party/rust/object/src/write/pe.rs index 70da3a093711..03f79fa22307 100644 --- a/third_party/rust/object/src/write/pe.rs +++ b/third_party/rust/object/src/write/pe.rs @@ -47,6 +47,8 @@ pub struct Writer<'a> { impl<'a> Writer<'a> { /// Create a new `Writer`. + /// + /// The alignment values must be powers of two. 
pub fn new( is_64: bool, section_alignment: u32, diff --git a/third_party/rust/object/src/write/string.rs b/third_party/rust/object/src/write/string.rs index b23274a0af52..2864da136bd2 100644 --- a/third_party/rust/object/src/write/string.rs +++ b/third_party/rust/object/src/write/string.rs @@ -1,8 +1,8 @@ use alloc::vec::Vec; -#[cfg(feature = "std")] +#[cfg(feature = "write_std")] type IndexSet = indexmap::IndexSet; -#[cfg(not(feature = "std"))] +#[cfg(not(feature = "write_std"))] type IndexSet = indexmap::IndexSet; /// An identifier for an entry in a string table. @@ -30,6 +30,7 @@ impl<'a> StringTable<'a> { /// Return the id of the given string. /// /// Panics if the string is not in the string table. + #[allow(dead_code)] pub fn get_id(&self, string: &[u8]) -> StringId { let id = self.strings.get_index_of(string).unwrap(); StringId(id) @@ -38,6 +39,7 @@ impl<'a> StringTable<'a> { /// Return the string for the given id. /// /// Panics if the string is not in the string table. + #[allow(dead_code)] pub fn get_string(&self, id: StringId) -> &'a [u8] { self.strings.get_index(id.0).unwrap() } @@ -56,6 +58,8 @@ impl<'a> StringTable<'a> { /// `base` is the initial string table offset. For example, /// this should be 1 for ELF, to account for the initial /// null byte (which must have been written by the caller). + /// + /// Panics if the string table has already been written. pub fn write(&mut self, base: usize, w: &mut Vec) { assert!(self.offsets.is_empty()); @@ -78,6 +82,29 @@ impl<'a> StringTable<'a> { } } } + + /// Calculate the size in bytes of the string table. + /// + /// `base` is the initial string table offset. For example, + /// this should be 1 for ELF, to account for the initial + /// null byte. + #[allow(dead_code)] + pub fn size(&self, base: usize) -> usize { + // TODO: cache this result? 
+ let mut ids: Vec<_> = (0..self.strings.len()).collect(); + sort(&mut ids, 1, &self.strings); + + let mut size = base; + let mut previous = &[][..]; + for id in ids { + let string = self.strings.get_index(id).unwrap(); + if !previous.ends_with(string) { + size += string.len() + 1; + previous = string; + } + } + size + } } // Multi-key quicksort. diff --git a/third_party/rust/object/src/write/util.rs b/third_party/rust/object/src/write/util.rs index b05b14d92785..56ed9947c1b5 100644 --- a/third_party/rust/object/src/write/util.rs +++ b/third_party/rust/object/src/write/util.rs @@ -167,6 +167,7 @@ impl<'a> BytesMut for &'a mut [u8] { /// Write an unsigned number using the LEB128 encoding to a buffer. /// /// Returns the number of bytes written. +#[allow(dead_code)] pub(crate) fn write_uleb128(buf: &mut Vec, mut val: u64) -> usize { let mut len = 0; loop { @@ -190,7 +191,7 @@ pub(crate) fn write_uleb128(buf: &mut Vec, mut val: u64) -> usize { /// /// Returns the number of bytes written. #[allow(dead_code)] -pub(crate) fn write_sleb128(buf: &mut Vec, mut val: i64) -> usize { +pub(crate) fn write_sleb128(buf: &mut Vec, mut val: i64) -> usize { let mut len = 0; loop { let mut byte = val as u8; @@ -255,6 +256,6 @@ mod tests { assert_eq!(bytes.write_at(3, &u16::to_be(0x89ab)), Err(())); assert_eq!(bytes.write_at(4, &u16::to_be(0x89ab)), Err(())); - assert_eq!(vec![].write_at(0, &u32::to_be(0x89ab)), Err(())); + assert_eq!([].write_at(0, &u32::to_be(0x89ab)), Err(())); } } diff --git a/third_party/rust/object/src/write/xcoff.rs b/third_party/rust/object/src/write/xcoff.rs index 6c9a803845af..5aade24d00b4 100644 --- a/third_party/rust/object/src/write/xcoff.rs +++ b/third_party/rust/object/src/write/xcoff.rs @@ -5,7 +5,7 @@ use crate::write::string::*; use crate::write::util::*; use crate::write::*; -use crate::{xcoff, AddressSize}; +use crate::xcoff; #[derive(Default, Clone, Copy)] struct SectionOffsets { @@ -66,13 +66,50 @@ impl<'a> Object<'a> { } } - pub(crate) fn 
xcoff_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { - let constant = match relocation.kind { - RelocationKind::Relative => relocation.addend + 4, - _ => relocation.addend, + pub(crate) fn xcoff_translate_relocation(&mut self, reloc: &mut Relocation) -> Result<()> { + let (kind, _encoding, size) = if let RelocationFlags::Generic { + kind, + encoding, + size, + } = reloc.flags + { + (kind, encoding, size) + } else { + return Ok(()); }; - relocation.addend -= constant; - constant + + let r_rtype = match kind { + RelocationKind::Absolute => xcoff::R_POS, + RelocationKind::Relative => xcoff::R_REL, + RelocationKind::Got => xcoff::R_TOC, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }; + let r_rsize = size - 1; + reloc.flags = RelocationFlags::Xcoff { r_rtype, r_rsize }; + Ok(()) + } + + pub(crate) fn xcoff_adjust_addend(&mut self, relocation: &mut Relocation) -> Result { + let r_rtype = if let RelocationFlags::Xcoff { r_rtype, .. } = relocation.flags { + r_rtype + } else { + return Err(Error(format!("invalid relocation flags {:?}", relocation))); + }; + if r_rtype == xcoff::R_REL { + relocation.addend += 4; + } + Ok(true) + } + + pub(crate) fn xcoff_relocation_size(&self, reloc: &Relocation) -> Result { + let r_rsize = if let RelocationFlags::Xcoff { r_rsize, .. 
} = reloc.flags { + r_rsize + } else { + return Err(Error(format!("unexpected relocation {:?}", reloc))); + }; + Ok(r_rsize + 1) } pub(crate) fn xcoff_write(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { @@ -157,7 +194,6 @@ impl<'a> Object<'a> { n_sclass } else { match symbol.kind { - SymbolKind::Null => xcoff::C_NULL, SymbolKind::File => xcoff::C_FILE, SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => { if symbol.is_local() { @@ -277,7 +313,7 @@ impl<'a> Object<'a> { SectionKind::Tls => xcoff::STYP_TDATA, SectionKind::UninitializedTls => xcoff::STYP_TBSS, SectionKind::OtherString => xcoff::STYP_INFO, - SectionKind::Debug => xcoff::STYP_DEBUG, + SectionKind::Debug | SectionKind::DebugString => xcoff::STYP_DEBUG, SectionKind::Other | SectionKind::Metadata => 0, SectionKind::Note | SectionKind::Linker @@ -346,30 +382,26 @@ impl<'a> Object<'a> { if !section.relocations.is_empty() { debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); for reloc in §ion.relocations { - let rtype = match reloc.kind { - RelocationKind::Absolute => xcoff::R_POS, - RelocationKind::Relative => xcoff::R_REL, - RelocationKind::Got => xcoff::R_TOC, - RelocationKind::Xcoff(x) => x, - _ => { - return Err(Error(format!("unimplemented relocation {:?}", reloc))); - } - }; + let (r_rtype, r_rsize) = + if let RelocationFlags::Xcoff { r_rtype, r_rsize } = reloc.flags { + (r_rtype, r_rsize) + } else { + return Err(Error("invalid relocation flags".into())); + }; if is_64 { let xcoff_rel = xcoff::Rel64 { r_vaddr: U64::new(BE, reloc.offset), r_symndx: U32::new(BE, symbol_offsets[reloc.symbol.0].index as u32), - // Specifies the bit length of the relocatable reference minus one. 
- r_rsize: (reloc.size - 1), - r_rtype: rtype, + r_rsize, + r_rtype, }; buffer.write(&xcoff_rel); } else { let xcoff_rel = xcoff::Rel32 { r_vaddr: U32::new(BE, reloc.offset as u32), r_symndx: U32::new(BE, symbol_offsets[reloc.symbol.0].index as u32), - r_rsize: (reloc.size - 1), - r_rtype: rtype, + r_rsize, + r_rtype, }; buffer.write(&xcoff_rel); } diff --git a/third_party/rust/object/src/xcoff.rs b/third_party/rust/object/src/xcoff.rs index 038698926136..ef54c9310960 100644 --- a/third_party/rust/object/src/xcoff.rs +++ b/third_party/rust/object/src/xcoff.rs @@ -331,6 +331,18 @@ pub const STYP_TYPCHK: u16 = 0x4000; /// when either of the counts exceeds 65,534. pub const STYP_OVRFLO: u16 = 0x8000; +pub const SSUBTYP_DWINFO: u32 = 0x10000; +pub const SSUBTYP_DWLINE: u32 = 0x20000; +pub const SSUBTYP_DWPBNMS: u32 = 0x30000; +pub const SSUBTYP_DWPBTYP: u32 = 0x40000; +pub const SSUBTYP_DWARNGE: u32 = 0x50000; +pub const SSUBTYP_DWABREV: u32 = 0x60000; +pub const SSUBTYP_DWSTR: u32 = 0x70000; +pub const SSUBTYP_DWRNGES: u32 = 0x80000; +pub const SSUBTYP_DWLOC: u32 = 0x90000; +pub const SSUBTYP_DWFRAME: u32 = 0xA0000; +pub const SSUBTYP_DWMAC: u32 = 0xB0000; + pub const SIZEOF_SYMBOL: usize = 18; #[derive(Debug, Clone, Copy)] @@ -384,7 +396,7 @@ pub const N_ABS: i16 = -1; /// An undefined external symbol. pub const N_UNDEF: i16 = 0; -// Vlaues for `n_type`. +// Values for `n_type`. // /// Values for visibility as they would appear when encoded in the high 4 bits /// of the 16-bit unsigned n_type field of symbol table entries. Valid for diff --git a/third_party/rust/object/tests/build/elf.rs b/third_party/rust/object/tests/build/elf.rs new file mode 100644 index 000000000000..53979ee1ed05 --- /dev/null +++ b/third_party/rust/object/tests/build/elf.rs @@ -0,0 +1,254 @@ +use object::{build, elf}; + +// Test that offset 0 is supported for SHT_NOBITS sections. 
+#[test] +fn test_nobits_offset() { + let mut builder = build::elf::Builder::new(object::Endianness::Little, true); + builder.header.e_type = elf::ET_EXEC; + builder.header.e_phoff = 0x40; + + let section = builder.sections.add(); + section.name = b".shstrtab"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.data = build::elf::SectionData::SectionString; + + let section = builder.sections.add(); + section.name = b".bss"[..].into(); + section.sh_type = elf::SHT_NOBITS; + section.sh_flags = (elf::SHF_ALLOC | elf::SHF_WRITE) as u64; + section.sh_addr = 0x1000; + section.sh_offset = 0; + section.sh_size = 0x1000; + section.sh_addralign = 16; + section.data = build::elf::SectionData::UninitializedData(0x1000); + let section_id = section.id(); + + let segment = builder.segments.add(); + segment.p_type = elf::PT_LOAD; + segment.p_flags = elf::PF_R | elf::PF_W; + segment.p_offset = 0x1000; + segment.p_vaddr = 0x1000; + segment.p_paddr = 0x1000; + segment.p_filesz = 0; + segment.p_memsz = 0x1000; + segment.p_align = 16; + segment.sections.push(section_id); + + let mut buf = Vec::new(); + builder.write(&mut buf).unwrap(); +} + +// Test that we can read and write a file with no dynamic string table. 
+#[test] +fn test_no_dynstr() { + let mut builder = build::elf::Builder::new(object::Endianness::Little, true); + builder.header.e_type = elf::ET_EXEC; + builder.header.e_machine = elf::EM_X86_64; + builder.header.e_phoff = 0x40; + + let section = builder.sections.add(); + section.name = b".shstrtab"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.data = build::elf::SectionData::SectionString; + + let section = builder.sections.add(); + section.name = b".dynsym"[..].into(); + section.sh_type = elf::SHT_DYNSYM; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 8; + section.data = build::elf::SectionData::DynamicSymbol; + let dynsym_id = section.id(); + + let section = builder.sections.add(); + section.name = b".rela.dyn"[..].into(); + section.sh_type = elf::SHT_RELA; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 8; + section.data = + build::elf::SectionData::DynamicRelocation(vec![build::elf::DynamicRelocation { + r_offset: 0x1000, + symbol: None, + r_type: elf::R_X86_64_64, + r_addend: 0x300, + }]); + let rela_id = section.id(); + + builder.set_section_sizes(); + + let segment = builder.segments.add(); + segment.p_type = elf::PT_LOAD; + segment.p_flags = elf::PF_R; + segment.p_filesz = 0x1000; + segment.p_memsz = 0x1000; + segment.p_align = 8; + segment.append_section(builder.sections.get_mut(dynsym_id)); + segment.append_section(builder.sections.get_mut(rela_id)); + + let mut buf = Vec::new(); + builder.write(&mut buf).unwrap(); + + let builder = build::elf::Builder::read(&*buf).unwrap(); + assert_eq!(builder.sections.count(), 3); + assert_eq!(builder.segments.count(), 1); + for section in &builder.sections { + match §ion.data { + build::elf::SectionData::DynamicSymbol => { + assert_eq!(section.sh_offset, 0x1000); + } + build::elf::SectionData::DynamicRelocation(rela) => { + assert_eq!(section.sh_offset, 0x1018); + assert_eq!(rela.len(), 1); + } + _ => {} + } + } +} + +#[test] +fn test_attribute() { + let mut 
builder = build::elf::Builder::new(object::Endianness::Little, true); + builder.header.e_type = elf::ET_EXEC; + builder.header.e_machine = elf::EM_X86_64; + builder.header.e_phoff = 0x40; + + let section = builder.sections.add(); + section.name = b".shstrtab"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.data = build::elf::SectionData::SectionString; + + let attributes = build::elf::AttributesSection { + subsections: vec![build::elf::AttributesSubsection { + vendor: b"GNU"[..].into(), + subsubsections: vec![ + (build::elf::AttributesSubsubsection { + tag: build::elf::AttributeTag::File, + data: b"123"[..].into(), + }), + ], + }], + }; + let section = builder.sections.add(); + section.name = b".gnu.attributes"[..].into(); + section.sh_type = elf::SHT_GNU_ATTRIBUTES; + section.sh_addralign = 8; + section.data = build::elf::SectionData::Attributes(attributes); + + let mut buf = Vec::new(); + builder.write(&mut buf).unwrap(); + + let builder = build::elf::Builder::read(&*buf).unwrap(); + assert_eq!(builder.sections.count(), 2); + for section in &builder.sections { + if let build::elf::SectionData::Attributes(attributes) = §ion.data { + assert_eq!(attributes.subsections.len(), 1); + assert_eq!(attributes.subsections[0].vendor.as_slice(), b"GNU"); + assert_eq!(attributes.subsections[0].subsubsections.len(), 1); + assert_eq!( + attributes.subsections[0].subsubsections[0].tag, + build::elf::AttributeTag::File + ); + assert_eq!( + attributes.subsections[0].subsubsections[0].data.as_slice(), + b"123" + ); + } + } +} + +#[test] +fn test_dynsym() { + let mut builder = build::elf::Builder::new(object::Endianness::Little, true); + builder.header.e_type = elf::ET_EXEC; + builder.header.e_machine = elf::EM_X86_64; + builder.header.e_phoff = 0x40; + + let section = builder.sections.add(); + section.name = b".shstrtab"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.data = build::elf::SectionData::SectionString; + + let section = builder.sections.add(); + 
section.name = b".text"[..].into(); + section.sh_type = elf::SHT_PROGBITS; + section.sh_flags = (elf::SHF_ALLOC | elf::SHF_EXECINSTR) as u64; + section.sh_addralign = 16; + section.data = build::elf::SectionData::Data(vec![0xcc; 100].into()); + let text_id = section.id(); + + let section = builder.sections.add(); + section.name = b".dynsym"[..].into(); + section.sh_type = elf::SHT_DYNSYM; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 8; + section.data = build::elf::SectionData::DynamicSymbol; + let dynsym_id = section.id(); + + let section = builder.sections.add(); + section.name = b".dynstr"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 1; + section.data = build::elf::SectionData::DynamicString; + let dynstr_id = section.id(); + + let section = builder.sections.add(); + section.name = b".gnu.hash"[..].into(); + section.sh_type = elf::SHT_GNU_HASH; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 8; + section.data = build::elf::SectionData::GnuHash; + let gnu_hash_id = section.id(); + builder.gnu_hash_bloom_shift = 1; + builder.gnu_hash_bloom_count = 1; + builder.gnu_hash_bucket_count = 1; + + let symbol = builder.dynamic_symbols.add(); + symbol.name = b"global"[..].into(); + symbol.set_st_info(elf::STB_GLOBAL, elf::STT_FUNC); + symbol.section = Some(text_id); + + let symbol = builder.dynamic_symbols.add(); + symbol.name = b"undefined"[..].into(); + symbol.set_st_info(elf::STB_GLOBAL, elf::STT_NOTYPE); + + let symbol = builder.dynamic_symbols.add(); + symbol.name = b"local"[..].into(); + symbol.set_st_info(elf::STB_LOCAL, elf::STT_FUNC); + symbol.section = Some(text_id); + + builder.set_section_sizes(); + + let segment = builder.segments.add(); + segment.p_type = elf::PT_LOAD; + segment.p_flags = elf::PF_R; + segment.p_filesz = 0x1000; + segment.p_memsz = 0x1000; + segment.p_align = 8; + segment.append_section(builder.sections.get_mut(text_id)); + 
segment.append_section(builder.sections.get_mut(dynsym_id)); + segment.append_section(builder.sections.get_mut(dynstr_id)); + segment.append_section(builder.sections.get_mut(gnu_hash_id)); + + let mut buf = Vec::new(); + builder.write(&mut buf).unwrap(); + + let builder = build::elf::Builder::read(&*buf).unwrap(); + assert_eq!(builder.sections.count(), 5); + assert_eq!(builder.dynamic_symbols.count(), 3); + // Check that the dynamic symbol table sorting handles + // local and undefined symbols correctly. + assert_eq!( + builder + .dynamic_symbols + .iter() + .map(|s| s.name.as_slice()) + .collect::>(), + vec![&b"local"[..], &b"undefined"[..], &b"global"[..]] + ); + for section in &builder.sections { + if let build::elf::SectionData::DynamicSymbol = §ion.data { + // Check that sh_info includes the number of local symbols. + assert_eq!(section.sh_info, 2); + } + } +} diff --git a/third_party/rust/object/tests/build/mod.rs b/third_party/rust/object/tests/build/mod.rs new file mode 100644 index 000000000000..6a777367b3de --- /dev/null +++ b/third_party/rust/object/tests/build/mod.rs @@ -0,0 +1,3 @@ +#![cfg(feature = "build")] + +mod elf; diff --git a/third_party/rust/object/tests/integration.rs b/third_party/rust/object/tests/integration.rs index 6ebcb547f81f..560ba6269379 100644 --- a/third_party/rust/object/tests/integration.rs +++ b/third_party/rust/object/tests/integration.rs @@ -1,2 +1,3 @@ +mod build; mod read; mod round_trip; diff --git a/third_party/rust/object/tests/read/coff.rs b/third_party/rust/object/tests/read/coff.rs index dcf3b3c6aa1e..959e317a9222 100644 --- a/third_party/rust/object/tests/read/coff.rs +++ b/third_party/rust/object/tests/read/coff.rs @@ -6,7 +6,7 @@ use std::path::PathBuf; #[test] fn coff_extended_relocations() { let path_to_obj: PathBuf = ["testfiles", "coff", "relocs_overflow.o"].iter().collect(); - let contents = fs::read(&path_to_obj).expect("Could not read relocs_overflow.o"); + let contents = fs::read(path_to_obj).expect("Could 
not read relocs_overflow.o"); let file = read::coff::CoffFile::<_>::parse(&contents[..]).expect("Could not parse relocs_overflow.o"); let code_section = file diff --git a/third_party/rust/object/tests/read/elf.rs b/third_party/rust/object/tests/read/elf.rs new file mode 100644 index 000000000000..e42cd516138b --- /dev/null +++ b/third_party/rust/object/tests/read/elf.rs @@ -0,0 +1,47 @@ +#[cfg(feature = "std")] +use std::path::{Path, PathBuf}; + +#[cfg(feature = "std")] +fn get_buildid(path: &Path) -> Result>, object::read::Error> { + use object::Object; + let file = std::fs::File::open(path).unwrap(); + let reader = object::read::ReadCache::new(file); + let object = object::read::File::parse(&reader)?; + object + .build_id() + .map(|option| option.map(ToOwned::to_owned)) +} + +#[cfg(feature = "std")] +#[test] +/// Regression test: used to attempt to allocate 5644418395173552131 bytes +fn get_buildid_bad_elf() { + let path: PathBuf = [ + "testfiles", + "elf", + "yara-fuzzing", + "crash-7dc27920ae1cb85333e7f2735a45014488134673", + ] + .iter() + .collect(); + let _ = get_buildid(&path); +} + +#[cfg(feature = "std")] +#[test] +fn get_buildid_less_bad_elf() { + let path: PathBuf = [ + "testfiles", + "elf", + "yara-fuzzing", + "crash-f1fd008da535b110853885221ebfaac3f262a1c1e280f10929f7b353c44996c8", + ] + .iter() + .collect(); + let buildid = get_buildid(&path).unwrap().unwrap(); + // ground truth obtained from GNU binutils's readelf + assert_eq!( + buildid, + b"\xf9\xc0\xc6\x05\xd3\x76\xbb\xa5\x7e\x02\xf5\x74\x50\x9d\x16\xcc\xe9\x9c\x1b\xf1" + ); +} diff --git a/third_party/rust/object/tests/read/macho.rs b/third_party/rust/object/tests/read/macho.rs new file mode 100644 index 000000000000..59f314be6f0a --- /dev/null +++ b/third_party/rust/object/tests/read/macho.rs @@ -0,0 +1,49 @@ +#[cfg(feature = "std")] +use object::{Object, ObjectSection as _}; + +// Test that we can read compressed sections in Mach-O files as produced +// by the Go compiler. 
+#[cfg(feature = "std")] +#[test] +fn test_go_macho() { + let macho_testfiles = std::path::Path::new("testfiles/macho"); + + // Section names we expect to find, whether they should be + // compressed, and the actual name of the section in the file. + const EXPECTED: &[(&str, bool, &str)] = &[ + (".debug_abbrev", true, "__zdebug_abbrev"), + (".debug_gdb_scripts", false, "__debug_gdb_scri"), + (".debug_ranges", true, "__zdebug_ranges"), + ("__data", false, "__data"), + ]; + + for file in &["go-aarch64", "go-x86_64"] { + let path = macho_testfiles.join(file); + let file = std::fs::File::open(path).unwrap(); + let reader = object::read::ReadCache::new(file); + let object = object::read::File::parse(&reader).unwrap(); + for &(name, compressed, actual_name) in EXPECTED { + let section = object.section_by_name(name).unwrap(); + assert_eq!(section.name(), Ok(actual_name)); + let compressed_file_range = section.compressed_file_range().unwrap(); + let size = section.size(); + if compressed { + assert_eq!( + compressed_file_range.format, + object::CompressionFormat::Zlib + ); + assert_eq!(compressed_file_range.compressed_size, size - 12); + assert!( + compressed_file_range.uncompressed_size > compressed_file_range.compressed_size, + "decompressed size is greater than compressed size" + ); + } else { + assert_eq!( + compressed_file_range.format, + object::CompressionFormat::None + ); + assert_eq!(compressed_file_range.compressed_size, size); + } + } + } +} diff --git a/third_party/rust/object/tests/read/mod.rs b/third_party/rust/object/tests/read/mod.rs index d60d1933b7c4..48e005ee9aea 100644 --- a/third_party/rust/object/tests/read/mod.rs +++ b/third_party/rust/object/tests/read/mod.rs @@ -1,3 +1,5 @@ #![cfg(feature = "read")] mod coff; +mod elf; +mod macho; diff --git a/third_party/rust/object/tests/round_trip/bss.rs b/third_party/rust/object/tests/round_trip/bss.rs index 1354fcc78174..02fca4ccb035 100644 --- a/third_party/rust/object/tests/round_trip/bss.rs +++ 
b/third_party/rust/object/tests/round_trip/bss.rs @@ -66,8 +66,8 @@ fn coff_x86_64_bss() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(bss_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); let symbol = symbols.next().unwrap(); @@ -76,8 +76,8 @@ fn coff_x86_64_bss() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(bss_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 24); let symbol = symbols.next(); @@ -125,13 +125,6 @@ fn elf_x86_64_bss() { let mut sections = object.sections(); - let section = sections.next().unwrap(); - println!("{:?}", section); - assert_eq!(section.name(), Ok("")); - assert_eq!(section.kind(), SectionKind::Metadata); - assert_eq!(section.address(), 0); - assert_eq!(section.size(), 0); - let bss = sections.next().unwrap(); println!("{:?}", bss); let bss_index = bss.index(); @@ -142,18 +135,14 @@ fn elf_x86_64_bss() { let mut symbols = object.symbols(); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("")); - let symbol = symbols.next().unwrap(); println!("{:?}", symbol); assert_eq!(symbol.name(), Ok("v1")); assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(bss_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); assert_eq!(symbol.size(), 18); @@ -163,8 +152,8 @@ fn elf_x86_64_bss() { assert_eq!(symbol.kind(), SymbolKind::Data); 
assert_eq!(symbol.section_index(), Some(bss_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 24); assert_eq!(symbol.size(), 34); @@ -236,8 +225,8 @@ fn macho_x86_64_bss() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(bss_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); let symbol = symbols.next().unwrap(); @@ -246,8 +235,8 @@ fn macho_x86_64_bss() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(bss_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 24); let symbol = symbols.next(); diff --git a/third_party/rust/object/tests/round_trip/coff.rs b/third_party/rust/object/tests/round_trip/coff.rs index 6785dc367e56..2b55788ad27c 100644 --- a/third_party/rust/object/tests/round_trip/coff.rs +++ b/third_party/rust/object/tests/round_trip/coff.rs @@ -1,8 +1,8 @@ use object::read::{Object, ObjectSection}; use object::{read, write}; use object::{ - Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationKind, SymbolFlags, - SymbolKind, SymbolScope, + Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationFlags, RelocationKind, + SymbolFlags, SymbolKind, SymbolScope, }; #[test] @@ -27,11 +27,13 @@ fn reloc_overflow() { text, write::Relocation { offset: i, - size: 64, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: 
RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: 64, + }, }, ) .unwrap(); diff --git a/third_party/rust/object/tests/round_trip/comdat.rs b/third_party/rust/object/tests/round_trip/comdat.rs index 7b697a04c031..88a2e79dc8b2 100644 --- a/third_party/rust/object/tests/round_trip/comdat.rs +++ b/third_party/rust/object/tests/round_trip/comdat.rs @@ -13,11 +13,11 @@ fn coff_x86_64_comdat() { let mut object = write::Object::new(BinaryFormat::Coff, Architecture::X86_64, Endianness::Little); - let (section1, offset) = - object.add_subsection(write::StandardSection::Text, b"s1", &[0, 1, 2, 3], 4); + let section1 = object.add_subsection(write::StandardSection::Text, b"s1"); + let offset = object.append_section_data(section1, &[0, 1, 2, 3], 4); object.section_symbol(section1); - let (section2, _) = - object.add_subsection(write::StandardSection::Data, b"s1", &[0, 1, 2, 3], 4); + let section2 = object.add_subsection(write::StandardSection::Data, b"s1"); + object.append_section_data(section2, &[0, 1, 2, 3], 4); object.section_symbol(section2); let symbol = object.add_symbol(write::Symbol { @@ -107,8 +107,8 @@ fn coff_x86_64_comdat() { read::SymbolSection::Section(section1.index()) ); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); let symbol = symbols.next(); @@ -132,10 +132,10 @@ fn elf_x86_64_comdat() { let mut object = write::Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little); - let (section1, offset) = - object.add_subsection(write::StandardSection::Text, b"s1", &[0, 1, 2, 3], 4); - let (section2, _) = - object.add_subsection(write::StandardSection::Data, b"s1", &[0, 1, 2, 3], 4); + let section1 = object.add_subsection(write::StandardSection::Text, b"s1"); + let offset = object.append_section_data(section1, &[0, 1, 2, 3], 4); + let section2 = 
object.add_subsection(write::StandardSection::Data, b"s1"); + object.append_section_data(section2, &[0, 1, 2, 3], 4); let symbol = object.add_symbol(write::Symbol { name: b"s1".to_vec(), @@ -164,10 +164,6 @@ fn elf_x86_64_comdat() { let mut sections = object.sections(); - let section = sections.next().unwrap(); - println!("{:?}", section); - assert_eq!(section.name(), Ok("")); - let section = sections.next().unwrap(); println!("{:?}", section); assert_eq!(section.name(), Ok(".group")); @@ -190,10 +186,6 @@ fn elf_x86_64_comdat() { let mut symbols = object.symbols(); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("")); - let symbol = symbols.next().unwrap(); let symbol_index = symbol.index(); println!("{:?}", symbol); @@ -204,8 +196,8 @@ fn elf_x86_64_comdat() { read::SymbolSection::Section(section1.index()) ); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); let symbol = symbols.next(); diff --git a/third_party/rust/object/tests/round_trip/common.rs b/third_party/rust/object/tests/round_trip/common.rs index 74d443830f62..051688beffbc 100644 --- a/third_party/rust/object/tests/round_trip/common.rs +++ b/third_party/rust/object/tests/round_trip/common.rs @@ -64,8 +64,8 @@ fn coff_x86_64_common() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section(), read::SymbolSection::Common); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); assert_eq!(symbol.size(), 4); @@ -75,8 +75,8 @@ fn coff_x86_64_common() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section(), read::SymbolSection::Common); assert_eq!(symbol.scope(), 
SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); assert_eq!(symbol.size(), 8); @@ -86,8 +86,8 @@ fn coff_x86_64_common() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section(), read::SymbolSection::Undefined); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), true); + assert!(!symbol.is_weak()); + assert!(symbol.is_undefined()); assert_eq!(symbol.address(), 0); assert_eq!(symbol.size(), 0); @@ -134,18 +134,14 @@ fn elf_x86_64_common() { let mut symbols = object.symbols(); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("")); - let symbol = symbols.next().unwrap(); println!("{:?}", symbol); assert_eq!(symbol.name(), Ok("v1")); assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section(), read::SymbolSection::Common); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); assert_eq!(symbol.size(), 4); @@ -155,8 +151,8 @@ fn elf_x86_64_common() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section(), read::SymbolSection::Common); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); assert_eq!(symbol.size(), 8); @@ -226,8 +222,8 @@ fn macho_x86_64_common() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(common_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + 
assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 0); let symbol = symbols.next().unwrap(); @@ -236,8 +232,8 @@ fn macho_x86_64_common() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(common_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.address(), 8); let symbol = symbols.next(); diff --git a/third_party/rust/object/tests/round_trip/elf.rs b/third_party/rust/object/tests/round_trip/elf.rs index 0aafadc865a0..dfc566c5b554 100644 --- a/third_party/rust/object/tests/round_trip/elf.rs +++ b/third_party/rust/object/tests/round_trip/elf.rs @@ -13,8 +13,8 @@ fn symtab_shndx() { for i in 0..0x10000 { let name = format!("func{}", i).into_bytes(); - let (section, offset) = - object.add_subsection(write::StandardSection::Text, &name, &[0xcc], 1); + let section = object.add_subsection(write::StandardSection::Text, &name); + let offset = object.append_section_data(section, &[0xcc], 1); object.add_symbol(write::Symbol { name, value: offset, @@ -34,7 +34,7 @@ fn symtab_shndx() { assert_eq!(object.format(), BinaryFormat::Elf); assert_eq!(object.architecture(), Architecture::X86_64); - for symbol in object.symbols().skip(1) { + for symbol in object.symbols() { assert_eq!( symbol.section(), SymbolSection::Section(SectionIndex(symbol.index().0)) @@ -62,7 +62,6 @@ fn aligned_sections() { assert_eq!(object.architecture(), Architecture::X86_64); let mut sections = object.sections(); - let _ = sections.next().unwrap(); let section = sections.next().unwrap(); assert_eq!(section.name(), Ok(".text")); @@ -88,7 +87,7 @@ fn compression_zlib() { ch.ch_addralign.set(LE, 1); let mut buf = Vec::new(); - buf.write(object::bytes_of(&ch)).unwrap(); + buf.write_all(object::bytes_of(&ch)).unwrap(); let mut encoder = 
flate2::write::ZlibEncoder::new(buf, flate2::Compression::default()); encoder.write_all(data).unwrap(); let compressed = encoder.finish().unwrap(); @@ -163,24 +162,24 @@ fn note() { let mut buffer = Vec::new(); buffer - .write(object::bytes_of(&elf::NoteHeader32 { + .write_all(object::bytes_of(&elf::NoteHeader32 { n_namesz: U32::new(endian, 6), n_descsz: U32::new(endian, 11), n_type: U32::new(endian, 1), })) .unwrap(); - buffer.write(b"name1\0\0\0").unwrap(); - buffer.write(b"descriptor\0\0").unwrap(); + buffer.write_all(b"name1\0\0\0").unwrap(); + buffer.write_all(b"descriptor\0\0").unwrap(); buffer - .write(object::bytes_of(&elf::NoteHeader32 { + .write_all(object::bytes_of(&elf::NoteHeader32 { n_namesz: U32::new(endian, 6), n_descsz: U32::new(endian, 11), n_type: U32::new(endian, 2), })) .unwrap(); - buffer.write(b"name2\0\0\0").unwrap(); - buffer.write(b"descriptor\0\0").unwrap(); + buffer.write_all(b"name2\0\0\0").unwrap(); + buffer.write_all(b"descriptor\0\0").unwrap(); let section = object.add_section(Vec::new(), b".note4".to_vec(), SectionKind::Note); object.section_mut(section).set_data(buffer, 4); @@ -189,24 +188,24 @@ fn note() { let mut buffer = Vec::new(); buffer - .write(object::bytes_of(&elf::NoteHeader32 { + .write_all(object::bytes_of(&elf::NoteHeader32 { n_namesz: U32::new(endian, 6), n_descsz: U32::new(endian, 11), n_type: U32::new(endian, 1), })) .unwrap(); - buffer.write(b"name1\0\0\0\0\0\0\0").unwrap(); - buffer.write(b"descriptor\0\0\0\0\0\0").unwrap(); + buffer.write_all(b"name1\0\0\0\0\0\0\0").unwrap(); + buffer.write_all(b"descriptor\0\0\0\0\0\0").unwrap(); buffer - .write(object::bytes_of(&elf::NoteHeader32 { + .write_all(object::bytes_of(&elf::NoteHeader32 { n_namesz: U32::new(endian, 4), n_descsz: U32::new(endian, 11), n_type: U32::new(endian, 2), })) .unwrap(); - buffer.write(b"abc\0").unwrap(); - buffer.write(b"descriptor\0\0\0\0\0\0").unwrap(); + buffer.write_all(b"abc\0").unwrap(); + 
buffer.write_all(b"descriptor\0\0\0\0\0\0").unwrap(); let section = object.add_section(Vec::new(), b".note8".to_vec(), SectionKind::Note); object.section_mut(section).set_data(buffer, 8); diff --git a/third_party/rust/object/tests/round_trip/macho.rs b/third_party/rust/object/tests/round_trip/macho.rs index f45d3db12664..d8ed0f89469d 100644 --- a/third_party/rust/object/tests/round_trip/macho.rs +++ b/third_party/rust/object/tests/round_trip/macho.rs @@ -33,31 +33,32 @@ fn issue_552_section_file_alignment() { Endianness::Little, ); - // Odd number of sections ensures that the starting file offset is not a multiple of 32. + // The starting file offset is not a multiple of 32 (checked later). // Length of 32 ensures that the file offset of the end of this section is still not a // multiple of 32. let section = object.add_section(vec![], vec![], object::SectionKind::ReadOnlyDataWithRel); - object.append_section_data(section, &vec![0u8; 32], 1); + object.append_section_data(section, &[0u8; 32], 1); // Address is already aligned correctly, so there must not any padding, // even though file offset is not aligned. let section = object.add_section(vec![], vec![], object::SectionKind::ReadOnlyData); - object.append_section_data(section, &vec![0u8; 1], 32); - - let section = object.add_section(vec![], vec![], object::SectionKind::Text); - object.append_section_data(section, &vec![0u8; 1], 1); + object.append_section_data(section, &[0u8; 1], 32); let bytes = &*object.write().unwrap(); + //std::fs::write(&"align.o", &bytes).unwrap(); let object = read::File::parse(bytes).unwrap(); let mut sections = object.sections(); let section = sections.next().unwrap(); - assert_eq!(section.file_range(), Some((368, 32))); + let offset = section.file_range().unwrap().0; + // Check file offset is not aligned to 32. 
+ assert_ne!(offset % 32, 0); assert_eq!(section.address(), 0); assert_eq!(section.size(), 32); let section = sections.next().unwrap(); - assert_eq!(section.file_range(), Some((400, 1))); + // Check there is no padding. + assert_eq!(section.file_range(), Some((offset + 32, 1))); assert_eq!(section.address(), 32); assert_eq!(section.size(), 1); } diff --git a/third_party/rust/object/tests/round_trip/mod.rs b/third_party/rust/object/tests/round_trip/mod.rs index cd696f608a2b..be5c96d0194e 100644 --- a/third_party/rust/object/tests/round_trip/mod.rs +++ b/third_party/rust/object/tests/round_trip/mod.rs @@ -1,10 +1,10 @@ #![cfg(all(feature = "read", feature = "write"))] use object::read::{Object, ObjectSection, ObjectSymbol}; -use object::{read, write, SectionIndex}; +use object::{read, write, SectionIndex, SubArchitecture}; use object::{ - Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationKind, SectionKind, - SymbolFlags, SymbolKind, SymbolScope, SymbolSection, + Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationFlags, RelocationKind, + SectionKind, SymbolFlags, SymbolKind, SymbolScope, SymbolSection, }; mod bss; @@ -17,100 +17,144 @@ mod section_flags; mod tls; #[test] -fn coff_x86_64() { - let mut object = - write::Object::new(BinaryFormat::Coff, Architecture::X86_64, Endianness::Little); +fn coff_any() { + for (arch, sub_arch) in [ + (Architecture::Aarch64, None), + (Architecture::Aarch64, Some(SubArchitecture::Arm64EC)), + (Architecture::Arm, None), + (Architecture::I386, None), + (Architecture::X86_64, None), + ] + .iter() + .copied() + { + let mut object = write::Object::new(BinaryFormat::Coff, arch, Endianness::Little); + object.set_sub_architecture(sub_arch); - object.add_file_symbol(b"file.c".to_vec()); + object.add_file_symbol(b"file.c".to_vec()); - let text = object.section_id(write::StandardSection::Text); - object.append_section_data(text, &[1; 30], 4); + let text = object.section_id(write::StandardSection::Text); + 
object.append_section_data(text, &[1; 30], 4); - let func1_offset = object.append_section_data(text, &[1; 30], 4); - assert_eq!(func1_offset, 32); - let func1_symbol = object.add_symbol(write::Symbol { - name: b"func1".to_vec(), - value: func1_offset, - size: 32, - kind: SymbolKind::Text, - scope: SymbolScope::Linkage, - weak: false, - section: write::SymbolSection::Section(text), - flags: SymbolFlags::None, - }); - object - .add_relocation( - text, - write::Relocation { - offset: 8, - size: 64, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, - symbol: func1_symbol, - addend: 0, - }, - ) - .unwrap(); + let func1_offset = object.append_section_data(text, &[1; 30], 4); + assert_eq!(func1_offset, 32); + let func1_symbol = object.add_symbol(write::Symbol { + name: b"func1".to_vec(), + value: func1_offset, + size: 32, + kind: SymbolKind::Text, + scope: SymbolScope::Linkage, + weak: false, + section: write::SymbolSection::Section(text), + flags: SymbolFlags::None, + }); + let func2_offset = object.append_section_data(text, &[1; 30], 4); + assert_eq!(func2_offset, 64); + object.add_symbol(write::Symbol { + name: b"func2_long".to_vec(), + value: func2_offset, + size: 32, + kind: SymbolKind::Text, + scope: SymbolScope::Linkage, + weak: false, + section: write::SymbolSection::Section(text), + flags: SymbolFlags::None, + }); + object + .add_relocation( + text, + write::Relocation { + offset: 8, + symbol: func1_symbol, + addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: arch.address_size().unwrap().bytes() * 8, + }, + }, + ) + .unwrap(); - let bytes = object.write().unwrap(); - let object = read::File::parse(&*bytes).unwrap(); - assert_eq!(object.format(), BinaryFormat::Coff); - assert_eq!(object.architecture(), Architecture::X86_64); - assert_eq!(object.endianness(), Endianness::Little); + let bytes = object.write().unwrap(); + let object = 
read::File::parse(&*bytes).unwrap(); + assert_eq!(object.format(), BinaryFormat::Coff); + assert_eq!(object.architecture(), arch); + assert_eq!(object.sub_architecture(), sub_arch); + assert_eq!(object.endianness(), Endianness::Little); - let mut sections = object.sections(); + let mut sections = object.sections(); - let text = sections.next().unwrap(); - println!("{:?}", text); - let text_index = text.index(); - assert_eq!(text.name(), Ok(".text")); - assert_eq!(text.kind(), SectionKind::Text); - assert_eq!(text.address(), 0); - assert_eq!(text.size(), 62); - assert_eq!(&text.data().unwrap()[..30], &[1; 30]); - assert_eq!(&text.data().unwrap()[32..62], &[1; 30]); + let text = sections.next().unwrap(); + println!("{:?}", text); + let text_index = text.index(); + assert_eq!(text.name(), Ok(".text")); + assert_eq!(text.kind(), SectionKind::Text); + assert_eq!(text.address(), 0); + assert_eq!(text.size(), 94); + assert_eq!(&text.data().unwrap()[..30], &[1; 30]); + assert_eq!(&text.data().unwrap()[32..62], &[1; 30]); - let mut symbols = object.symbols(); + let mut symbols = object.symbols(); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("file.c")); - assert_eq!(symbol.address(), 0); - assert_eq!(symbol.kind(), SymbolKind::File); - assert_eq!(symbol.section(), SymbolSection::None); - assert_eq!(symbol.scope(), SymbolScope::Compilation); - assert_eq!(symbol.is_weak(), false); + let symbol = symbols.next().unwrap(); + println!("{:?}", symbol); + assert_eq!(symbol.name(), Ok("file.c")); + assert_eq!(symbol.address(), 0); + assert_eq!(symbol.kind(), SymbolKind::File); + assert_eq!(symbol.section(), SymbolSection::None); + assert_eq!(symbol.scope(), SymbolScope::Compilation); + assert!(!symbol.is_weak()); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - let func1_symbol = symbol.index(); - assert_eq!(symbol.name(), Ok("func1")); - assert_eq!(symbol.address(), func1_offset); - assert_eq!(symbol.kind(), 
SymbolKind::Text); - assert_eq!(symbol.section_index(), Some(text_index)); - assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + let decorated_name = |name: &str| { + if arch == Architecture::I386 { + format!("_{name}") + } else { + name.to_owned() + } + }; - let mut relocations = text.relocations(); + let symbol = symbols.next().unwrap(); + println!("{:?}", symbol); + let func1_symbol = symbol.index(); + assert_eq!(symbol.name(), Ok(decorated_name("func1").as_str())); + assert_eq!(symbol.address(), func1_offset); + assert_eq!(symbol.kind(), SymbolKind::Text); + assert_eq!(symbol.section_index(), Some(text_index)); + assert_eq!(symbol.scope(), SymbolScope::Linkage); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); - let (offset, relocation) = relocations.next().unwrap(); - println!("{:?}", relocation); - assert_eq!(offset, 8); - assert_eq!(relocation.kind(), RelocationKind::Absolute); - assert_eq!(relocation.encoding(), RelocationEncoding::Generic); - assert_eq!(relocation.size(), 64); - assert_eq!( - relocation.target(), - read::RelocationTarget::Symbol(func1_symbol) - ); - assert_eq!(relocation.addend(), 0); + let symbol = symbols.next().unwrap(); + println!("{:?}", symbol); + assert_eq!(symbol.name(), Ok(decorated_name("func2_long").as_str())); + assert_eq!(symbol.address(), func2_offset); + assert_eq!(symbol.kind(), SymbolKind::Text); + assert_eq!(symbol.section_index(), Some(text_index)); + assert_eq!(symbol.scope(), SymbolScope::Linkage); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); - let map = object.symbol_map(); - let symbol = map.get(func1_offset + 1).unwrap(); - assert_eq!(symbol.address(), func1_offset); - assert_eq!(symbol.name(), "func1"); - assert_eq!(map.get(func1_offset - 1), None); + let mut relocations = text.relocations(); + + let (offset, relocation) = relocations.next().unwrap(); + println!("{:?}", relocation); + 
assert_eq!(offset, 8); + assert_eq!(relocation.kind(), RelocationKind::Absolute); + assert_eq!(relocation.encoding(), RelocationEncoding::Generic); + assert_eq!(relocation.size(), arch.address_size().unwrap().bytes() * 8); + assert_eq!( + relocation.target(), + read::RelocationTarget::Symbol(func1_symbol) + ); + assert_eq!(relocation.addend(), 0); + + let map = object.symbol_map(); + let symbol = map.get(func1_offset + 1).unwrap(); + assert_eq!(symbol.address(), func1_offset); + assert_eq!(symbol.name(), decorated_name("func1")); + assert_eq!(map.get(func1_offset - 1), None); + } } #[test] @@ -140,11 +184,13 @@ fn elf_x86_64() { text, write::Relocation { offset: 8, - size: 64, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol: func1_symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: 64, + }, }, ) .unwrap(); @@ -157,13 +203,6 @@ fn elf_x86_64() { let mut sections = object.sections(); - let section = sections.next().unwrap(); - println!("{:?}", section); - assert_eq!(section.name(), Ok("")); - assert_eq!(section.kind(), SectionKind::Metadata); - assert_eq!(section.address(), 0); - assert_eq!(section.size(), 0); - let text = sections.next().unwrap(); println!("{:?}", text); let text_index = text.index(); @@ -176,16 +215,6 @@ fn elf_x86_64() { let mut symbols = object.symbols(); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("")); - assert_eq!(symbol.address(), 0); - assert_eq!(symbol.kind(), SymbolKind::Null); - assert_eq!(symbol.section_index(), None); - assert_eq!(symbol.scope(), SymbolScope::Unknown); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), true); - let symbol = symbols.next().unwrap(); println!("{:?}", symbol); assert_eq!(symbol.name(), Ok("file.c")); @@ -193,7 +222,7 @@ fn elf_x86_64() { assert_eq!(symbol.kind(), SymbolKind::File); assert_eq!(symbol.section(), 
SymbolSection::None); assert_eq!(symbol.scope(), SymbolScope::Compilation); - assert_eq!(symbol.is_weak(), false); + assert!(!symbol.is_weak()); let symbol = symbols.next().unwrap(); println!("{:?}", symbol); @@ -203,8 +232,8 @@ fn elf_x86_64() { assert_eq!(symbol.kind(), SymbolKind::Text); assert_eq!(symbol.section_index(), Some(text_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); let mut relocations = text.relocations(); @@ -250,6 +279,8 @@ fn elf_any() { (Architecture::Riscv64, Endianness::Little), (Architecture::S390x, Endianness::Big), (Architecture::Sbf, Endianness::Little), + (Architecture::Sparc, Endianness::Big), + (Architecture::Sparc32Plus, Endianness::Big), (Architecture::Sparc64, Endianness::Big), (Architecture::Xtensa, Endianness::Little), ] @@ -267,11 +298,13 @@ fn elf_any() { section, write::Relocation { offset: 8, - size: 32, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: 32, + }, }, ) .unwrap(); @@ -281,11 +314,13 @@ fn elf_any() { section, write::Relocation { offset: 16, - size: 64, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: 64, + }, }, ) .unwrap(); @@ -300,13 +335,6 @@ fn elf_any() { let mut sections = object.sections(); - let section = sections.next().unwrap(); - println!("{:?}", section); - assert_eq!(section.name(), Ok("")); - assert_eq!(section.kind(), SectionKind::Metadata); - assert_eq!(section.address(), 0); - assert_eq!(section.size(), 0); - let data = sections.next().unwrap(); println!("{:?}", data); assert_eq!(data.name(), Ok(".data")); @@ -364,11 
+392,13 @@ fn macho_x86_64() { text, write::Relocation { offset: 8, - size: 64, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol: func1_symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: 64, + }, }, ) .unwrap(); @@ -377,11 +407,13 @@ fn macho_x86_64() { text, write::Relocation { offset: 16, - size: 32, - kind: RelocationKind::Relative, - encoding: RelocationEncoding::Generic, symbol: func1_symbol, addend: -4, + flags: RelocationFlags::Generic { + kind: RelocationKind::Relative, + encoding: RelocationEncoding::Generic, + size: 32, + }, }, ) .unwrap(); @@ -415,23 +447,11 @@ fn macho_x86_64() { assert_eq!(symbol.kind(), SymbolKind::Text); assert_eq!(symbol.section_index(), Some(text_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); let mut relocations = text.relocations(); - let (offset, relocation) = relocations.next().unwrap(); - println!("{:?}", relocation); - assert_eq!(offset, 8); - assert_eq!(relocation.kind(), RelocationKind::Absolute); - assert_eq!(relocation.encoding(), RelocationEncoding::Generic); - assert_eq!(relocation.size(), 64); - assert_eq!( - relocation.target(), - read::RelocationTarget::Symbol(func1_symbol) - ); - assert_eq!(relocation.addend(), 0); - let (offset, relocation) = relocations.next().unwrap(); println!("{:?}", relocation); assert_eq!(offset, 16); @@ -444,6 +464,18 @@ fn macho_x86_64() { ); assert_eq!(relocation.addend(), -4); + let (offset, relocation) = relocations.next().unwrap(); + println!("{:?}", relocation); + assert_eq!(offset, 8); + assert_eq!(relocation.kind(), RelocationKind::Absolute); + assert_eq!(relocation.encoding(), RelocationEncoding::Generic); + assert_eq!(relocation.size(), 64); + assert_eq!( + relocation.target(), + 
read::RelocationTarget::Symbol(func1_symbol) + ); + assert_eq!(relocation.addend(), 0); + let map = object.symbol_map(); let symbol = map.get(func1_offset + 1).unwrap(); assert_eq!(symbol.address(), func1_offset); @@ -453,23 +485,29 @@ fn macho_x86_64() { #[test] fn macho_any() { - for (arch, endian) in [ - (Architecture::Aarch64, Endianness::Little), - (Architecture::Aarch64_Ilp32, Endianness::Little), + for (arch, subarch, endian) in [ + (Architecture::Aarch64, None, Endianness::Little), + ( + Architecture::Aarch64, + Some(SubArchitecture::Arm64E), + Endianness::Little, + ), + (Architecture::Aarch64_Ilp32, None, Endianness::Little), /* TODO: - (Architecture::Arm, Endianness::Little), + (Architecture::Arm, None, Endianness::Little), */ - (Architecture::I386, Endianness::Little), - (Architecture::X86_64, Endianness::Little), + (Architecture::I386, None, Endianness::Little), + (Architecture::X86_64, None, Endianness::Little), /* TODO: - (Architecture::PowerPc, Endianness::Big), - (Architecture::PowerPc64, Endianness::Big), + (Architecture::PowerPc, None, Endianness::Big), + (Architecture::PowerPc64, None, Endianness::Big), */ ] .iter() .copied() { let mut object = write::Object::new(BinaryFormat::MachO, arch, endian); + object.set_sub_architecture(subarch); let section = object.section_id(write::StandardSection::Data); object.append_section_data(section, &[1; 30], 4); @@ -480,11 +518,13 @@ fn macho_any() { section, write::Relocation { offset: 8, - size: 32, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: 32, + }, }, ) .unwrap(); @@ -494,11 +534,13 @@ fn macho_any() { section, write::Relocation { offset: 16, - size: 64, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: 
RelocationEncoding::Generic, + size: 64, + }, }, ) .unwrap(); @@ -509,6 +551,7 @@ fn macho_any() { println!("{:?}", object.architecture()); assert_eq!(object.format(), BinaryFormat::MachO); assert_eq!(object.architecture(), arch); + assert_eq!(object.sub_architecture(), subarch); assert_eq!(object.endianness(), endian); let mut sections = object.sections(); @@ -521,14 +564,6 @@ fn macho_any() { let mut relocations = data.relocations(); - let (offset, relocation) = relocations.next().unwrap(); - println!("{:?}", relocation); - assert_eq!(offset, 8); - assert_eq!(relocation.kind(), RelocationKind::Absolute); - assert_eq!(relocation.encoding(), RelocationEncoding::Generic); - assert_eq!(relocation.size(), 32); - assert_eq!(relocation.addend(), 0); - if arch.address_size().unwrap().bytes() >= 8 { let (offset, relocation) = relocations.next().unwrap(); println!("{:?}", relocation); @@ -538,6 +573,14 @@ fn macho_any() { assert_eq!(relocation.size(), 64); assert_eq!(relocation.addend(), 0); } + + let (offset, relocation) = relocations.next().unwrap(); + println!("{:?}", relocation); + assert_eq!(offset, 8); + assert_eq!(relocation.kind(), RelocationKind::Absolute); + assert_eq!(relocation.encoding(), RelocationEncoding::Generic); + assert_eq!(relocation.size(), 32); + assert_eq!(relocation.addend(), 0); } } @@ -570,11 +613,13 @@ fn xcoff_powerpc() { text, write::Relocation { offset: 8, - size: 64, - kind: RelocationKind::Absolute, - encoding: RelocationEncoding::Generic, symbol: func1_symbol, addend: 0, + flags: RelocationFlags::Generic { + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: 64, + }, }, ) .unwrap(); @@ -606,8 +651,8 @@ fn xcoff_powerpc() { assert_eq!(symbol.kind(), SymbolKind::File); assert_eq!(symbol.section_index(), None); assert_eq!(symbol.scope(), SymbolScope::Compilation); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); 
symbol = symbols.next().unwrap(); println!("{:?}", symbol); @@ -617,8 +662,8 @@ fn xcoff_powerpc() { assert_eq!(symbol.kind(), SymbolKind::Text); assert_eq!(symbol.section_index(), Some(SectionIndex(text_index))); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); let mut relocations = text.relocations(); diff --git a/third_party/rust/object/tests/round_trip/section_flags.rs b/third_party/rust/object/tests/round_trip/section_flags.rs index b1ca398b4cd4..791ba8f54f07 100644 --- a/third_party/rust/object/tests/round_trip/section_flags.rs +++ b/third_party/rust/object/tests/round_trip/section_flags.rs @@ -48,7 +48,6 @@ fn elf_x86_64_section_flags() { assert_eq!(object.architecture(), Architecture::X86_64); let mut sections = object.sections(); - sections.next().unwrap(); let section = sections.next().unwrap(); assert_eq!(section.name(), Ok(".text")); assert_eq!( diff --git a/third_party/rust/object/tests/round_trip/tls.rs b/third_party/rust/object/tests/round_trip/tls.rs index 999e2f18159b..fa4109675d07 100644 --- a/third_party/rust/object/tests/round_trip/tls.rs +++ b/third_party/rust/object/tests/round_trip/tls.rs @@ -41,7 +41,7 @@ fn coff_x86_64_tls() { assert_eq!(section.name(), Ok(".tls$")); assert_eq!(section.kind(), SectionKind::Data); assert_eq!(section.size(), 30); - assert_eq!(§ion.data().unwrap()[..], &[1; 30]); + assert_eq!(section.data().unwrap(), &[1; 30]); let mut symbols = object.symbols(); @@ -51,8 +51,8 @@ fn coff_x86_64_tls() { assert_eq!(symbol.kind(), SymbolKind::Data); assert_eq!(symbol.section_index(), Some(tls_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); } #[test] @@ -96,17 +96,13 @@ fn elf_x86_64_tls() { let mut sections = 
object.sections(); - let section = sections.next().unwrap(); - println!("{:?}", section); - assert_eq!(section.name(), Ok("")); - let section = sections.next().unwrap(); println!("{:?}", section); let tdata_index = section.index(); assert_eq!(section.name(), Ok(".tdata")); assert_eq!(section.kind(), SectionKind::Tls); assert_eq!(section.size(), 30); - assert_eq!(§ion.data().unwrap()[..], &[1; 30]); + assert_eq!(section.data().unwrap(), &[1; 30]); let section = sections.next().unwrap(); println!("{:?}", section); @@ -114,22 +110,18 @@ fn elf_x86_64_tls() { assert_eq!(section.name(), Ok(".tbss")); assert_eq!(section.kind(), SectionKind::UninitializedTls); assert_eq!(section.size(), 31); - assert_eq!(§ion.data().unwrap()[..], &[]); + assert_eq!(section.data().unwrap(), &[]); let mut symbols = object.symbols(); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("")); - let symbol = symbols.next().unwrap(); println!("{:?}", symbol); assert_eq!(symbol.name(), Ok("tls1")); assert_eq!(symbol.kind(), SymbolKind::Tls); assert_eq!(symbol.section_index(), Some(tdata_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.size(), 30); let symbol = symbols.next().unwrap(); @@ -138,8 +130,8 @@ fn elf_x86_64_tls() { assert_eq!(symbol.kind(), SymbolKind::Tls); assert_eq!(symbol.section_index(), Some(tbss_index)); assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); assert_eq!(symbol.size(), 31); } @@ -194,7 +186,7 @@ fn macho_x86_64_tls() { assert_eq!(thread_data.segment_name(), Ok(Some("__DATA"))); assert_eq!(thread_data.kind(), SectionKind::Tls); assert_eq!(thread_data.size(), 30); - 
assert_eq!(&thread_data.data().unwrap()[..], &[1; 30]); + assert_eq!(thread_data.data().unwrap(), &[1; 30]); let thread_vars = sections.next().unwrap(); println!("{:?}", thread_vars); @@ -203,7 +195,7 @@ fn macho_x86_64_tls() { assert_eq!(thread_vars.segment_name(), Ok(Some("__DATA"))); assert_eq!(thread_vars.kind(), SectionKind::TlsVariables); assert_eq!(thread_vars.size(), 2 * 3 * 8); - assert_eq!(&thread_vars.data().unwrap()[..], &[0; 48][..]); + assert_eq!(thread_vars.data().unwrap(), &[0; 48]); let thread_bss = sections.next().unwrap(); println!("{:?}", thread_bss); @@ -216,15 +208,6 @@ fn macho_x86_64_tls() { let mut symbols = object.symbols(); - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("_tls1")); - assert_eq!(symbol.kind(), SymbolKind::Tls); - assert_eq!(symbol.section_index(), Some(thread_vars_index)); - assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); - let symbol = symbols.next().unwrap(); println!("{:?}", symbol); let tls1_init_symbol = symbol.index(); @@ -232,27 +215,8 @@ fn macho_x86_64_tls() { assert_eq!(symbol.kind(), SymbolKind::Tls); assert_eq!(symbol.section_index(), Some(thread_data_index)); assert_eq!(symbol.scope(), SymbolScope::Compilation); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); - - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - let tlv_bootstrap_symbol = symbol.index(); - assert_eq!(symbol.name(), Ok("__tlv_bootstrap")); - assert_eq!(symbol.kind(), SymbolKind::Unknown); - assert_eq!(symbol.section_index(), None); - assert_eq!(symbol.scope(), SymbolScope::Unknown); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), true); - - let symbol = symbols.next().unwrap(); - println!("{:?}", symbol); - assert_eq!(symbol.name(), Ok("_tls2")); - assert_eq!(symbol.kind(), SymbolKind::Tls); - assert_eq!(symbol.section_index(), 
Some(thread_vars_index)); - assert_eq!(symbol.scope(), SymbolScope::Linkage); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); let symbol = symbols.next().unwrap(); println!("{:?}", symbol); @@ -261,14 +225,54 @@ fn macho_x86_64_tls() { assert_eq!(symbol.kind(), SymbolKind::Tls); assert_eq!(symbol.section_index(), Some(thread_bss_index)); assert_eq!(symbol.scope(), SymbolScope::Compilation); - assert_eq!(symbol.is_weak(), false); - assert_eq!(symbol.is_undefined(), false); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); + + let symbol = symbols.next().unwrap(); + println!("{:?}", symbol); + assert_eq!(symbol.name(), Ok("_tls1")); + assert_eq!(symbol.kind(), SymbolKind::Tls); + assert_eq!(symbol.section_index(), Some(thread_vars_index)); + assert_eq!(symbol.scope(), SymbolScope::Linkage); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); + + let symbol = symbols.next().unwrap(); + println!("{:?}", symbol); + assert_eq!(symbol.name(), Ok("_tls2")); + assert_eq!(symbol.kind(), SymbolKind::Tls); + assert_eq!(symbol.section_index(), Some(thread_vars_index)); + assert_eq!(symbol.scope(), SymbolScope::Linkage); + assert!(!symbol.is_weak()); + assert!(!symbol.is_undefined()); + + let symbol = symbols.next().unwrap(); + println!("{:?}", symbol); + let tlv_bootstrap_symbol = symbol.index(); + assert_eq!(symbol.name(), Ok("__tlv_bootstrap")); + assert_eq!(symbol.kind(), SymbolKind::Unknown); + assert_eq!(symbol.section_index(), None); + assert_eq!(symbol.scope(), SymbolScope::Unknown); + assert!(!symbol.is_weak()); + assert!(symbol.is_undefined()); let mut relocations = thread_vars.relocations(); let (offset, relocation) = relocations.next().unwrap(); println!("{:?}", relocation); - assert_eq!(offset, 0); + assert_eq!(offset, 40); + assert_eq!(relocation.kind(), RelocationKind::Absolute); + assert_eq!(relocation.encoding(), 
RelocationEncoding::Generic); + assert_eq!(relocation.size(), 64); + assert_eq!( + relocation.target(), + read::RelocationTarget::Symbol(tls2_init_symbol) + ); + assert_eq!(relocation.addend(), 0); + + let (offset, relocation) = relocations.next().unwrap(); + println!("{:?}", relocation); + assert_eq!(offset, 24); assert_eq!(relocation.kind(), RelocationKind::Absolute); assert_eq!(relocation.encoding(), RelocationEncoding::Generic); assert_eq!(relocation.size(), 64); @@ -292,7 +296,7 @@ fn macho_x86_64_tls() { let (offset, relocation) = relocations.next().unwrap(); println!("{:?}", relocation); - assert_eq!(offset, 24); + assert_eq!(offset, 0); assert_eq!(relocation.kind(), RelocationKind::Absolute); assert_eq!(relocation.encoding(), RelocationEncoding::Generic); assert_eq!(relocation.size(), 64); @@ -301,16 +305,4 @@ fn macho_x86_64_tls() { read::RelocationTarget::Symbol(tlv_bootstrap_symbol) ); assert_eq!(relocation.addend(), 0); - - let (offset, relocation) = relocations.next().unwrap(); - println!("{:?}", relocation); - assert_eq!(offset, 40); - assert_eq!(relocation.kind(), RelocationKind::Absolute); - assert_eq!(relocation.encoding(), RelocationEncoding::Generic); - assert_eq!(relocation.size(), 64); - assert_eq!( - relocation.target(), - read::RelocationTarget::Symbol(tls2_init_symbol) - ); - assert_eq!(relocation.addend(), 0); } diff --git a/third_party/rust/pe-unwind-info/.cargo-checksum.json b/third_party/rust/pe-unwind-info/.cargo-checksum.json new file mode 100644 index 000000000000..2bddd32f7e75 --- /dev/null +++ b/third_party/rust/pe-unwind-info/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"Cargo.toml":"50262c6d6c814ee9ae3840f87e096f658f9beec8c5fbd53847b4cee8839d87b7","ChangeLog.md":"2de13509b7b0b845a99221386802aa4404601f5d957a734691b885b40ce7c744","LICENSE":"91a870c16b8223cc0135b4219c2f3ced14a8f2bd7110073a829b216b1cc5ac00","README.md":"c1bca737d0dfb4de59a0f901f310d789a957e05c27b89a35ac9df113e4b73258","src/arm64.rs":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","src/lib.rs":"5dce9b288a5badc12e176879a425e75ebcf81ac57595e6127322d45aa532f9b5","src/x86_64.rs":"b8c877f6edb9af0f416de3afba7fcfa3edf596c0f2b43ec6b9dbc57f3ca2596d"},"package":"6ec3b43050c38ffb9de87e17d874e9956e3a9131b343c9b7b7002597727c3891"} \ No newline at end of file diff --git a/third_party/rust/pe-unwind-info/Cargo.toml b/third_party/rust/pe-unwind-info/Cargo.toml new file mode 100644 index 000000000000..79deb1e0c362 --- /dev/null +++ b/third_party/rust/pe-unwind-info/Cargo.toml @@ -0,0 +1,73 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "pe-unwind-info" +version = "0.2.3" +authors = ["Alex Franchuk "] +exclude = ["/fixture"] +description = "Parsers for PE unwind info" +homepage = "https://github.com/afranchuk/pe-unwind-info" +readme = "README.md" +keywords = [ + "unwind", + "pe", + "microsoft", + "x86_64", + "x64", +] +categories = ["parser-implementations"] +license = "MIT" + +[package.metadata.release] +pre-release-commit-message = "Prepare for version {{version}}" +tag-message = "Release {{crate_name}} version {{version}}" +tag-name = "{{version}}" + +[[package.metadata.release.pre-release-replacements]] +file = "ChangeLog.md" +replace = """ +## [Unreleased] + +## [{{version}}] - {{date}}""" +search = '## \[Unreleased\]' + +[[package.metadata.release.pre-release-replacements]] +file = "ChangeLog.md" +replace = """ +[Unreleased]: $1/{{tag_name}}...HEAD +[{{version}}]: $1/$2...{{tag_name}}""" +search = '\[Unreleased\]: (.*)/(.*)\.\.\.HEAD$' + +[dependencies.arrayvec] +version = "0.7" + +[dependencies.bitflags] +version = "2" + +[dependencies.thiserror] +version = "1" + +[dependencies.zerocopy] +version = "0.7.32" + +[dependencies.zerocopy-derive] +version = "0.7.32" + +[dev-dependencies.hex-literal] +version = "0.4" + +[dev-dependencies.memmap2] +version = "0.7" + +[dev-dependencies.object] +version = "0.31" diff --git a/third_party/rust/pe-unwind-info/ChangeLog.md b/third_party/rust/pe-unwind-info/ChangeLog.md new file mode 100644 index 000000000000..662c80640bfe --- /dev/null +++ b/third_party/rust/pe-unwind-info/ChangeLog.md @@ -0,0 +1,30 @@ +# Changelog + +## [Unreleased] + +## [0.2.3] - 2024-03-04 +* Fix the scaling of 32-bit large allocation unwind operations. Thanks @ishitatsuyuki! + +## [0.2.2] - 2024-02-29 +* Chained unwind info was not correctly parsed, and is now fixed. Thanks @ishitatsuyuki! + +## [0.2.1] - 2024-01-18 +* Separate `zerocopy-derive` and `zerocopy` to improve build times. Add `zerocopy::Unaligned` to + types. 
+ +## [0.2.0] - 2024-01-18 +* Update zerocopy to 0.7.32. + +## [0.1.1] - 2023-12-14 +* Remove unsafe code ([#1](https://github.com/afranchuk/pe-unwind-info/pull/1)) + +## [0.1.0] - 2023-07-25 +* Initial release. + +[Unreleased]: https://github.com/afranchuk/pe-unwind-info/compare/0.2.3...HEAD +[0.2.3]: https://github.com/afranchuk/pe-unwind-info/compare/0.2.2...0.2.3 +[0.2.2]: https://github.com/afranchuk/pe-unwind-info/compare/0.2.1...0.2.2 +[0.2.1]: https://github.com/afranchuk/pe-unwind-info/compare/0.2.0...0.2.1 +[0.2.0]: https://github.com/afranchuk/pe-unwind-info/compare/0.1.1...0.2.0 +[0.1.1]: https://github.com/afranchuk/pe-unwind-info/compare/0.1.0...0.1.1 +[0.1.0]: https://github.com/afranchuk/pe-unwind-info/releases/tag/0.0.1 diff --git a/third_party/rust/pe-unwind-info/LICENSE b/third_party/rust/pe-unwind-info/LICENSE new file mode 100644 index 000000000000..74f26511d0f1 --- /dev/null +++ b/third_party/rust/pe-unwind-info/LICENSE @@ -0,0 +1,25 @@ +Copyright (c) 2023 Alex Franchuk + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/pe-unwind-info/README.md b/third_party/rust/pe-unwind-info/README.md new file mode 100644 index 000000000000..30195bea797d --- /dev/null +++ b/third_party/rust/pe-unwind-info/README.md @@ -0,0 +1,18 @@ +[![crates.io page](https://img.shields.io/crates/v/pe-unwind-info.svg)](https://crates.io/crates/pe-unwind-info) +[![docs.rs page](https://docs.rs/pe-unwind-info/badge.svg)](https://docs.rs/pe-unwind-info/) + +# pe-unwind-info + +A zero-copy parser for the contents of the `.pdata` section and unwind info structures (typically +addressed by the contents of the `.pdata` section). + +This library provides low-level, efficient parsers for the function tables in `.pdata` as well as +unwind info structures in other places. On top of this functionality, higher-level functionality to unwind an entire +frame (given a module's contents) is provided. This only copies data as necessary. No heap +allocations are needed. + +This currently targets `x86_64` PE modules. `ARM64` support will be added soon. + +This library assumes all information is little-endian: as far as I can tell, Windows always either +targets little-endian-only CPUs or configures CPUs which support little- and big-endian to be +little-endian. diff --git a/third_party/rust/pe-unwind-info/src/arm64.rs b/third_party/rust/pe-unwind-info/src/arm64.rs new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/third_party/rust/pe-unwind-info/src/lib.rs b/third_party/rust/pe-unwind-info/src/lib.rs new file mode 100644 index 000000000000..48332424667a --- /dev/null +++ b/third_party/rust/pe-unwind-info/src/lib.rs @@ -0,0 +1,9 @@ +//! Zero-copy parsers of the contents of the `.pdata` section and unwind information in PE +//! 
binaries. +//! +//! On top of these parsers, some higher-level interfaces are provided to easily unwind frames. The +//! parsers and the higher interfaces are written with efficiency in mind, doing minimal copying of +//! data. There is no heap allocation. + +//pub mod arm64; +pub mod x86_64; diff --git a/third_party/rust/pe-unwind-info/src/x86_64.rs b/third_party/rust/pe-unwind-info/src/x86_64.rs new file mode 100644 index 000000000000..a110886cda3f --- /dev/null +++ b/third_party/rust/pe-unwind-info/src/x86_64.rs @@ -0,0 +1,1205 @@ +//! Unwind information for x86_64 (usually called x64 in microsoft documentation). +//! +//! The high-level API is accessed through `FunctionTableEntries::unwind_frame`. This function +//! allows you to unwind a frame to get the return address and all updated contextual registers. + +use arrayvec::ArrayVec; +use std::ops::ControlFlow; +use thiserror::Error; +use zerocopy::{FromBytes, Ref, Unaligned, LE}; +use zerocopy_derive::{FromBytes, FromZeroes, Unaligned}; + +type U16 = zerocopy::U16; + +/// Little-endian u32. +pub type U32 = zerocopy::U32; + +/// A view over function table entries in the `.pdata` section. +#[derive(Debug, Clone, Copy)] +pub struct FunctionTableEntries<'a> { + data: &'a [u8], +} + +/// A runtime function record in the function table. +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct RuntimeFunction { + /// The start relative virtual address of the function. + pub begin_address: U32, + /// The end relative virtual address of the function. + pub end_address: U32, + /// The relative virtual address of the unwind information related to the function. + pub unwind_info_address: U32, +} + +impl<'a> FunctionTableEntries<'a> { + /// Parse function table entries from the given `.pdata` section contents. + pub fn parse(data: &'a [u8]) -> Self { + FunctionTableEntries { data } + } + + /// Get the number of `RuntimeFunction` stored in the function table. 
+ pub fn functions_len(&self) -> usize { + self.data.len() / std::mem::size_of::() + } + + /// Get the `RuntimeFunction`s in the function table, if the parsed data is well-aligned and + /// sized. + pub fn functions(&self) -> Option<&'a [RuntimeFunction]> { + Ref::new_slice_unaligned(self.data).map(|lv| lv.into_slice()) + } + + /// Lookup the runtime function that contains the given relative virtual address. + pub fn lookup(&self, address: u32) -> Option<&'a RuntimeFunction> { + let functions = self.functions()?; + match functions.binary_search_by_key(&address, |f| f.begin_address.get()) { + Ok(i) => Some(&functions[i]), + Err(i) if i > 0 && address < functions[i - 1].end_address.get() => { + Some(&functions[i - 1]) + } + _ => None, + } + } + + pub fn unwind_frame<'m, S: UnwindState, M>( + &self, + state: &mut S, + mut memory_at_rva: M, + address: u32, + ) -> Option + where + M: FnMut(u32) -> Option<&'m [u8]> + 'm, + { + // This implements the procedure found + // [here](https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#unwind-procedure). + if let Some(mut function) = self.lookup(address) { + let offset = address - function.begin_address.get(); + let mut is_chained = false; + loop { + let unwind_info = + UnwindInfo::parse(memory_at_rva(function.unwind_info_address.get())?)?; + + if !is_chained { + // Check whether the address is in the function epilog. If so, we need to + // simulate the remaining epilog instructions (unwind codes don't account for + // unwinding from the epilog). 
+ let bytes = (function.end_address.get() - address) as usize; + let instruction = &memory_at_rva(address)?[..bytes]; + if let Ok(epilog_instructions) = FunctionEpilogInstruction::parse_sequence( + instruction, + unwind_info.frame_register(), + ) { + for instruction in epilog_instructions.iter() { + match instruction { + FunctionEpilogInstruction::AddSP(offset) => { + let rsp = state.read_register(Register::RSP); + state.write_register(Register::RSP, rsp + *offset as u64); + } + FunctionEpilogInstruction::AddSPFromFP(offset) => { + let fp = unwind_info + .frame_register() + .expect("invalid fp register offset"); + let fp = state.read_register(fp); + state.write_register(Register::RSP, fp + *offset as u64); + } + FunctionEpilogInstruction::Pop(reg) => { + let rsp = state.read_register(Register::RSP); + let val = state.read_stack(rsp)?; + state.write_register(*reg, val); + state.write_register(Register::RSP, rsp + 8); + } + } + } + break; + } + } + + for (_, op) in unwind_info + .unwind_operations() + .skip_while(|(o, _)| !is_chained && *o as u32 > offset) + { + if let ControlFlow::Break(rip) = unwind_info.resolve_operation(state, &op)? { + return Some(rip); + } + } + if let Some(UnwindInfoTrailer::ChainedUnwindInfo { chained }) = + unwind_info.trailer() + { + is_chained = true; + function = chained; + } else { + break; + } + } + } + let rsp = state.read_register(Register::RSP); + let rip = state.read_stack(rsp)?; + state.write_register(Register::RSP, rsp + 8); + Some(rip) + } + + /// Unwind a single frame at the given relative virtual address. + /// + /// This does not attempt to invoke any exception or termination handlers. + /// + /// Returns `None` if `UnwindInfo` could not be parsed, a stack value could not be read, or a + /// memory offset in the binary could not be read (whether when parsing the section table or + /// when reading memory pointed to by the section table). 
+ pub fn unwind_frame_with_image( + &self, + state: &mut S, + image: &[u8], + address: u32, + ) -> Option { + let sections = Sections::parse(image)?; + self.unwind_frame(state, |addr| sections.memory_at_rva(addr), address) + } +} + +impl<'a> Iterator for FunctionTableEntries<'a> { + type Item = &'a RuntimeFunction; + + fn next(&mut self) -> Option { + let (rf, rest) = Ref::<_, RuntimeFunction>::new_unaligned_from_prefix(self.data)?; + self.data = rest; + Some(rf.into_ref()) + } +} + +#[derive(Debug, Clone, Copy)] +struct Sections<'a> { + image: &'a [u8], + sections: &'a [Section], +} + +impl<'a> Sections<'a> { + pub fn parse(image: &'a [u8]) -> Option { + let sig_offset = Ref::<_, U32>::new_unaligned(image.get(0x3c..0x40)?)?.get() as usize; + // Offset to the COFF header + let coff_image = image.get(sig_offset + 4..)?; + let section_count = Ref::<_, U16>::new_unaligned(coff_image.get(2..4)?)?.get() as usize; + let size_of_optional_header = + Ref::<_, U16>::new_unaligned(coff_image.get(16..18)?)?.get() as usize; + let sections = Ref::<_, [Section]>::new_slice_unaligned_from_prefix( + &coff_image[20 + size_of_optional_header..], + section_count, + )? 
+ .0 + .into_slice(); + Some(Sections { image, sections }) + } + + pub fn memory_at_rva(&self, rva: u32) -> Option<&'a [u8]> { + let section_index = match self + .sections + .binary_search_by_key(&rva, |s| s.virtual_address.get()) + { + Ok(i) => i, + Err(i) + if i > 0 + && rva + < self.sections[i - 1].virtual_address.get() + + self.sections[i - 1].virtual_size.get() => + { + i - 1 + } + Err(_) => return None, + }; + let section = &self.sections[section_index]; + let start = section.pointer_to_raw_data.get() as usize; + let offset = (rva - section.virtual_address.get()) as usize; + Some(&self.image[start + offset..]) + } +} + +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +struct Section { + _name: [u8; 8], + virtual_size: U32, + virtual_address: U32, + _size_of_raw_data: U32, + pointer_to_raw_data: U32, + _pointer_to_relocations: U32, + _pointer_to_line_numbers: U32, + _number_of_relocations: U16, + _number_of_line_numbers: U16, + _characteristics: U32, +} + +/// A general-purpose register. +/// +/// If converted to a u8, the resulting value matches those in the x86_64 spec for register +/// operands as well as the operation info bits in unwind codes. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum Register { + RAX, + RCX, + RDX, + RBX, + RSP, + RBP, + RSI, + RDI, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +} + +impl TryFrom for Register { + type Error = (); + #[inline(always)] + fn try_from(reg: u8) -> Result { + let reg = match reg { + 0 => Self::RAX, + 1 => Self::RCX, + 2 => Self::RDX, + 3 => Self::RBX, + 4 => Self::RSP, + 5 => Self::RBP, + 6 => Self::RSI, + 7 => Self::RDI, + 8 => Self::R8, + 9 => Self::R9, + 10 => Self::R10, + 11 => Self::R11, + 12 => Self::R12, + 13 => Self::R13, + 14 => Self::R14, + 15 => Self::R15, + _ => return Err(()), + }; + Ok(reg) + } +} + +/// A 128-bit XMM register. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum XmmRegister { + XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7, + XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15, +} + +impl TryFrom for XmmRegister { + type Error = (); + #[inline(always)] + fn try_from(reg: u8) -> Result { + let reg = match reg { + 0 => Self::XMM0, + 1 => Self::XMM1, + 2 => Self::XMM2, + 3 => Self::XMM3, + 4 => Self::XMM4, + 5 => Self::XMM5, + 6 => Self::XMM6, + 7 => Self::XMM7, + 8 => Self::XMM8, + 9 => Self::XMM9, + 10 => Self::XMM10, + 11 => Self::XMM11, + 12 => Self::XMM12, + 13 => Self::XMM13, + 14 => Self::XMM14, + 15 => Self::XMM15, + _ => return Err(()), + }; + Ok(reg) + } +} + +/// Fixed data at the start of [PE UnwindInfo][unwindinfo]. +/// +/// [unwindinfo]: https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#struct-unwind_info +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct UnwindInfoHeader { + /// The unwind information version and flags. + pub version_and_flags: u8, + /// The length of the function prolog, in bytes. + pub prolog_size: u8, + /// The number of u16 slots in the unwind codes array. + pub unwind_codes_len: u8, + /// The frame register and offset. + pub frame_register_and_offset: u8, +} + +bitflags::bitflags! { + /// The unwind info bit flags. + /// + /// Note that while they are individual bits, it seems as if they can only be + /// mutually-exclusive. + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] + pub struct UnwindInfoFlags: u8 { + /// The function has an exception handler that should be called when looking for functions + /// that need to examine exceptions. + const EHANDLER = 0x1; + /// The function has a termination handler that should be called when unwinding an + /// exception. + const UHANDLER = 0x2; + /// This unwind info structure is not the primary one for the procedure. 
Instead, the + /// chained unwind info entry is the contents of a previous RuntimeFunction entry. If this + /// flag is set, then the EHANDLER and UHANDLER flags must be cleared. Also, the frame + /// register and fixed-stack allocation fields must have the same values as in the primary + /// unwind info. + const CHAININFO = 0x4; + } +} + +impl UnwindInfoHeader { + /// The UnwindInfo version. Should be `1`. + #[inline] + pub fn version(&self) -> u8 { + self.version_and_flags & 0x7 + } + + /// The raw flags bits. + #[inline] + pub fn flags_raw(&self) -> u8 { + self.version_and_flags >> 3 + } + + /// The raw frame register value. + #[inline] + pub fn frame_register_raw(&self) -> u8 { + self.frame_register_and_offset & 0xf + } + + /// The raw frame register offset value. + #[inline] + pub fn frame_register_offset_raw(&self) -> u8 { + self.frame_register_and_offset >> 4 + } + + /// The unwind info flags. + pub fn flags(&self) -> UnwindInfoFlags { + UnwindInfoFlags::from_bits_truncate(self.flags_raw()) + } + + /// The frame register, if any. + pub fn frame_register(&self) -> Option { + let reg = self.frame_register_raw(); + (reg != 0).then_some( + reg.try_into() + .expect("reg is <= 15 so this always succeeds"), + ) + } + + /// The scaled frame register offset. + pub fn frame_register_offset(&self) -> u8 { + // u8 is appropriate as the maximum value is 15 * 16 = 240 + self.frame_register_offset_raw() * 16 + } + + /// Get an absolute address from the given StackFrameOffset. + pub fn resolve_offset(&self, read_register: F, offset: StackFrameOffset) -> u64 + where + F: FnOnce(Register) -> u64, + { + match self.frame_register() { + Some(reg) => read_register(reg) - self.frame_register_offset() as u64 + offset.0 as u64, + None => read_register(Register::RSP) + offset.0 as u64, + } + } + + /// Perform the given UnwindOperation, changing `state` appropriately. + /// + /// Returns `None` when reading the stack fails. 
+ pub fn resolve_operation( + &self, + state: &mut S, + op: &UnwindOperation, + ) -> Option> { + match op { + UnwindOperation::PopNonVolatile(reg) => { + let rsp = state.read_register(Register::RSP); + let value = state.read_stack(rsp)?; + state.write_register(*reg, value); + state.write_register(Register::RSP, rsp + 8); + } + UnwindOperation::UnStackAlloc(bytes) => { + let rsp = state.read_register(Register::RSP); + state.write_register(Register::RSP, rsp + *bytes as u64); + } + UnwindOperation::RestoreSPFromFP => { + if let Some(reg) = self.frame_register() { + let value = state.read_register(reg) - self.frame_register_offset() as u64; + state.write_register(Register::RSP, value); + } + } + UnwindOperation::ReadNonVolatile(reg, offset) => { + let addr = self.resolve_offset(|reg| state.read_register(reg), *offset); + let value = state.read_stack(addr)?; + state.write_register(*reg, value); + } + UnwindOperation::ReadXMM(reg, offset) => { + let addr = self.resolve_offset(|reg| state.read_register(reg), *offset); + let value = + state.read_stack(addr)? as u128 | ((state.read_stack(addr + 8)? as u128) << 64); + state.write_xmm_register(*reg, value); + } + UnwindOperation::PopMachineFrame { error_code } => { + let offset = if *error_code { 8 } else { 0 }; + let rsp = state.read_register(Register::RSP); + let return_address = state.read_stack(rsp + offset)?; + let rsp = state.read_stack(rsp + offset + 24)?; + state.write_register(Register::RSP, rsp); + return Some(ControlFlow::Break(return_address)); + } + } + + Some(ControlFlow::Continue(())) + } +} + +/// A virtual instruction in the function epilog, interpreted from x86_64 instructions. +#[derive(Debug, Clone, Copy)] +pub enum FunctionEpilogInstruction { + /// Add the given offset to the stack pointer. + AddSP(u32), + /// Add the given offset to the frame pointer to recover the stack pointer. + AddSPFromFP(u32), + /// Pop a value from the stack into the given register. 
+ Pop(Register), +} + +/// An error resulting from an attempt at parsing an epilog instruction. +#[derive(Error, Debug)] +pub enum InstructionParseError { + #[error("not enough data")] + NotEnoughData, + #[error("invalid instruction found")] + InvalidInstruction, + #[error("too many instructions for epilog")] + TooManyInstructions, +} + +/// The maximum number of instructions to allow when parsing a function epilog. +/// +/// There is at most one AddSP/AddSPFromFP, and only 8 caller-saved registers (disregarding the +/// implicit RSP). We give a bit of extra space just in case, but it shouldn't be necessary. +pub const FUNCTION_EPILOG_LIMIT: usize = 12; + +impl FunctionEpilogInstruction { + /// Parse a function epilog instruction. + /// + /// Returns Ok(None) if the instruction is an epilog terminator (`ret` or `jmp`). + /// + /// `allow_add_sp` should only be true for the (potential) first instruction in an epilog. + pub fn parse( + ip: &[u8], + fpreg: Option, + allow_add_sp: bool, + ) -> Result, InstructionParseError> { + if ip.is_empty() { + return Err(InstructionParseError::NotEnoughData); + } + + // Read REX instruction byte if present. 
+ let (rex, ip) = if ip[0] & 0xf0 == 0x40 { + (ip[0] & 0x0f, &ip[1..]) + } else { + (0, &ip[0..]) + }; + + // Both add and lea need at least 3 bytes after REX + if allow_add_sp && ip.len() >= 3 { + // add RSP,imm32 + if rex & 0x8 != 0 && ip[0] == 0x81 && ip[1] == 0xc4 { + let (val, rest) = Ref::<_, U32>::new_unaligned_from_prefix(&ip[2..]) + .ok_or(InstructionParseError::NotEnoughData)?; + return Ok(Some((FunctionEpilogInstruction::AddSP(val.get()), rest))); + } + // add RSP,imm8 + if rex & 0x8 != 0 && ip[0] == 0x83 && ip[1] == 0xc4 { + return Ok(Some(( + FunctionEpilogInstruction::AddSP(ip[2] as u32), + &ip[3..], + ))); + } + + if let Some(fpreg) = fpreg { + let fpreg = fpreg as u8; + if rex & 0x8 != 0 && (rex & 0x1 == fpreg >> 3) && ip[0] == 0x8d { + if ip[1] & 0x3f == (0x20 | (fpreg & 0b0111)) { + let op_mod = ip[1] >> 6; + // lea RSP,disp8[FP] + if op_mod == 0b01 { + return Ok(Some(( + FunctionEpilogInstruction::AddSPFromFP(ip[2] as u32), + &ip[3..], + ))); + // lea RSP,disp32[FP] + } else if op_mod == 0b10 { + let (val, rest) = Ref::<_, U32>::new_unaligned_from_prefix(&ip[2..]) + .ok_or(InstructionParseError::NotEnoughData)?; + return Ok(Some(( + FunctionEpilogInstruction::AddSPFromFP(val.get()), + rest, + ))); + } else { + // Invalid op_mod + return Err(InstructionParseError::InvalidInstruction); + } + } else { + // Invalid lea + return Err(InstructionParseError::InvalidInstruction); + } + } + } + } + + // pop r/m64 + if ip.len() >= 2 && ip[0] == 0x8f && ip[1] & 0xf8 == 0xc0 { + let reg = ip[1] & 0x7 | ((rex & 1) << 3); + return Ok(Some(( + FunctionEpilogInstruction::Pop( + reg.try_into().expect( + "`reg` is between 0 and 15, which are defined values of `Register`.", + ), + ), + &ip[2..], + ))); + } + // pop r64 + if !ip.is_empty() && ip[0] & 0xf8 == 0x58 { + let reg = ip[0] & 0x7 | ((rex & 1) << 3); + debug_assert!(reg <= 15); + return Ok(Some(( + FunctionEpilogInstruction::Pop( + reg.try_into().expect( + "`reg` is between 0 and 15, which are defined values 
of `Register`.", + ), + ), + &ip[1..], + ))); + } + + // ret + if !ip.is_empty() && ip[0] == 0xc3 { + return Ok(None); + } + + if ip.len() >= 2 { + // jmp with relative displacements + // + // The MS docs say epilogs only have jmp instructions with a ModRM byte, but I've seen + // relative displacement jmps too (tail calls). + if ip[0] == 0xeb || ip[0] == 0xe9 { + return Ok(None); + } + // jmp with ModRM and mod bits as 00 + if ip[0] == 0xff { + let mod_opcode = ip[1] & 0xf8; + if mod_opcode == 0x20 || mod_opcode == 0x28 { + return Ok(None); + } else { + return Err(InstructionParseError::InvalidInstruction); + } + } + } + + // not a valid epilog instruction + Err(InstructionParseError::InvalidInstruction) + } + + /// Check whether a series of instructions are a tail of a function epilog + /// and parse them into a limited sequence of epilog instructions. + /// + /// This function does not allocate memory; the result is stored in a + /// fixed-capacity `ArrayVec`. + /// + /// Returns `Err` if too many instructions were encountered or if the + /// instructions do not appear to be a function epilog. + /// + /// [Epilogs][] look like: + /// * `add RSP,` or `lea RSP,constant[FPReg]` + /// * zero or more `pop ` + /// * `ret` or `jmp` with a ModRM argument with mod field 00 + /// + /// [Epilogs]: https://learn.microsoft.com/en-us/cpp/build/prolog-and-epilog?view=msvc-170#epilog-code + pub fn parse_sequence( + ip: &[u8], + frame_register: Option, + ) -> Result, InstructionParseError> { + let mut buffer = ArrayVec::new(); + let mut instruction_and_rest = Self::parse(ip, frame_register, true)?; + + while let Some((instruction, rest)) = instruction_and_rest { + buffer + .try_push(instruction) + .map_err(|_| InstructionParseError::TooManyInstructions)?; + + instruction_and_rest = Self::parse(rest, frame_register, false)? + } + + Ok(buffer) + } +} + +/// An interface over state needed for unwinding stack frames. 
+pub trait UnwindState { + /// Return the value of the given register. + fn read_register(&mut self, register: Register) -> u64; + /// Return the 8-byte value at the given address on the stack, if any. + fn read_stack(&mut self, addr: u64) -> Option; + /// Write a new value to the given register, updating the unwind context. + fn write_register(&mut self, register: Register, value: u64); + /// Write a new value to the given xmm register, updating the unwind context. + fn write_xmm_register(&mut self, register: XmmRegister, value: u128); +} + +/// Optional information at the end of UnwindInfo. +pub enum UnwindInfoTrailer<'a> { + /// There is an exception handler associated with this unwind info. + ExceptionHandler { + handler_address: &'a U32, + handler_data: &'a [u8], + }, + /// There is a termination handler associated with this unwind info. + TerminationHandler { + handler_address: &'a U32, + handler_data: &'a [u8], + }, + /// There is a chained unwind info entry associated with this unwind info. + ChainedUnwindInfo { chained: &'a RuntimeFunction }, +} + +/// A function's unwind information. +#[derive(Clone, Copy)] +pub struct UnwindInfo<'a> { + header: &'a UnwindInfoHeader, + unwind_codes: &'a [u8], + rest: &'a [u8], +} + +impl std::fmt::Debug for UnwindInfo<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("UnwindInfo") + .field("header", self.header) + .field("unwind_codes", &self.unwind_codes) + .finish_non_exhaustive() + } +} + +impl<'a> UnwindInfo<'a> { + /// Read the unwind info from the given buffer. + /// + /// Returns None if there aren't enough bytes or the alignment is incorrect. 
+ pub fn parse(data: &'a [u8]) -> Option { + let (header, rest) = Ref::<_, UnwindInfoHeader>::new_unaligned_from_prefix(data)?; + if header.version() != 1 { + return None; + } + let (unwind_codes, rest) = + Ref::new_slice_unaligned_from_prefix(rest, header.unwind_codes_len as usize * 2)?; + Some(UnwindInfo { + header: header.into_ref(), + unwind_codes: unwind_codes.into_slice(), + rest, + }) + } + + /// Get an iterator over the unwind operations. + pub fn unwind_operations(&self) -> UnwindOperations<'a> { + UnwindOperations(self.unwind_codes) + } + + /// Get the trailing information of the unwind info, if any. + pub fn trailer(&self) -> Option> { + let flags = self.flags(); + if flags.contains(UnwindInfoFlags::EHANDLER) { + let (handler_address, handler_data) = + Ref::<_, U32>::new_unaligned_from_prefix(self.rest)?; + Some(UnwindInfoTrailer::ExceptionHandler { + handler_address: handler_address.into_ref(), + handler_data, + }) + } else if flags.contains(UnwindInfoFlags::UHANDLER) { + let (handler_address, handler_data) = + Ref::<_, U32>::new_unaligned_from_prefix(self.rest)?; + Some(UnwindInfoTrailer::TerminationHandler { + handler_address: handler_address.into_ref(), + handler_data, + }) + } else if flags.contains(UnwindInfoFlags::CHAININFO) { + let (chained, _) = Ref::<_, RuntimeFunction>::new_unaligned_from_prefix(self.rest)?; + Some(UnwindInfoTrailer::ChainedUnwindInfo { + chained: chained.into_ref(), + }) + } else { + None + } + } +} + +impl std::ops::Deref for UnwindInfo<'_> { + type Target = UnwindInfoHeader; + + fn deref(&self) -> &Self::Target { + self.header + } +} + +/// An iterator over `UnwindOperation`s. +/// +/// This iterator parses the operations as it iterates, since it needs to parse them to know how +/// many slots each takes up. +#[derive(Clone, Copy, Debug)] +pub struct UnwindOperations<'a>(&'a [u8]); + +impl<'a> UnwindOperations<'a> { + /// Get the current `UnwindCode`. 
+ pub fn unwind_code(&self) -> Option<&'a UnwindCode> { + let mut c = *self; + c.read::() + } + + fn read(&mut self) -> Option<&'a T> { + let (v, rest) = Ref::<_, T>::new_unaligned_from_prefix(self.0)?; + self.0 = rest; + Some(v.into_ref()) + } +} + +impl<'a> Iterator for UnwindOperations<'a> { + type Item = (u8, UnwindOperation); + + fn next(&mut self) -> Option { + let unwind_code = self.read::()?; + let op = match unwind_code.operation_code()? { + UnwindOperationCode::PushNonvol => { + UnwindOperation::PopNonVolatile(unwind_code.operation_info_as_register()) + } + UnwindOperationCode::AllocLarge => match unwind_code.operation_info() { + 0 => UnwindOperation::UnStackAlloc(self.read::()?.get() as u32 * 8), + 1 => UnwindOperation::UnStackAlloc(self.read::()?.get()), + _ => return None, + }, + UnwindOperationCode::AllocSmall => { + UnwindOperation::UnStackAlloc((unwind_code.operation_info() as u32 + 1) * 8) + } + UnwindOperationCode::SetFPReg => UnwindOperation::RestoreSPFromFP, + UnwindOperationCode::SaveNonvol => UnwindOperation::ReadNonVolatile( + unwind_code.operation_info_as_register(), + StackFrameOffset(self.read::()?.get() as u32 * 8), + ), + UnwindOperationCode::SaveNonvolFar => UnwindOperation::ReadNonVolatile( + unwind_code.operation_info_as_register(), + StackFrameOffset(self.read::()?.get()), + ), + UnwindOperationCode::SaveXmm128 => UnwindOperation::ReadXMM( + unwind_code.operation_info_as_xmm(), + StackFrameOffset(self.read::()?.get() as u32 * 16), + ), + UnwindOperationCode::SaveXmm128Far => UnwindOperation::ReadXMM( + unwind_code.operation_info_as_xmm(), + StackFrameOffset(self.read::()?.get()), + ), + UnwindOperationCode::PushMachframe => UnwindOperation::PopMachineFrame { + error_code: unwind_code.operation_info() == 1, + }, + }; + + Some((unwind_code.prolog_offset, op)) + } +} + +/// An offset relative to the local stack frame. +#[derive(Debug, Clone, Copy)] +pub struct StackFrameOffset(u32); + +/// An unwind operation to perform. 
+/// +/// These generally correspond to `UnwindOperationCode`s, however they are named based on the +/// operation that needs to be done to unwind. +#[derive(Debug, Clone, Copy)] +pub enum UnwindOperation { + /// Restore a register's value by popping from the stack (incrementing RSP by 8). + PopNonVolatile(Register), + /// Undo a stack allocation of the given size (incrementing RSP). + UnStackAlloc(u32), + /// Use the frame pointer register to restore RSP. The stack pointer should be restored from + /// the frame pointer minus UnwindInfo::frame_register_offset(). + RestoreSPFromFP, + /// Restore a register's value from the given stack frame offset. + ReadNonVolatile(Register, StackFrameOffset), + /// Restore an XMM register's value from the given stack frame offset. + ReadXMM(XmmRegister, StackFrameOffset), + /// Pop a machine frame. This restores from the stack an optional error code, and then (in + /// order from lowest to highest addresses) IP, CS, EFLAGS, the old SP, and SS. + PopMachineFrame { error_code: bool }, +} + +/// An operation represented by an `UnwindCode`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum UnwindOperationCode { + PushNonvol, + AllocLarge, + AllocSmall, + SetFPReg, + SaveNonvol, + SaveNonvolFar, + SaveXmm128 = 8, + SaveXmm128Far, + PushMachframe, +} + +/// A single step to unwind operations done in a frame's prolog. +#[derive(Unaligned, FromZeroes, FromBytes, Debug, Clone, Copy)] +#[repr(C)] +pub struct UnwindCode { + /// The byte offset into the prolog where the operation was done. + pub prolog_offset: u8, + /// The operation code and info. + pub opcode_and_opinfo: u8, +} + +impl UnwindCode { + /// Get the raw operation code. + #[inline] + pub fn operation_code_raw(&self) -> u8 { + self.opcode_and_opinfo & 0xf + } + + /// Get the operation information bits. + #[inline] + pub fn operation_info(&self) -> u8 { + self.opcode_and_opinfo >> 4 + } + + /// Get the operation code. 
+ pub fn operation_code(&self) -> Option { + match self.operation_code_raw() { + 0 => Some(UnwindOperationCode::PushNonvol), + 1 => Some(UnwindOperationCode::AllocLarge), + 2 => Some(UnwindOperationCode::AllocSmall), + 3 => Some(UnwindOperationCode::SetFPReg), + 4 => Some(UnwindOperationCode::SaveNonvol), + 5 => Some(UnwindOperationCode::SaveNonvolFar), + 8 => Some(UnwindOperationCode::SaveXmm128), + 9 => Some(UnwindOperationCode::SaveXmm128Far), + 10 => Some(UnwindOperationCode::PushMachframe), + _ => None, + } + } + + /// Interpret the operation info as a register. + #[inline] + fn operation_info_as_register(&self) -> Register { + let op_info = self.operation_info(); + op_info + .try_into() + .expect("`op_info` is between 0 and 15, which are defined values of `Register`.") + } + + /// Interpret the operation info as an Xmm register. + #[inline] + fn operation_info_as_xmm(&self) -> XmmRegister { + let op_info = self.operation_info(); + op_info + .try_into() + .expect("`op_info` is between 0 and 15, which are defined values of `XmmRegister`.") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use hex_literal::hex; + use memmap2::Mmap; + use object::read::{File, Object, ObjectSection}; + use std::sync::OnceLock; + + static FIXTURE: OnceLock = OnceLock::new(); + const FIXTURE_ADDRESS: u64 = 0x7ff725bf0000; + + fn get_fixture() -> &'static [u8] { + FIXTURE.get_or_init(|| unsafe { + Mmap::map( + &std::fs::File::open(concat!( + env!("CARGO_MANIFEST_DIR"), + "/fixture/binary/x86_64.exe" + )) + .unwrap(), + ) + .unwrap() + }) + } + + struct FrameContext { + registers: [u64; 16], + ip: u64, + stack: &'static [u8], + stack_base: u64, + + pub changes: RegisterChanges, + } + + #[derive(Default, Debug, Clone, PartialEq, Eq)] + struct RegisterChanges { + changes: [Option; 16], + } + + impl RegisterChanges { + pub fn new() -> Self { + Self::default() + } + + pub fn set(mut self, reg: Register, value: u64) -> Self { + self.changes[reg as usize] = Some(value); + self + } + } 
+ + impl UnwindState for FrameContext { + fn read_register(&mut self, register: Register) -> u64 { + self.registers[register as usize] + } + + fn read_stack(&mut self, addr: u64) -> Option { + if addr > self.stack_base { + return None; + } + let offset = self.stack_base - addr; + let offset = offset as usize; + if offset < 8 || offset > self.stack.len() { + return None; + } + let index = self.stack.len() - offset; + Some(u64::from_le_bytes( + (&self.stack[index..index + 8]).try_into().unwrap(), + )) + } + + fn write_register(&mut self, register: Register, value: u64) { + self.registers[register as usize] = value; + self.changes.changes[register as usize] = Some(value); + } + + fn write_xmm_register(&mut self, _register: XmmRegister, _value: u128) { + unimplemented!() + } + } + + macro_rules! windbg_frame_context { + ( rax = $rax:literal rbx = $rbx:literal rcx = $rcx:literal + rdx = $rdx:literal rsi = $rsi:literal rdi = $rdi:literal + rip = $rip:literal rsp = $rsp:literal rbp = $rbp:literal + r8 = $r8:literal r9 = $r9:literal r10 = $r10:literal + r11 = $r11:literal r12 = $r12:literal r13 = $r13:literal + r14 = $r14:literal r15 = $r15:literal + stack_base = $stack_base:literal + stack = $stack:literal + ) => { + FrameContext { + registers: [ + $rax, $rcx, $rdx, $rbx, $rsp, $rbp, $rsi, $rdi, $r8, $r9, $r10, $r11, $r12, + $r13, $r14, $r15, + ], + ip: $rip, + stack: &hex!($stack), + stack_base: $stack_base, + changes: Default::default(), + } + }; + } + + fn assert_fixture_unwind(mut context: FrameContext, ra: u64, changes: RegisterChanges) { + let file = File::parse(get_fixture()).unwrap(); + let pdata_section = file.section_by_name(".pdata").unwrap(); + let entries = FunctionTableEntries::parse(pdata_section.data().unwrap()); + let ip_offset = (context.ip - FIXTURE_ADDRESS) as u32; + let result = entries.unwind_frame_with_image(&mut context, get_fixture(), ip_offset); + assert_eq!(result, Some(ra), "mismatched return address"); + assert_eq!(context.changes, changes, 
"mismatched register changes"); + } + + fn assert_fixture_frames(mut context: FrameContext, return_addrs: &[u64]) { + let file = File::parse(get_fixture()).unwrap(); + let pdata_section = file.section_by_name(".pdata").unwrap(); + let entries = FunctionTableEntries::parse(pdata_section.data().unwrap()); + + let mut ip = context.ip; + for ra in return_addrs { + let ip_offset = (ip - FIXTURE_ADDRESS) as u32; + let result = entries.unwind_frame_with_image(&mut context, get_fixture(), ip_offset); + assert_eq!(result, Some(*ra), "mismatched return address"); + ip = ra - 1; + } + } + + #[test] + fn unwind_frame_1() { + let context = windbg_frame_context! { + rax=0x000000000000001e rbx=0x0000020891345770 rcx=0x000000000000000a + rdx=0x0000000000000001 rsi=0x0000000000000001 rdi=0x0000020891349e40 + rip=0x00007ff725bf1084 rsp=0x00000070c7d3fb38 rbp=0x0000000000000000 + r8=0x0000020891349e40 r9=0x0000000000000630 r10=0x0000000000000630 + r11=0x00000070c7d3f7f0 r12=0x0000000000000000 r13=0x0000000000000000 + r14=0x0000000000000000 r15=0x0000000000000000 + stack_base = 0x70c7d3fba0 + stack = "21 10 BF 25 F7 7F 00 00 + 01 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 + 02 00 00 00 00 00 00 00 39 72 C0 25 F7 7F 00 00 + 70 57 34 91 08 02 00 00 40 9E 34 91 08 02 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 90 6F C0 25 F7 7F 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" + }; + assert_fixture_unwind( + context, + 0x7ff725bf1021, + RegisterChanges::new().set(Register::RSP, 0x70c7d3fb38 + 8), + ); + } + + #[test] + fn unwind_frame_2() { + let context = windbg_frame_context! 
{ + rax=0x0000000000000026 rbx=0x000000000000006b rcx=0x0000000000000002 + rdx=0x0000000000000026 rsi=0x0000000000000001 rdi=0x000000000000001f + rip=0x00007ff725bf10b6 rsp=0x00000070c7d3fb38 rbp=0x0000000000000000 + r8=0x0000000000000002 r9=0x0000000000000000 r10=0x000000000000001f + r11=0x00000070c7d3f7f0 r12=0x0000000000000000 r13=0x0000000000000000 + r14=0x0000000000000026 r15=0x0000000000000003 + stack_base = 0x70c7d3fba0 + stack = "4F 10 BF 25 F7 7F 00 00 + 01 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 + 02 00 00 00 00 00 00 00 39 72 C0 25 F7 7F 00 00 + 70 57 34 91 08 02 00 00 40 9E 34 91 08 02 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 90 6F C0 25 F7 7F 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" + }; + assert_fixture_unwind( + context, + 0x7ff725bf104f, + RegisterChanges::new().set(Register::RSP, 0x70c7d3fb38 + 8), + ); + } + + #[test] + fn unwind_frame_in_prolog() { + let context = windbg_frame_context! { + rax=0x00007ffa222c07a8 rbx=0x0000020891345770 rcx=0x0000000000000001 + rdx=0x0000020891345770 rsi=0x0000000000000000 rdi=0x0000020891349e40 + rip=0x00007ff725bf1005 rsp=0x00000070c7d3fb70 rbp=0x0000000000000000 + r8=0x0000020891349e40 r9=0x0000000000000630 r10=0x0000000000000630 + r11=0x00000070c7d3f7f0 r12=0x0000000000000000 r13=0x0000000000000000 + r14=0x0000000000000000 r15=0x0000000000000000 + stack_base = 0x70c7d3fba0 + stack = " + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 90 6F C0 25 F7 7F 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" + }; + assert_fixture_unwind( + context, + 0x7ff725c06f90, + RegisterChanges::new() + .set(Register::RSI, 0) + .set(Register::R14, 0) + .set(Register::R15, 0) + .set(Register::RSP, 0x70c7d3fb70 + 8 * 4), + ); + } + + #[test] + fn unwind_frame_in_epilog_beginning() { + let context = windbg_frame_context! 
{ + rax=0xf47e69ea626ba5db rbx=0x82bcd1c6ad5f6936 rcx=0x82bcd1c6ad5f6936 + rdx=0x0000000000000001 rsi=0x0000000000000001 rdi=0x000000000000001f + rip=0x00007ff725bf1068 rsp=0x00000070c7d3fb40 rbp=0x0000000000000000 + r8=0x82bcd1c6ad5f6936 r9=0x0000000000000003 r10=0x0000000000000045 + r11=0x00000070c7d3f7f0 r12=0x0000000000000000 r13=0x0000000000000000 + r14=0x0000000000000026 r15=0x0000000000000064 + stack_base = 0x70c7d3fba0 + stack = " + 01 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 + 02 00 00 00 00 00 00 00 39 72 C0 25 F7 7F 00 00 + 70 57 34 91 08 02 00 00 40 9E 34 91 08 02 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 90 6F C0 25 F7 7F 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" + }; + assert_fixture_unwind( + context, + 0x7ff725c06f90, + RegisterChanges::new() + .set(Register::RBX, 0x20891345770) + .set(Register::RDI, 0x20891349e40) + .set(Register::RSI, 0) + .set(Register::R14, 0) + .set(Register::R15, 0) + .set(Register::RSP, 0x70c7d3fb40 + 0x20 + 8 * 6), + ); + } + + #[test] + fn unwind_frame_in_epilog_middle() { + let context = windbg_frame_context! { + rax=0xf47e69ea626ba5db rbx=0x0000020891345770 rcx=0x82bcd1c6ad5f6936 + rdx=0x0000000000000001 rsi=0x0000000000000000 rdi=0x0000020891349e40 + rip=0x00007ff725bf106f rsp=0x00000070c7d3fb78 rbp=0x0000000000000000 + r8=0x82bcd1c6ad5f6936 r9=0x0000000000000003 r10=0x0000000000000045 + r11=0x00000070c7d3f7f0 r12=0x0000000000000000 r13=0x0000000000000000 + r14=0x0000000000000026 r15=0x0000000000000064 + stack_base = 0x70c7d3fba0 + stack = "00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 90 6F C0 25 F7 7F 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" + }; + assert_fixture_unwind( + context, + 0x7ff725c06f90, + RegisterChanges::new() + .set(Register::R14, 0) + .set(Register::R15, 0) + .set(Register::RSP, 0x70c7d3fb78 + 8 * 3), + ); + } + + #[test] + fn multiple_frames() { + let context = windbg_frame_context! 
{ + rax=0x0000000000000026 rbx=0x000000000000006b rcx=0x0000000000000002 + rdx=0x0000000000000026 rsi=0x0000000000000001 rdi=0x000000000000001f + rip=0x00007ff725bf10b6 rsp=0x00000070c7d3fb38 rbp=0x0000000000000000 + r8=0x0000000000000002 r9=0x0000000000000000 r10=0x000000000000001f + r11=0x00000070c7d3f7f0 r12=0x0000000000000000 r13=0x0000000000000000 + r14=0x0000000000000026 r15=0x0000000000000003 + stack_base = 0x70c7d3fba0 + stack = "4F 10 BF 25 F7 7F 00 00 + 01 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 + 02 00 00 00 00 00 00 00 39 72 C0 25 F7 7F 00 00 + 70 57 34 91 08 02 00 00 40 9E 34 91 08 02 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 90 6F C0 25 F7 7F 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" + }; + assert_fixture_frames(context, &[0x7ff725bf104f, 0x7ff725c06f90]); + } +} diff --git a/toolkit/crashreporter/minidump-analyzer/Cargo.toml b/toolkit/crashreporter/minidump-analyzer/Cargo.toml new file mode 100644 index 000000000000..eff4a44d30cb --- /dev/null +++ b/toolkit/crashreporter/minidump-analyzer/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "minidump-analyzer" +description = "Analyzes minidumps on the system where they were created, using local debug information." 
+version = "0.1.0" +authors = ["Alex Franchuk "] +readme = "README.md" +license = "MPL-2.0" +edition = "2021" + +[[bin]] +name = "minidump-analyzer" + +[dependencies] +anyhow = "1.0.69" +async-trait = "0.1" +breakpad-symbols = "0.22" +clap = { version = "4", default-features = false, features = ["std", "cargo", "wrap_help", "derive"] } +env_logger = { version = "0.10.0", default-features = false } +futures-executor = { version = "0.3", features = ["thread-pool"] } +futures-util = { version = "0.3", features = ["channel"] } +lazy_static = "1.4.0" +log = "0.4" +minidump = "0.22" +minidump-unwind = { version = "0.22", features = ["debuginfo-unwind"] } +mozilla-central-workspace-hack = { version = "0.1", features = ["minidump-analyzer"], optional = true } +serde_json = "1" + +[dependencies.windows-sys] +version = "0.52" +features = [ + "Win32_Foundation", + "Win32_Security_Cryptography", + "Win32_Security_Cryptography_Catalog", + "Win32_Security_WinTrust", + "Win32_Storage_FileSystem", + "Win32_UI_WindowsAndMessaging", +] diff --git a/toolkit/crashreporter/minidump-analyzer/src/main.rs b/toolkit/crashreporter/minidump-analyzer/src/main.rs new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/toolkit/crashreporter/rust_minidump_writer_linux/Cargo.toml b/toolkit/crashreporter/rust_minidump_writer_linux/Cargo.toml index 8c414d8acfa0..740a0e3b9348 100644 --- a/toolkit/crashreporter/rust_minidump_writer_linux/Cargo.toml +++ b/toolkit/crashreporter/rust_minidump_writer_linux/Cargo.toml @@ -9,6 +9,6 @@ license = "MPL-2.0" [dependencies] crash-context = "0.6.1" -minidump-writer = "0.8.9" +minidump-writer = "0.9.0" libc = "0.2.74" anyhow = "1.0" diff --git a/tools/profiler/rust-helper/Cargo.toml b/tools/profiler/rust-helper/Cargo.toml index fa02796fb22c..e81d24ee99c5 100644 --- a/tools/profiler/rust-helper/Cargo.toml +++ b/tools/profiler/rust-helper/Cargo.toml @@ -10,7 +10,7 @@ rustc-demangle = "0.1" uuid = "1.0" [dependencies.object] -version = "0.32.0" +version = 
"0.36.0" optional = true default-features = false features = ["std", "read_core", "elf"] diff --git a/tools/profiler/rust-helper/src/elf.rs b/tools/profiler/rust-helper/src/elf.rs index 4930884f05ee..ff1dcf5526c9 100644 --- a/tools/profiler/rust-helper/src/elf.rs +++ b/tools/profiler/rust-helper/src/elf.rs @@ -12,9 +12,9 @@ use uuid::Uuid; const UUID_SIZE: usize = 16; const PAGE_SIZE: usize = 4096; -fn get_symbol_map<'a: 'b, 'b, T>(object_file: &'b T) -> HashMap +fn get_symbol_map<'a, T>(object_file: &T) -> HashMap where - T: Object<'a, 'b>, + T: Object<'a>, { object_file .dynamic_symbols()