From ec3b82b135ca9363f4cc53c1719736d3ecbd5223 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Fri, 6 Sep 2019 17:49:58 +0000 Subject: [PATCH] Bug 1579425 - Part 2: Revendor dependencies. r=froydnj Depends on D45046 Differential Revision: https://phabricator.services.mozilla.com/D45047 --HG-- rename : third_party/rust/goblin/src/elf/dyn.rs => third_party/rust/goblin/src/elf/dynamic.rs rename : third_party/rust/object/src/lib.rs => third_party/rust/object/src/read/any.rs rename : third_party/rust/object/src/pe.rs => third_party/rust/object/src/read/pe.rs rename : third_party/rust/object/src/wasm.rs => third_party/rust/object/src/read/wasm.rs extra : moz-landing-system : lando --- third_party/rust/goblin/.cargo-checksum.json | 2 +- third_party/rust/goblin/CHANGELOG.md | 67 ++ third_party/rust/goblin/Cargo.toml | 11 +- third_party/rust/goblin/README.md | 152 +++- third_party/rust/goblin/examples/ar.rs | 2 - third_party/rust/goblin/examples/automagic.rs | 4 +- .../goblin/examples/dotnet_pe_analysis.rs | 77 ++ third_party/rust/goblin/examples/dyldinfo.rs | 18 +- third_party/rust/goblin/examples/lipo.rs | 4 +- third_party/rust/goblin/examples/rdr.rs | 4 - third_party/rust/goblin/examples/scroll.rs | 6 +- third_party/rust/goblin/src/archive/mod.rs | 141 ++- .../rust/goblin/src/elf/compression_header.rs | 61 +- .../goblin/src/elf/constants_relocation.rs | 173 +++- .../goblin/src/elf/{dyn.rs => dynamic.rs} | 392 +++++---- third_party/rust/goblin/src/elf/gnu_hash.rs | 66 +- third_party/rust/goblin/src/elf/header.rs | 152 ++-- third_party/rust/goblin/src/elf/mod.rs | 221 ++--- third_party/rust/goblin/src/elf/note.rs | 59 +- .../rust/goblin/src/elf/program_header.rs | 116 +-- third_party/rust/goblin/src/elf/reloc.rs | 309 +++++-- .../rust/goblin/src/elf/section_header.rs | 146 ++-- third_party/rust/goblin/src/elf/sym.rs | 144 ++- third_party/rust/goblin/src/error.rs | 39 +- third_party/rust/goblin/src/lib.rs | 63 +- third_party/rust/goblin/src/mach/constants.rs | 150 ++-- 
third_party/rust/goblin/src/mach/exports.rs | 59 +- third_party/rust/goblin/src/mach/fat.rs | 25 +- third_party/rust/goblin/src/mach/header.rs | 67 +- third_party/rust/goblin/src/mach/imports.rs | 83 +- .../rust/goblin/src/mach/load_command.rs | 66 +- third_party/rust/goblin/src/mach/mod.rs | 54 +- .../rust/goblin/src/mach/relocation.rs | 23 +- third_party/rust/goblin/src/mach/segment.rs | 68 +- third_party/rust/goblin/src/mach/symbols.rs | 140 ++- .../rust/goblin/src/pe/data_directories.rs | 10 +- third_party/rust/goblin/src/pe/debug.rs | 44 +- third_party/rust/goblin/src/pe/exception.rs | 826 ++++++++++++++++++ third_party/rust/goblin/src/pe/export.rs | 102 +-- third_party/rust/goblin/src/pe/header.rs | 93 +- third_party/rust/goblin/src/pe/import.rs | 76 +- third_party/rust/goblin/src/pe/mod.rs | 106 ++- .../rust/goblin/src/pe/optional_header.rs | 51 +- third_party/rust/goblin/src/pe/relocation.rs | 133 +++ .../rust/goblin/src/pe/section_table.rs | 247 +++++- third_party/rust/goblin/src/pe/symbol.rs | 513 +++++++++++ third_party/rust/goblin/src/pe/utils.rs | 68 +- third_party/rust/goblin/src/strtab.rs | 19 +- third_party/rust/goblin/tests/archive.rs | 24 +- third_party/rust/goblin/tests/macho.rs | 23 +- third_party/rust/object/.cargo-checksum.json | 2 +- third_party/rust/object/Cargo.toml | 32 +- third_party/rust/object/examples/nm.rs | 71 +- third_party/rust/object/examples/objcopy.rs | 116 +++ third_party/rust/object/examples/objdump.rs | 30 +- third_party/rust/object/src/common.rs | 178 ++++ third_party/rust/object/src/elf.rs | 435 --------- third_party/rust/object/src/lib.rs | 603 +------------ third_party/rust/object/src/macho.rs | 376 -------- third_party/rust/object/src/read/any.rs | 515 +++++++++++ third_party/rust/object/src/read/coff.rs | 511 +++++++++++ third_party/rust/object/src/read/elf.rs | 691 +++++++++++++++ third_party/rust/object/src/read/macho.rs | 589 +++++++++++++ third_party/rust/object/src/read/mod.rs | 250 ++++++ 
third_party/rust/object/src/{ => read}/pe.rs | 370 +++++--- third_party/rust/object/src/read/traits.rs | 214 +++++ .../rust/object/src/{ => read}/wasm.rs | 242 ++--- third_party/rust/object/src/traits.rs | 124 --- third_party/rust/object/src/write/coff.rs | 477 ++++++++++ third_party/rust/object/src/write/elf.rs | 728 +++++++++++++++ third_party/rust/object/src/write/macho.rs | 449 ++++++++++ third_party/rust/object/src/write/mod.rs | 532 +++++++++++ third_party/rust/object/src/write/string.rs | 140 +++ third_party/rust/object/src/write/util.rs | 14 + third_party/rust/object/tests/round_trip.rs | 246 ++++++ 75 files changed, 10126 insertions(+), 3308 deletions(-) create mode 100644 third_party/rust/goblin/examples/dotnet_pe_analysis.rs rename third_party/rust/goblin/src/elf/{dyn.rs => dynamic.rs} (57%) create mode 100644 third_party/rust/goblin/src/pe/exception.rs create mode 100644 third_party/rust/goblin/src/pe/relocation.rs create mode 100644 third_party/rust/goblin/src/pe/symbol.rs create mode 100644 third_party/rust/object/examples/objcopy.rs create mode 100644 third_party/rust/object/src/common.rs delete mode 100644 third_party/rust/object/src/elf.rs delete mode 100644 third_party/rust/object/src/macho.rs create mode 100644 third_party/rust/object/src/read/any.rs create mode 100644 third_party/rust/object/src/read/coff.rs create mode 100644 third_party/rust/object/src/read/elf.rs create mode 100644 third_party/rust/object/src/read/macho.rs create mode 100644 third_party/rust/object/src/read/mod.rs rename third_party/rust/object/src/{ => read}/pe.rs (56%) create mode 100644 third_party/rust/object/src/read/traits.rs rename third_party/rust/object/src/{ => read}/wasm.rs (64%) delete mode 100644 third_party/rust/object/src/traits.rs create mode 100644 third_party/rust/object/src/write/coff.rs create mode 100644 third_party/rust/object/src/write/elf.rs create mode 100644 third_party/rust/object/src/write/macho.rs create mode 100644 
third_party/rust/object/src/write/mod.rs create mode 100644 third_party/rust/object/src/write/string.rs create mode 100644 third_party/rust/object/src/write/util.rs create mode 100644 third_party/rust/object/tests/round_trip.rs diff --git a/third_party/rust/goblin/.cargo-checksum.json b/third_party/rust/goblin/.cargo-checksum.json index 0c761524a3e0..43a900fbd07b 100644 --- a/third_party/rust/goblin/.cargo-checksum.json +++ b/third_party/rust/goblin/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CHANGELOG.md":"02d194f3b3b9467aa4fd951ce707a48f8964a61bb0e86725fbd8437193ea3932","Cargo.toml":"6f0f2fd7076ae2f3ab734bee3c9ed1a00a63f4b73161158055c9f9f65fde4e5f","LICENSE":"036bf6b6d6fd6dd1abda2ff6cdb672a63bdf32c468048720072910f2268a965f","README.md":"0ef544f009d0c57af22b5b4fa7b01ea8c77376647e6183ca4397b98ec9bad8ec","etc/crt1.rs":"50667c3066e00f92e038bfa4fe6968f49a31d7c6986e19c7c9e695ad9b836f74","etc/crt132.rs":"e69920ceeab57958a1890979158b57fc43f33c5a135b5798d43664c989c8c686","etc/crt1a.rs":"d158350f72aaf4fecd8501735d2f33a1a226e99bf370c794b90f7e9882c8ca64","examples/ar.rs":"68ca410b7138c9aad6562dbf6f7b28696f47b6358b50a98f1579dd9a4434e1ac","examples/automagic.rs":"ef6ee1cd23659bcea004d7ee12fc9356a6659bbaddeae80f75f3c800a9c8df0a","examples/dyldinfo.rs":"3823f397153817ff705c2248cce9d14aac1f2a999da52419cd6466a724c8a0f7","examples/lipo.rs":"907bee692b129f48d1458549d7f678fe94d629222806d46682d18ad7a0b305a6","examples/rdr.rs":"ba3463c01fbdba994a5843c5a0f159306e0982d34dcf9eff97676ca265231e12","examples/scroll.rs":"64bef86c37928fa3929d2c123409ff7c068259dcc0a8a5074639a63193276aa8","src/archive/mod.rs":"4dbecbbf45703a3c49bd36eeaf402bae76087e030c6738a6f1bd20846983c368","src/elf/compression_header.rs":"9300d2c0e118e1b5778bb3137b0a4a5597f3f9a128de1ed0f34072ef05cbc60e","src/elf/constants_header.rs":"cdd0eea9f4617f86f14b57dccf5124ae62376df6efe2bf617f95e4b24c176d1d","src/elf/constants_relocation.rs":"a77ecdd1421443111d8ab2ef6a16423184108dbb605f3c2d09a0bda6ff78f6db","src/elf/dyn.rs":"4ebf733eed1
10c57349d5c49c456faaf246cc5b5314798a9975c61fa0039dc04","src/elf/gnu_hash.rs":"12a9563648ded7ac451bd2e19086143b07908902deda83572fbeaa6dc71b58c7","src/elf/header.rs":"c487d8ebc5486bbc386414d0ffc68255a50b680ec7c01f2e642cfcb1ce195001","src/elf/mod.rs":"015dcf26eea76b118321d2929a2ed586d96999324e55209344b4bcb281099ba0","src/elf/note.rs":"d576f1640d4ab774a1e59799cb29171b93ddfd921f159ba67469d4f68dd4bec3","src/elf/program_header.rs":"e688f267431070e54b22513e4f3cfe6b139c60836f815a00912c88cdc085279b","src/elf/reloc.rs":"ac92ed86dc34ed6f2507dc64f250e6fa19edfa43e180025fe4693e40b35e6c8b","src/elf/section_header.rs":"3a43d5790bdc83b1b0d08bbc4f8598c37d61a3d36c24c755bc17b1df9acb701b","src/elf/sym.rs":"a58a6acad5002295f690779dc971f25c8f7ccdd8284d3ba14de1113a7a3392f1","src/error.rs":"8b66a4d8df655017dfaa376fb31e9e0e71692105e93920be4fa1b202ffb1a245","src/lib.rs":"0c10f653070918585f4bdb219368ce3f0d695a3f01e481ebefb5c20378ea2f37","src/mach/bind_opcodes.rs":"2477021270083fd3d9200adebf5d2a1963b9014c7400be39fb68c8b309e4eebe","src/mach/constants.rs":"5997a13d228335575b2a1c3780d2a6e063b63c5551c8e69d74bdf8577d7bb7a6","src/mach/exports.rs":"41aa1be345dd9f241746c7ce7a5b91e0727638dc7bb966ca0c1832e2a2eb2ed0","src/mach/fat.rs":"7f9469d567b3cc1bf53d96ca25546aa4ca8498f78bec67f0517f07c5d2dc5a62","src/mach/header.rs":"1e854f63e1c37547a49880342a19f3d0bf284a9b4942cde687ee91fd8099f8df","src/mach/imports.rs":"eb8ac6cab5e40463d82cb006c4ab0d81831acb41136baf93b65c296f51fc863d","src/mach/load_command.rs":"6d18fc610ec16e9fcfa3eb9822213aae94958f809737efad0936a7f94873d86f","src/mach/mod.rs":"4319ee5ef373e1644724696c7e90c004a95b291713207ee3c45d7be287da1ef7","src/mach/relocation.rs":"a5a4f979a1ca61d65c5d0d5c0c6442e8aeb332e9d71d4dec44588210a0099198","src/mach/segment.rs":"e743a4f35a1136647f6e82ee93c39ba4582d46bdc39a4d864451665b0f5ff378","src/mach/symbols.rs":"6046079acf01fada8ed269dcf31103ae72ac17c470151ec4d8dbccc6ef8879bb","src/pe/characteristic.rs":"4fa8a7e6de20795b6d70d80fc25568eb999bb3dd9f9735d1407302a8053b3dd1"
,"src/pe/data_directories.rs":"f3fc7757ba20559be0626a8e65cd87aab5b1f1f2ce09584a28686f82c77af88a","src/pe/debug.rs":"a68b78d4868e2b0d410c5175630f3786b84fd17cc7483b5a21ba82dae1fafb99","src/pe/export.rs":"00a5ffece1cc261ecd863de718eb2a26cc35143123f1477d43b43c0654f4c705","src/pe/header.rs":"17789046ba5d366dbab42243e000e1606170e5ecbde0cf09f74139f0e6981a97","src/pe/import.rs":"10352263dcfaf853cf0343e4b3a41892e2d630060612f2e62d19f64d8d89f3ea","src/pe/mod.rs":"c3bffa8481ab995a4992b7f97293a6918cb3390625f3ff99f52780a1f4aca193","src/pe/optional_header.rs":"bd1b6cf33ecc1bfdb59aa6dbf566e905190f6570385fab2b1c855313d3e72dc3","src/pe/section_table.rs":"6755d81ac63153f4fdad94a16a296c37ffd4cdc55e153356e5fc579f6ae8b9b9","src/pe/utils.rs":"93b7e29d517d268fc83031417368ec2a72d95ed33241870796e69908655ea4e6","src/strtab.rs":"e66163a13d43f791f94badefcccfc24a04ef6e2dc784650b68a9e46c988a702a","tests/archive.rs":"2437c669d199392e79b7ac58c2cc25847490db054e21be943a2ad3c4f5f6acdb","tests/compare_dyldinfos.rs":"bd5f3c22a8a7c1563bf23fa12d95868bd630f3ea5ba3ffe659a602de4ec26e39","tests/macho.rs":"e146650ba50531cd00e6c6f74e7ce36189f5918c65daf070641b57396dda6579"},"package":"5911d7df7b8f65ab676c5327b50acea29d3c6a1a4ad05e444cf5dce321b26db2"} \ No newline at end of file 
+{"files":{"CHANGELOG.md":"2687aebcf734e1a8add45fb8b009cccc63d97db7820470a518ff5000634a93e4","Cargo.toml":"d7c562d7144ff9b2a72b0a8488156920ce64ae6cd0763b1ca5d055dd2b40aff1","LICENSE":"036bf6b6d6fd6dd1abda2ff6cdb672a63bdf32c468048720072910f2268a965f","README.md":"5ba373319553c5155cfd9d75a2fc962e17b39872a03a19eab34cc872c82b7a78","etc/crt1.rs":"50667c3066e00f92e038bfa4fe6968f49a31d7c6986e19c7c9e695ad9b836f74","etc/crt132.rs":"e69920ceeab57958a1890979158b57fc43f33c5a135b5798d43664c989c8c686","etc/crt1a.rs":"d158350f72aaf4fecd8501735d2f33a1a226e99bf370c794b90f7e9882c8ca64","examples/ar.rs":"e299cdc8478148b4d20788aa7d9cac04ea9587a405380e2bf889a998e68a1d02","examples/automagic.rs":"f202c7d3c6096a6c883c03d0352a750ad9185811e897046d028dba30aa1dcaf2","examples/dotnet_pe_analysis.rs":"b85ea80e45ac8b3fe9fc6111ab2dee4738878811abad3571192924237d5cd949","examples/dyldinfo.rs":"54b2e04f2f94d5f9c9a8b19cb84f768e339d509472bdcfa317eb9bc3c47caa5f","examples/lipo.rs":"3c2ebe95ac4e38d836f795c65b6e740ae08b751e5ee615bb3feb8620934ca2c9","examples/rdr.rs":"fb1442a6e4678c62983c9a963198a1e67a19169241c43aac840d41192c57a52b","examples/scroll.rs":"2cdb39c29dafd28ed6bfe99cc980480b49fc8e62bef14ab9eb7c45d0d66866d6","src/archive/mod.rs":"8f84e19cbac174b4f34d539755dd87b32b7ef029e5cef667c11b16652664eeef","src/elf/compression_header.rs":"bb6911bccd2d97af8ae721a410f28bc8a2bc6387c412a82909395676cf4a7364","src/elf/constants_header.rs":"cdd0eea9f4617f86f14b57dccf5124ae62376df6efe2bf617f95e4b24c176d1d","src/elf/constants_relocation.rs":"a010071cd2a25ab71e0c7181eb1d9f417daa2d1ec25a09c74bd12ad944892225","src/elf/dynamic.rs":"f2cd1c40257c597058f0ffab7786e7f6d6a18234ccc20e4fb267221ca5fcffce","src/elf/gnu_hash.rs":"718851196316077c270522f4b8e14454af30c364e0ec917f6eb5c2ed6f84f1af","src/elf/header.rs":"27e09865180b20718b30ea57bb60584958c05fdadb626053b0bbc5f733943f4b","src/elf/mod.rs":"e16fcb1c3ef2bcaa2e37168229876a4f085ed420852b93874e8af10694237e73","src/elf/note.rs":"3428ddd17d4ad840ee1f9831ae1913c94b2b82e85d63b45ff
7402e3e45200113","src/elf/program_header.rs":"cb14ed59bcf92595ebefaa8752d4449a6f4ec38373ce5709248bd4883690de3c","src/elf/reloc.rs":"5205c33d897de1f8dfad6ba97b597c2a609a78e93fc231508f243ba4eff7ca78","src/elf/section_header.rs":"e379b303869e410060a0f7507bd8cb49dc268105826aae0f92cebb187386b0da","src/elf/sym.rs":"5f50b5eb5c22cc4d53641f8f37106c8c321a08ed90aaedc6e9fa6e1c4175743a","src/error.rs":"308448cc0f6c72f9da0f16ff11c50be98a94f024e86f4607dd6f084374dbaa00","src/lib.rs":"e43e08e9b7cfd0ee8341dd6ed2c718978f3ddab976cab98b5fdadbab315c197c","src/mach/bind_opcodes.rs":"2477021270083fd3d9200adebf5d2a1963b9014c7400be39fb68c8b309e4eebe","src/mach/constants.rs":"36d9011c2db6fac7b561b44350f08e56885fa721329d316e79d1e112e83ebbf5","src/mach/exports.rs":"16910411ca6e13cbe87154b653a9f74ecc3664d198c24b13a6d605ed00901e35","src/mach/fat.rs":"534b01ddeb803217a5e0a8b2bbd7306fd24f5a4ea99f7615714bbeee6b8fb194","src/mach/header.rs":"91d13e0a986933eff9479aafcca46ba2f8fdb042ae6407800e02b3a240b8ae86","src/mach/imports.rs":"d0ecaa49219afa28613759eea0fdf1974d2b91f78502d137a09ccacf4f7c586a","src/mach/load_command.rs":"c04df81c03d1450be4ced724edb48c5b79e78b6c292397bed12e5f7b9109e2b4","src/mach/mod.rs":"9a776bdaf8b5f3f2e8f8a6a658d62266f3233c80c439d1c872334994e40866d1","src/mach/relocation.rs":"9e09da219bd78d9d5caba22a893622b426afa7548472686b7edb7f74aa115eb5","src/mach/segment.rs":"51d1fd608c5ca311c089bc5640daa6f2e6a2a224cca9e2c30d98c72b4b130701","src/mach/symbols.rs":"bd62ce00c94e8c5ce63293404fc51bde3965e2be3015fa0a84632506a4181bd0","src/pe/characteristic.rs":"4fa8a7e6de20795b6d70d80fc25568eb999bb3dd9f9735d1407302a8053b3dd1","src/pe/data_directories.rs":"a5de9ca2b4e23e7644a31554276f5f68eed12c8808617a3e480427cbe5df3504","src/pe/debug.rs":"5a5215f2f341eb476626c2fd457d23f82610b783819ac2667793272ebf78b650","src/pe/exception.rs":"952a4d8380a6f89592707a8c9ff4b152da2a620e53f248d378d6c49134140016","src/pe/export.rs":"720f701057ea92628828ef96f836f5f8024290f017fd9df4e8ef0b4b79ed3eda","src/pe/header.rs":"50c560
d0712a2128ed3af6a93f5c7248fbcaa91d1aec58496e3d4446ec51d0ea","src/pe/import.rs":"300502b117279ea5eafea3b1a97a9809ed303bbefcdaed2abd332e4a80142c8e","src/pe/mod.rs":"95fb58479453acd6fae7dfab7d236dee2eeb7d8e9b21abc7e1cf5ccf9486c4a5","src/pe/optional_header.rs":"0d947e997d657f98e4cc737ef3c2cd7e6a4e4e5270a4403b9622ba44d1eda4cd","src/pe/relocation.rs":"c479b80bb1d6910f2168505dda4f2d8925b7edc34bed4e25d069546f88f52bb3","src/pe/section_table.rs":"92eb6ef848b701346181d9c5bd8382114bbcbfef74e67035311310ad1385bee8","src/pe/symbol.rs":"b9ac555f3ad652c39daba17afe9e0474d97ccbb34f24d5f363a9f6dbf483f6a0","src/pe/utils.rs":"624adb9e2baef91e915989ffa29433d09c8d08033b526bae697fe3cec91293ad","src/strtab.rs":"c157ab7b0033d1879ba6c991706dae397b1f202be4087b693d4879d64160ea1a","tests/archive.rs":"8736af2c5b4749067c9aa34ab03a7f063f0f850d77db2619bea4172ab725cee0","tests/compare_dyldinfos.rs":"bd5f3c22a8a7c1563bf23fa12d95868bd630f3ea5ba3ffe659a602de4ec26e39","tests/macho.rs":"4c892dd614646d3bce79c3bbae731e3a8df947ea3082498a3b7b381813d60123"},"package":"e3fa261d919c1ae9d1e4533c4a2f99e10938603c4208d56c05bec7a872b661b0"} \ No newline at end of file diff --git a/third_party/rust/goblin/CHANGELOG.md b/third_party/rust/goblin/CHANGELOG.md index 5a29f1f52cd5..e368511fb443 100644 --- a/third_party/rust/goblin/CHANGELOG.md +++ b/third_party/rust/goblin/CHANGELOG.md @@ -5,6 +5,73 @@ Before 1.0, this project does not adhere to [Semantic Versioning](http://semver. I'm sorry, I will try my best to ease breaking changes. We're almost to 1.0, don't worry! +## [0.0.24] - 2019-7-13 +### Added +- archive: new public enum type to determine which kind of archive was parsed +### Fixed +- archive: thanks @raindev + * fix parsing of windows style archives: https://github.com/m4b/goblin/pull/174 + * stricter parsing of archives with multiple indexes: https://github.com/m4b/goblin/pull/175 + +## [0.0.23] - 2019-6-30 +### Added +- pe: add write support for COFF object files!!! 
This is huge; we now support at a basic level writing out all major binary object formats, thanks @philipc: https://github.com/m4b/goblin/pull/159 +- elf: add more e_ident constants +- mach: add segment protection constants +- elf: add risc-v relocation constants +- elf: add constants for arm64_32 (ILP32 ABI on 64-bit arm) +- pe: coff relocations and other auxiliary symbol records + +### Fixed +- mach: fix 0 length data sections in mach-o segments, seen in some object files, thanks @raindev: https://github.com/m4b/goblin/pull/172 +- build: alloc build was fixed: https://github.com/m4b/goblin/pull/170 +- pe: fix `set_name_offset` compilation for 32-bit: https://github.com/m4b/goblin/pull/163 + +## [0.0.22] - 2019-4-13 +### Added +- Beautify debugging by using `debug_struct` in `Debug` implementation of many structs. +- PE: fix rva mask, thanks @wickawacka: https://github.com/m4b/goblin/pull/152 +- PE: add PE exception tables, thanks @jan-auer: https://github.com/m4b/goblin/pull/136 + +### Changed +- Bump lowest Rust version to 1.31.1 and transition project to Rust 2018 edition. +- BREAKING: Rename module `goblin::elf::dyn` to `goblin::elf::dynamic` due to `dyn` + become a keyword in Rust 2018 edition. +- BREAKING: Rename `mach::exports::SymbolKind::to_str(kind: SymbolKind)` -> `to_str(&self)`. +- BREAKING: Rename `strtab::Strtab::to_vec(self)` -> `to_vec(&self).` + +### Removed +- BREAKING: `goblin::error::Error::description` would be removed. Use `to_string()` method instead. + +### Fixed +- elf: handle some invalid sizes, thanks @philipc: https://github.com/m4b/goblin/pull/121 + +## [0.0.21] - 2019-2-21 +### Added +- elf: add symbol visibility. thanks @pchickey: https://github.com/m4b/goblin/pull/119 + +## [0.0.20] - 2019-2-10 +### Added +- elf: parse section header relocs even when not an object file. thanks @Techno-Coder: https://github.com/m4b/goblin/pull/118 +- pe: make utils public, add better examples for data directory usage. 
thanks @Pzixel: https://github.com/m4b/goblin/pull/116 + +## [0.0.19] - 2018-10-23 +### Added +- elf: fix regression when parsing dynamic symbols from some binaries, thanks @philipc: https://github.com/m4b/goblin/issues/111 + +## [0.0.18] - 2018-10-14 +### Changed + - BREAKING: updated required compiler to 1.20 (due to scroll 1.20 requirement) + - BREAKING: elf: removed bias field, as it was misleading/useless/incorrect + - BREAKING: elf: add lazy relocation iterators: Thanks @ibabushkin https://github.com/m4b/goblin/pull/102 + - BREAKING: mach: remove repr(packed) from dylib and fvmlib (this should not affect anyone): https://github.com/m4b/goblin/issues/105 +### Added + - elf: use gnu/sysv hash table to compute sizeof dynsyms more accurately: again _huge_ thanks to @philipc https://github.com/m4b/goblin/pull/109 + - elf: handle multiple load biases: _huge_ thanks @philipc: https://github.com/m4b/goblin/pull/107 + - mach: add arm64e constants: Thanks @mitsuhiko https://github.com/m4b/goblin/pull/103 + - PE: calculate read bytes using alignment: Thanks @tathanhdinh https://github.com/m4b/goblin/pull/101 + - PE: get proper names for PE sections: Thanks @roblabla https://github.com/m4b/goblin/pull/100 + ## [0.0.17] - 2018-7-16 ### Changed - BREAKING: updated required compiler to 1.19 (technically only required for tests, but assume this is required for building as well) diff --git a/third_party/rust/goblin/Cargo.toml b/third_party/rust/goblin/Cargo.toml index 6be2d9800f64..0db947b7a59b 100644 --- a/third_party/rust/goblin/Cargo.toml +++ b/third_party/rust/goblin/Cargo.toml @@ -3,7 +3,7 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g. 
crates.io) dependencies +# to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're @@ -11,9 +11,10 @@ # will likely look very different (and much more reasonable) [package] +edition = "2018" name = "goblin" -version = "0.0.17" -authors = ["m4b ", "seu ", "Will Glynn "] +version = "0.0.24" +authors = ["m4b ", "seu ", "Will Glynn ", "Philip Craig "] include = ["src/**/*", "Cargo.toml", "CHANGELOG.md", "LICENSE", "README.md", "etc/*", "examples/*", "tests/*", "fuzz/**/*"] description = "An impish, cross-platform, ELF, Mach-o, and PE binary parsing and loading crate" documentation = "https://docs.rs/goblin" @@ -22,8 +23,6 @@ keywords = ["binary", "elf", "mach", "pe", "archive"] categories = ["parsing", "development-tools::debugging"] license = "MIT" repository = "https://github.com/m4b/goblin" - -[lib] [dependencies.log] version = "0.4" optional = true @@ -35,8 +34,6 @@ version = "0.2.3" [dependencies.scroll] version = "0.9" default_features = false -[dev-dependencies.env_logger] -version = "0.5" [features] alloc = ["scroll/derive", "log"] diff --git a/third_party/rust/goblin/README.md b/third_party/rust/goblin/README.md index dab26ffc221d..0cfbf9346fd5 100644 --- a/third_party/rust/goblin/README.md +++ b/third_party/rust/goblin/README.md @@ -1,4 +1,11 @@ -# libgoblin [![Build Status](https://travis-ci.org/m4b/goblin.svg?branch=master)](https://travis-ci.org/m4b/goblin) [![Current Crates.io Version](https://img.shields.io/crates/v/goblin.svg)](https://crates.io/crates/goblin) +# libgoblin [![Build status][travis-badge]][travis-url] [![crates.io version][crates-goblin-badge]][crates-goblin] + + + +[travis-badge]: https://travis-ci.org/m4b/goblin.svg?branch=master +[travis-url]: https://travis-ci.org/m4b/goblin +[crates-goblin-badge]: https://img.shields.io/crates/v/goblin.svg +[crates-goblin]: https://crates.io/crates/goblin ![say the right 
words](https://s-media-cache-ak0.pinimg.com/736x/1b/6a/aa/1b6aaa2bae005e2fed84b1a7c32ecb1b.jpg) @@ -10,13 +17,13 @@ https://docs.rs/goblin/ ### Usage -Goblin requires `rustc` 1.19. +Goblin requires `rustc` 1.31.1. Add to your `Cargo.toml` ```toml [dependencies] -goblin = "0.0.17" +goblin = "0.0.24" ``` ### Features @@ -25,33 +32,54 @@ goblin = "0.0.17" * zero-copy, cross-platform, endian-aware, ELF64/32 implementation - wow! * zero-copy, cross-platform, endian-aware, 32/64 bit Mach-o parser - zoiks! * PE 32/64-bit parser - bing! -* a Unix _and_ BSD style archive parser (latter courtesy of [@willglynn](https://github.com/willglynn)) - huzzah! +* a Unix _and_ BSD style archive parser (latter courtesy of [@willglynn]) - huzzah! * many cfg options - it will make your head spin, and make you angry when reading the source! -* fuzzed - " I am happy to report that goblin withstood 100 million fuzzing runs, 1 million runs each for seed 1~100." - [@sanxiyn](https://github.com/sanxiyn) +* fuzzed - "I am happy to report that goblin withstood 100 million fuzzing runs, 1 million runs + each for seed 1\~100." - [@sanxiyn] * tests -`libgoblin` aims to be your one-stop shop for binary parsing, loading, -and analysis. +`libgoblin` aims to be your one-stop shop for binary parsing, loading, and analysis. ### Use-cases Goblin primarily supports the following important use cases: -1. Core, std-free `#[repr(C)]` structs, tiny compile time, 32/64 (or both) at your leisure +1. Core, std-free `#[repr(C)]` structs, tiny compile time, 32/64 (or both) at your leisure. -2. Type punning. Define a function once on a type, but have it work on 32 or 64-bit variants - without really changing anything, and no macros! See `examples/automagic.rs` for a basic example. +1. Type punning. Define a function once on a type, but have it work on 32 or 64-bit variants - + without really changing anything, and no macros! See `examples/automagic.rs` for a basic example. -3. `std` mode. 
This throws in read and write impls via `Pread` and `Pwrite`, reading from file, convenience allocations, extra methods, etc. This is for clients who can allocate and want to read binaries off disk. +1. `std` mode. This throws in read and write impls via `Pread` and `Pwrite`, reading from file, + convenience allocations, extra methods, etc. This is for clients who can allocate and want to + read binaries off disk. -4. `Endian_fd`. A truly terrible name :laughing: this is for binary analysis like in [panopticon](https://github.com/das-labor/panopticon) or [falcon](https://github.com/endeav0r/falcon) which needs to read binaries of foreign endianness, _or_ as a basis for constructing cross platform foreign architecture binutils, e.g. [cargo-sym](https://github.com/m4b/cargo-sym) and [bingrep](https://github.com/m4b/bingrep) are simple examples of this, but the sky is the limit. +1. `Endian_fd`. A truly terrible name :laughing: this is for binary analysis like in [panopticon] + or [falcon] which needs to read binaries of foreign endianness, _or_ as a basis for + constructing cross platform foreign architecture binutils, e.g. [cargo-sym] and [bingrep] are + simple examples of this, but the sky is the limit. Here are some things you could do with this crate (or help to implement so they could be done): -1. write a compiler and use it to [generate binaries](https://github.com/m4b/faerie) (all the raw C structs have [`Pwrite`](https://github.com/m4b/scroll) derived) -2. write a binary analysis tool which loads, parses, and analyzes various binary formats, e.g., [panopticon](https://github.com/das-labor/panopticon) or [falcon](https://github.com/endeav0r/falcon) -3. write a [semi-functioning dynamic linker](http://github.com/m4b/dryad) -4. write a [kernel](https://github.com/redox-os/redox) and load binaries using `no_std` cfg. I.e., it is essentially just struct and const defs (like a C header) - no fd, no output, no std. -5. 
write a bin2json tool (http://github.com/m4b/bin2json), because why shouldn't binary formats be in JSON? +1. Write a compiler and use it to [generate binaries][faerie] (all the raw C structs have + [`Pwrite`][scroll] derived). +1. Write a binary analysis tool which loads, parses, and analyzes various binary formats, e.g., + [panopticon] or [falcon]. +1. Write a [semi-functioning dynamic linker][dryad]. +1. Write a [kernel][redox-os] and load binaries using `no_std` cfg. I.e., it is essentially just + struct and const defs (like a C header) - no fd, no output, no std. +1. Write a [bin2json] tool, because why shouldn't binary formats be in JSON? + + + +[cargo-sym]: https://github.com/m4b/cargo-sym +[bingrep]: https://github.com/m4b/bingrep +[faerie]: https://github.com/m4b/faerie +[dryad]: https://github.com/m4b/dryad +[scroll]: https://github.com/m4b/scroll +[redox-os]: https://github.com/redox-os/redox +[bin2json]: https://github.com/m4b/bin2json +[panopticon]: https://github.com/das-labor/panopticon +[falcon]: https://github.com/endeav0r/falcon ### Cfgs @@ -73,30 +101,74 @@ Thank you all :heart: ! 
In alphabetic order: -- [@amanieu](https://github.com/amanieu) -- [@flanfly](https://github.com/flanfly) -- [@jan-auer](https://github.com/jan-auer) -- [@jdub](https://github.com/jdub) -- [@jrmuizel](https://github.com/jrmuizel) - [@kjempelodott](https://github.com/kjempelodott) -- [@le-jzr](https://github.com/le-jzr) -- [@lion128](https://github.com/lion128) -- [@llogiq](https://github.com/llogiq) -- [@mitsuhiko](https://github.com/mitsuhiko) -- [@mre](https://github.com/mre) -- [@philipc](https://github.com/philipc) -- [@rocallahan](https://github.com/rocallahan) -- [@sanxiyn](https://github.com/sanxiyn) -- [@tathanhdinh](https://github.com/tathanhdinh) -- [@ticki](https://github.com/ticki) -- [@willglynn](https://github.com/willglynn) -- [@xcoldhandsx](https://github.com/xcoldhandsx) +- [@amanieu] +- [@burjui] +- [@flanfly] +- [@ibabushkin] +- [@jan-auer] +- [@jdub] +- [@jrmuizel] +- [@kjempelodott] +- [@le-jzr] +- [@lion128] +- [@llogiq] +- [@lzutao] +- [@mitsuhiko] +- [@mre] +- [@pchickey] +- [@philipc] +- [@Pzixel] +- [@raindev] +- [@rocallahan] +- [@sanxiyn] +- [@tathanhdinh] +- [@Techno-coder] +- [@ticki] +- [@wickerwacka] +- [@willglynn] +- [@xcoldhandsx] + + + +[@m4b]: https://github.com/m4b +[@amanieu]: https://github.com/amanieu +[@flanfly]: https://github.com/flanfly +[@ibabushkin]: https://github.com/ibabushkin +[@jan-auer]: https://github.com/jan-auer +[@jdub]: https://github.com/jdub +[@jrmuizel]: https://github.com/jrmuizel +[@kjempelodott]: https://github.com/kjempelodott +[@le-jzr]: https://github.com/le-jzr +[@lion128]: https://github.com/lion128 +[@llogiq]: https://github.com/llogiq +[@mitsuhiko]: https://github.com/mitsuhiko +[@mre]: https://github.com/mre +[@pchickey]: https://github.com/pchickey +[@philipc]: https://github.com/philipc +[@Pzixel]: https://github.com/Pzixel +[@rocallahan]: https://github.com/rocallahan +[@sanxiyn]: https://github.com/sanxiyn +[@tathanhdinh]: https://github.com/tathanhdinh +[@Techno-coder]: 
https://github.com/Techno-coder +[@ticki]: https://github.com/ticki +[@willglynn]: https://github.com/willglynn +[@xcoldhandsx]: https://github.com/xcoldhandsx +[@lzutao]: https://github.com/lzutao +[@wickerwacka]: https://github.com/wickerwaka +[@raindev]: https://github.com/raindev +[@burjui]: https://github.com/burjui ## Contributing -1. Please prefix commits with the affected binary component; the more specific the better, e.g., if you only modify relocations in the elf module, then do "elf.reloc: added new constants for Z80" -2. Commit messages must explain their change, no generic "changed", or "fix"; if you push commits like this on a PR, be aware @m4b or someone will most likely squash them. -3. If you are making a large change to a module, please raise an issue first and lets discuss; I don't want to waste your time if its not a good technical direction, or etc. -4. If your PR is not getting attention, please respond to all relevant comments raised on the PR, and if still no response, ping @m4b, @philipc, or @willglyn in github and also feel free to email @m4b. -5. Please add tests if you are adding a new feature. Feel free to add tests even if you are not, tests are awesome and easy in rust. -6. Once cargo format is officially released, please format your _patch_ using the default settings. +1. Please prefix commits with the affected binary component; the more specific the better, e.g., + if you only modify relocations in the elf module, then do "elf.reloc: added new constants for Z80" +1. Commit messages must explain their change, no generic "changed", or "fix"; if you push commits + like this on a PR, be aware [@m4b] or someone will most likely squash them. +1. If you are making a large change to a module, please raise an issue first and lets discuss; + I don't want to waste your time if its not a good technical direction, or etc. +1. 
If your PR is not getting attention, please respond to all relevant comments raised on the PR, + and if still no response, ping [@m4b], [@philipc], or [@willglynn] in github and also feel free + to email [@m4b]. +1. Please add tests if you are adding a new feature. Feel free to add tests even if you are not, + tests are awesome and easy in rust. +1. Once cargo format is officially released, please format your _patch_ using the default settings. diff --git a/third_party/rust/goblin/examples/ar.rs b/third_party/rust/goblin/examples/ar.rs index 8bec3a4c34e7..9346e3e045ad 100644 --- a/third_party/rust/goblin/examples/ar.rs +++ b/third_party/rust/goblin/examples/ar.rs @@ -1,7 +1,5 @@ //cargo run --example=ar -- crt1.a -extern crate goblin; - use goblin::elf; use goblin::archive; use std::env; diff --git a/third_party/rust/goblin/examples/automagic.rs b/third_party/rust/goblin/examples/automagic.rs index c14d5d84def1..cd31fdeb9233 100644 --- a/third_party/rust/goblin/examples/automagic.rs +++ b/third_party/rust/goblin/examples/automagic.rs @@ -1,5 +1,3 @@ -extern crate goblin; - use std::default::Default; // demonstrates "automagical" elf32/64 switches via cfg on arch and pub use hacks. @@ -13,7 +11,7 @@ pub use goblin::elf64 as elf; pub use goblin::elf32 as elf; #[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))] -use elf::{header, sym}; +use crate::elf::{header, sym}; #[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))] fn main() { diff --git a/third_party/rust/goblin/examples/dotnet_pe_analysis.rs b/third_party/rust/goblin/examples/dotnet_pe_analysis.rs new file mode 100644 index 000000000000..63eae537fba2 --- /dev/null +++ b/third_party/rust/goblin/examples/dotnet_pe_analysis.rs @@ -0,0 +1,77 @@ +/// Demonstrates how to read additional metadata (i.e. 
.Net runtime ones) from PE context + +use goblin::container::Endian; +use goblin::pe::data_directories::DataDirectory; +use goblin::pe::PE; +use goblin::pe::utils::get_data; +use scroll::ctx::TryFromCtx; +use scroll::Pread; + +#[repr(C)] +#[derive(Debug, Pread)] +pub struct CliHeader { + pub cb: u32, + pub major_version: u16, + pub minor_version: u16, + pub metadata: DataDirectory, + pub flags: u32, + pub entry_point_token: u32, +} + +#[repr(C)] +#[derive(Debug)] +struct MetadataRoot<'a> { + pub signature: u32, + pub major_version: u16, + pub minor_version: u16, + _reserved: u32, + pub length: u32, + pub version: &'a str, +} + +impl<'a> TryFromCtx<'a, Endian> for MetadataRoot<'a> { + type Error = scroll::Error; + type Size = usize; + + fn try_from_ctx(src: &'a [u8], endian: Endian) -> Result<(Self, Self::Size), Self::Error> { + let offset = &mut 0; + let signature = src.gread_with(offset, endian)?; + let major_version = src.gread_with(offset, endian)?; + let minor_version = src.gread_with(offset, endian)?; + let reserved = src.gread_with(offset, endian)?; + let length = src.gread_with(offset, endian)?; + let version = src.gread(offset)?; + Ok(( + Self { + signature, + major_version, + minor_version, + _reserved: reserved, + length, + version, + }, + *offset, + )) + } +} + +fn main() { + let file = include_bytes!("../assets/dotnet_executable_example.dll"); + let file = &file[..]; + let pe = PE::parse(file).unwrap(); + if pe.header.coff_header.machine != 0x14c { + panic!("Is not a .Net executable"); + } + let optional_header = pe.header.optional_header.expect("No optional header"); + let file_alignment = optional_header.windows_fields.file_alignment; + let cli_header = optional_header + .data_directories + .get_clr_runtime_header() + .expect("No CLI header"); + let sections = &pe.sections; + + let cli_header_value: CliHeader = get_data(file, sections, cli_header, file_alignment).unwrap(); + println!("{:#?}", cli_header_value); + let metadata_root: MetadataRoot = 
get_data(file, sections, cli_header_value.metadata, file_alignment).unwrap(); + println!("{:#?}", metadata_root); +} diff --git a/third_party/rust/goblin/examples/dyldinfo.rs b/third_party/rust/goblin/examples/dyldinfo.rs index 6fcbb1c1fc38..384e5e59f2ea 100644 --- a/third_party/rust/goblin/examples/dyldinfo.rs +++ b/third_party/rust/goblin/examples/dyldinfo.rs @@ -1,5 +1,3 @@ -extern crate goblin; - use goblin::mach; use std::env; use std::process; @@ -15,7 +13,7 @@ fn usage() -> ! { process::exit(1); } -fn name_to_str<'a>(name: &'a [u8; 16]) -> Cow<'a, str> { +fn name_to_str(name: &[u8; 16]) -> Cow<'_, str> { for i in 0..16 { if name[i] == 0 { return String::from_utf8_lossy(&name[0..i]) @@ -38,21 +36,19 @@ fn print_binds(sections: &[mach::segment::Section], imports: &[mach::imports::Im println!("bind information:"); println!( - "{:7} {:16} {:14} {:7} {:6} {:16} {}", + "{:7} {:16} {:14} {:7} {:6} {:16} symbol", "segment", "section", "address", "type", "addend", "dylib", - "symbol" ); for import in imports.iter().filter(|i| !i.is_lazy) { // find the section that imported this symbol let section = sections.iter() - .filter(|s| import.address >= s.addr && import.address < (s.addr + s.size)) - .next(); + .find(|s| import.address >= s.addr && import.address < (s.addr + s.size)); // get &strs for its name let (segname, sectname) = section @@ -77,20 +73,18 @@ fn print_lazy_binds(sections: &[mach::segment::Section], imports: &[mach::import println!("lazy binding information (from lazy_bind part of dyld info):"); println!( - "{:7} {:16} {:10} {:6} {:16} {}", + "{:7} {:16} {:10} {:6} {:16} symbol", "segment", "section", "address", "index", "dylib", - "symbol" ); for import in imports.iter().filter(|i| i.is_lazy) { // find the section that imported this symbol let section = sections.iter() - .filter(|s| import.address >= s.addr && import.address < (s.addr + s.size)) - .next(); + .find(|s| import.address >= s.addr && import.address < (s.addr + s.size)); // get &strs for its 
name let (segname, sectname) = section @@ -129,7 +123,7 @@ fn main () { "-lazy_bind" => { lazy_bind = true } other => { println!("unknown flag: {}", other); - println!(""); + println!(); usage(); } } diff --git a/third_party/rust/goblin/examples/lipo.rs b/third_party/rust/goblin/examples/lipo.rs index c7c8f6b0ad92..72cf7a510791 100644 --- a/third_party/rust/goblin/examples/lipo.rs +++ b/third_party/rust/goblin/examples/lipo.rs @@ -1,5 +1,3 @@ -extern crate goblin; - use goblin::mach::{self, Mach}; use std::env; use std::process; @@ -29,7 +27,7 @@ fn main () { "-m64" => { m64 = true } other => { println!("unknown flag: {}", other); - println!(""); + println!(); usage(); } } diff --git a/third_party/rust/goblin/examples/rdr.rs b/third_party/rust/goblin/examples/rdr.rs index cf63b367fc0f..b5aaefe2c03f 100644 --- a/third_party/rust/goblin/examples/rdr.rs +++ b/third_party/rust/goblin/examples/rdr.rs @@ -1,6 +1,3 @@ -extern crate goblin; -extern crate env_logger; - use goblin::error; use std::path::Path; use std::env; @@ -21,7 +18,6 @@ fn run () -> error::Result<()> { } pub fn main () { - env_logger::init(); match run() { Ok(()) => (), Err(err) => println!("{:#}", err) diff --git a/third_party/rust/goblin/examples/scroll.rs b/third_party/rust/goblin/examples/scroll.rs index d9cdc4cfeadd..a0f7beb61e48 100644 --- a/third_party/rust/goblin/examples/scroll.rs +++ b/third_party/rust/goblin/examples/scroll.rs @@ -4,14 +4,10 @@ /// arbitrary buffers, without learning new crate specific function names /// I.e., all you need are Types + Pread = Happiness -extern crate scroll; -extern crate goblin; - use goblin::{error, elf64, elf}; use scroll::{Pwrite, Pread}; fn run () -> error::Result<()> { - use Pread; let crt1: Vec = include!("../etc/crt1.rs"); let header: elf64::header::Header = crt1.pread(0)?; assert_eq!(header.e_type, elf64::header::ET_REL); @@ -28,7 +24,7 @@ fn run () -> error::Result<()> { let elf = elf::Elf::parse(&crt1)?; println!("elf: {:#?}", &elf); Ok(()) -} +} fn 
main() { run().unwrap(); diff --git a/third_party/rust/goblin/src/archive/mod.rs b/third_party/rust/goblin/src/archive/mod.rs index 67152bffdbab..af1be190415d 100644 --- a/third_party/rust/goblin/src/archive/mod.rs +++ b/third_party/rust/goblin/src/archive/mod.rs @@ -6,18 +6,18 @@ //! names in the archive with a / as a sigil for the end of the name, and uses a special symbol //! index for looking up symbols faster. -use scroll::{self, Pread}; +use scroll::{Pread, Pwrite, SizeWith}; -use strtab; -use error::{Result, Error}; +use crate::strtab; +use crate::error::{Result, Error}; use core::usize; -use alloc::collections::btree_map::BTreeMap; -use alloc::vec::Vec; +use crate::alloc::collections::btree_map::BTreeMap; +use crate::alloc::vec::Vec; pub const SIZEOF_MAGIC: usize = 8; /// The magic number of a Unix Archive -pub const MAGIC: &'static [u8; SIZEOF_MAGIC] = b"!\x0A"; +pub const MAGIC: &[u8; SIZEOF_MAGIC] = b"!\x0A"; const SIZEOF_FILE_IDENTIFER: usize = 16; const SIZEOF_FILE_SIZE: usize = 10; @@ -71,7 +71,7 @@ impl MemberHeader { Ok(self.identifier.pread_with::<&str>(0, ::scroll::ctx::StrCtx::Length(SIZEOF_FILE_IDENTIFER))?) } pub fn size(&self) -> Result { - match usize::from_str_radix(self.file_size.pread_with::<&str>(0, ::scroll::ctx::StrCtx::Length(self.file_size.len()))?.trim_right(), 10) { + match usize::from_str_radix(self.file_size.pread_with::<&str>(0, ::scroll::ctx::StrCtx::Length(self.file_size.len()))?.trim_end(), 10) { Ok(file_size) => Ok(file_size), Err(err) => Err(Error::Malformed(format!("{:?} Bad file_size in header: {:?}", err, self))) } @@ -101,7 +101,7 @@ impl<'a> Member<'a> { let header_offset = *offset; let name = buffer.pread_with::<&str>(*offset, ::scroll::ctx::StrCtx::Length(SIZEOF_FILE_IDENTIFER))?; let archive_header = buffer.gread::(offset)?; - let mut header = Header { name: name, size: archive_header.size()? }; + let mut header = Header { name, size: archive_header.size()? 
}; // skip newline padding if we're on an uneven byte boundary if *offset & 1 == 1 { @@ -117,16 +117,16 @@ impl<'a> Member<'a> { header.size -= len; // the name may have trailing NULs which we don't really want to keep - Some(name.trim_right_matches('\0')) + Some(name.trim_end_matches('\0')) } else { None }; Ok(Member { - header: header, + header, header_offset: header_offset as u64, offset: *offset as u64, - bsd_name: bsd_name, + bsd_name, sysv_name: None, }) } @@ -142,7 +142,7 @@ impl<'a> Member<'a> { use core::str::FromStr; if name.len() > 3 && &name[0..3] == "#1/" { - let trimmed_name = &name[3..].trim_right_matches(' '); + let trimmed_name = &name[3..].trim_end_matches(' '); if let Ok(len) = usize::from_str(trimmed_name) { Some(len) } else { @@ -160,7 +160,7 @@ impl<'a> Member<'a> { } else if let Some(ref sysv_name) = self.sysv_name { sysv_name } else { - self.header.name.trim_right_matches(' ').trim_right_matches('/') + self.header.name.trim_end_matches(' ').trim_end_matches('/') } } @@ -186,12 +186,12 @@ pub struct Index<'a> { } /// SysV Archive Variant Symbol Lookup Table "Magic" Name -const INDEX_NAME: &'static str = "/ "; +const INDEX_NAME: &str = "/ "; /// SysV Archive Variant Extended Filename String Table Name -const NAME_INDEX_NAME: &'static str = "// "; +const NAME_INDEX_NAME: &str = "// "; /// BSD symbol definitions -const BSD_SYMDEF_NAME: &'static str = "__.SYMDEF"; -const BSD_SYMDEF_SORTED_NAME: &'static str = "__.SYMDEF SORTED"; +const BSD_SYMDEF_NAME: &str = "__.SYMDEF"; +const BSD_SYMDEF_SORTED_NAME: &str = "__.SYMDEF SORTED"; impl<'a> Index<'a> { /// Parses the given byte buffer into an Index. 
NB: the buffer must be the start of the index @@ -279,6 +279,33 @@ impl<'a> Index<'a> { strtab: strings, }) } + + // Parses Windows Second Linker Member: + // number of members (m): 4 + // member offsets: 4 * m + // number of symbols (n): 4 + // symbol member indexes: 2 * n + // followed by SysV-style string table + // https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#first-linker-member + pub fn parse_windows_linker_member(buffer: &'a [u8]) -> Result { + let offset = &mut 0; + let members = buffer.gread_with::(offset, scroll::LE)? as usize; + let mut member_offsets = Vec::with_capacity(members); + for _ in 0..members { + member_offsets.push(buffer.gread_with::(offset, scroll::LE)?); + } + let symbols = buffer.gread_with::(offset, scroll::LE)? as usize; + let mut symbol_offsets = Vec::with_capacity(symbols); + for _ in 0..symbols { + symbol_offsets.push(member_offsets[buffer.gread_with::(offset, scroll::LE)? as usize - 1]); + } + let strtab = strtab::Strtab::parse(buffer, *offset, buffer.len() - *offset, 0x0)?; + Ok(Index { + size: symbols, + symbol_indexes: symbol_offsets, + strtab: strtab.to_vec()?, + }) + } } /// Member names greater than 16 bytes are indirectly referenced using a `/ NameIndex<'a> { // This is a total hack, because strtab returns "" if idx == 0, need to change // but previous behavior might rely on this, as ELF strtab's have "" at 0th index... let hacked_size = size + 1; - let strtab = strtab::Strtab::parse(buffer, *offset-1, hacked_size, '\n' as u8)?; + let strtab = strtab::Strtab::parse(buffer, *offset-1, hacked_size, b'\n')?; // precious time was lost when refactoring because strtab::parse doesn't update the mutable seek... 
*offset += hacked_size - 2; Ok (NameIndex { - strtab: strtab + strtab }) } pub fn get(&self, name: &str) -> Result<&'a str> { - let idx = name.trim_left_matches('/').trim_right(); + let idx = name.trim_start_matches('/').trim_end(); match usize::from_str_radix(idx, 10) { Ok(idx) => { let name = match self.strtab.get(idx+1) { @@ -312,18 +339,33 @@ impl<'a> NameIndex<'a> { }?; if name != "" { - Ok(name.trim_right_matches('/')) + Ok(name.trim_end_matches('/')) } else { - return Err(Error::Malformed(format!("Could not find {:?} in index", name).into())); + Err(Error::Malformed(format!("Could not find {:?} in index", name))) } }, Err (_) => { - return Err(Error::Malformed(format!("Bad name index {:?} in index", name).into())); + Err(Error::Malformed(format!("Bad name index {:?} in index", name))) } } } } +#[derive(Debug, PartialEq)] +/// The type of symbol index can be present in an archive. Can serve as an indication of the +/// archive format. +pub enum IndexType { + /// No symbol index present. + None, + /// SystemV/GNU style symbol index, used on Windows as well. + SysV, + /// Windows specific extension of SysV symbol index, so called Second Linker Member. Has the + /// same member name as SysV symbol index but different structure. + Windows, + /// BSD style symbol index. + BSD, +} + // TODO: add pretty printer fmt::Display with number of members, and names of members, along with // the values of the index symbols once implemented #[derive(Debug)] @@ -337,23 +379,26 @@ pub struct Archive<'a> { member_array: Vec>, members: BTreeMap<&'a str, usize>, // symbol -> member - symbol_index: BTreeMap<&'a str, usize> + symbol_index: BTreeMap<&'a str, usize>, + /// Type of the symbol index that was found in the archive. 
+ index_type: IndexType, } impl<'a> Archive<'a> { pub fn parse(buffer: &'a [u8]) -> Result> { + let mut magic = [0u8; SIZEOF_MAGIC]; let offset = &mut 0usize; buffer.gread_inout(offset, &mut magic)?; if &magic != MAGIC { - use scroll::Pread; - return Err(Error::BadMagic(magic.pread(0)?).into()); + return Err(Error::BadMagic(magic.pread(0)?)); } let mut member_array = Vec::new(); let mut index = Index::default(); + let mut index_type = IndexType::None; let mut sysv_name_index = NameIndex::default(); - while *offset < buffer.len() { + while *offset + 1 < buffer.len() { // realign the cursor to a word boundary, if it's not on one already if *offset & 1 == 1 { *offset += 1; @@ -367,9 +412,25 @@ impl<'a> Archive<'a> { let name = member.raw_name(); if name == INDEX_NAME { let data: &[u8] = buffer.pread_with(member.offset as usize, member.size())?; - index = Index::parse_sysv_index(data)?; + index = match index_type { + IndexType::None => { + index_type = IndexType::SysV; + Index::parse_sysv_index(data)? + }, + IndexType::SysV => { + index_type = IndexType::Windows; + // second symbol index is Microsoft's extension of SysV format + Index::parse_windows_linker_member(data)? 
+ }, + IndexType::BSD => return Err(Error::Malformed("SysV index occurs after BSD index".into())), + IndexType::Windows => return Err(Error::Malformed("More than two Windows Linker members".into())), + } } else if member.bsd_name == Some(BSD_SYMDEF_NAME) || member.bsd_name == Some(BSD_SYMDEF_SORTED_NAME) { + if index_type != IndexType::None { + return Err(Error::Malformed("BSD index occurs after SysV index".into())); + } + index_type = IndexType::BSD; let data: &[u8] = buffer.pread_with(member.offset as usize, member.size())?; index = Index::parse_bsd_symdef(data)?; @@ -403,20 +464,18 @@ impl<'a> Archive<'a> { // build the symbol index, translating symbol names into member indexes let mut symbol_index: BTreeMap<&str, usize> = BTreeMap::new(); for (member_offset, name) in index.symbol_indexes.iter().zip(index.strtab.iter()) { - let name = name.clone(); let member_index = member_index_by_offset[member_offset]; - symbol_index.insert(name, member_index); + symbol_index.insert(&name, member_index); } - let archive = Archive { - index: index, - member_array: member_array, - sysv_name_index: sysv_name_index, - members: members, - symbol_index: symbol_index, - }; - - Ok(archive) + Ok(Archive { + index, + member_array, + sysv_name_index, + members, + symbol_index, + index_type, + }) } /// Get the member named `member` in this archive, if any @@ -434,7 +493,7 @@ impl<'a> Archive<'a> { let bytes = buffer.pread_with(member.offset as usize, member.size())?; Ok(bytes) } else { - Err(Error::Malformed(format!("Cannot extract member {:?}", member).into())) + Err(Error::Malformed(format!("Cannot extract member {:?}", member))) } } @@ -457,7 +516,7 @@ impl<'a> Archive<'a> { /// Get the list of member names in this archive pub fn members(&self) -> Vec<&'a str> { - self.members.keys().map(|s| *s).collect() + self.members.keys().cloned().collect() } /// Returns the member's name which contains the given `symbol`, if it is in the archive diff --git 
a/third_party/rust/goblin/src/elf/compression_header.rs b/third_party/rust/goblin/src/elf/compression_header.rs index 6e86122d2cf1..18e6dc32785e 100644 --- a/third_party/rust/goblin/src/elf/compression_header.rs +++ b/third_party/rust/goblin/src/elf/compression_header.rs @@ -6,11 +6,11 @@ macro_rules! elf_compression_header { impl ::core::fmt::Debug for CompressionHeader { fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result { - write!(f, - "ch_type: {} ch_size: 0x{} ch_addralign: 0x{:x}", - self.ch_type, - self.ch_size, - self.ch_addralign) + f.debug_struct("CompressionHeader") + .field("ch_type", &self.ch_type) + .field("ch_size", &format_args!("0x{:x}", self.ch_size)) + .field("ch_addralign", &format_args!("0x{:x}", self.ch_addralign)) + .finish() } } } @@ -19,18 +19,18 @@ macro_rules! elf_compression_header { /// ZLIB/DEFLATE algorithm. pub const ELFCOMPRESS_ZLIB: u32 = 1; /// Start of OS-specific. -pub const ELFCOMPRESS_LOOS: u32 = 0x60000000; +pub const ELFCOMPRESS_LOOS: u32 = 0x6000_0000; /// End of OS-specific. -pub const ELFCOMPRESS_HIOS: u32 = 0x6fffffff; +pub const ELFCOMPRESS_HIOS: u32 = 0x6fff_ffff; /// Start of processor-specific. -pub const ELFCOMPRESS_LOPROC: u32 = 0x70000000; +pub const ELFCOMPRESS_LOPROC: u32 = 0x7000_0000; /// End of processor-specific. -pub const ELFCOMPRESS_HIPROC: u32 = 0x7fffffff; +pub const ELFCOMPRESS_HIPROC: u32 = 0x7fff_ffff; macro_rules! elf_compression_header_std_impl { ($size:ty) => { #[cfg(test)] - mod test { + mod tests { use super::*; #[test] fn size_of() { @@ -39,12 +39,12 @@ macro_rules! elf_compression_header_std_impl { ($size:ty) => { } if_alloc! { - use elf::compression_header::CompressionHeader as ElfCompressionHeader; + use crate::elf::compression_header::CompressionHeader as ElfCompressionHeader; use plain::Plain; if_std! { - use error::Result; + use crate::error::Result; use std::fs::File; use std::io::{Read, Seek}; @@ -55,8 +55,8 @@ macro_rules! 
elf_compression_header_std_impl { ($size:ty) => { fn from(ch: CompressionHeader) -> Self { ElfCompressionHeader { ch_type: ch.ch_type, - ch_size: ch.ch_size as u64, - ch_addralign: ch.ch_addralign as u64, + ch_size: u64::from(ch.ch_size), + ch_addralign: u64::from(ch.ch_addralign), } } } @@ -71,9 +71,9 @@ macro_rules! elf_compression_header_std_impl { ($size:ty) => { #[cfg(feature = "std")] pub fn from_fd(fd: &mut File, offset: u64) -> Result { let mut chdr = CompressionHeader::default(); - try!(fd.seek(Start(offset))); + fd.seek(Start(offset))?; unsafe { - try!(fd.read(plain::as_mut_bytes(&mut chdr))); + fd.read_exact(plain::as_mut_bytes(&mut chdr))?; } Ok(chdr) } @@ -81,9 +81,11 @@ macro_rules! elf_compression_header_std_impl { ($size:ty) => { } // end if_alloc };} +#[cfg(feature = "alloc")] +use scroll::{Pread, Pwrite, SizeWith}; pub mod compression_header32 { - pub use elf::compression_header::*; + pub use crate::elf::compression_header::*; #[repr(C)] #[derive(Copy, Clone, Eq, PartialEq, Default)] @@ -119,7 +121,7 @@ pub mod compression_header32 { pub mod compression_header64 { - pub use elf::compression_header::*; + pub use crate::elf::compression_header::*; #[repr(C)] #[derive(Copy, Clone, Eq, PartialEq, Default)] @@ -160,11 +162,12 @@ pub mod compression_header64 { /////////////////////////////// if_alloc! { - use error; + #[cfg(feature = "endian_fd")] + use crate::error; use core::fmt; use core::result; use scroll::ctx; - use container::{Container, Ctx}; + use crate::container::{Container, Ctx}; #[derive(Default, PartialEq, Clone)] /// A unified CompressionHeader - convertable to and from 32-bit and 64-bit variants @@ -180,9 +183,9 @@ if_alloc! 
{ impl CompressionHeader { /// Return the size of the underlying compression header, given a `container` #[inline] - pub fn size(ctx: &Ctx) -> usize { + pub fn size(ctx: Ctx) -> usize { use scroll::ctx::SizeWith; - Self::size_with(ctx) + Self::size_with(&ctx) } pub fn new() -> Self { CompressionHeader { @@ -201,11 +204,11 @@ if_alloc! { impl fmt::Debug for CompressionHeader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "ch_type: {} ch_size: 0x{} ch_addralign: 0x{:x}", - self.ch_type, - self.ch_size, - self.ch_addralign) + f.debug_struct("CompressionHeader") + .field("ch_type", &self.ch_type) + .field("ch_size", &format_args!("0x{:x}", self.ch_size)) + .field("ch_addralign", &format_args!("0x{:x}", self.ch_addralign)) + .finish() } } @@ -224,7 +227,7 @@ if_alloc! { } impl<'a> ctx::TryFromCtx<'a, Ctx> for CompressionHeader { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], Ctx {container, le}: Ctx) -> result::Result<(Self, Self::Size), Self::Error> { use scroll::Pread; @@ -241,7 +244,7 @@ if_alloc! 
{ } impl ctx::TryIntoCtx for CompressionHeader { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result { use scroll::Pwrite; diff --git a/third_party/rust/goblin/src/elf/constants_relocation.rs b/third_party/rust/goblin/src/elf/constants_relocation.rs index 0cd8eecad833..6339de5434d6 100644 --- a/third_party/rust/goblin/src/elf/constants_relocation.rs +++ b/third_party/rust/goblin/src/elf/constants_relocation.rs @@ -794,9 +794,120 @@ pub const R_MIPS_COPY: u32 = 126; pub const R_MIPS_JUMP_SLOT: u32 = 127; pub const R_MIPS_NUM: u32 = 128; +/////////////////// +// RISC-V +// See https://github.com/riscv/riscv-elf-psabi-doc +/////////////////// +/// None +pub const R_RISCV_NONE: u32 = 0; +/// Runtime relocation: word32 = S + A +pub const R_RISCV_32: u32 = 1; +/// Runtime relocation: word64 = S + A +pub const R_RISCV_64: u32 = 2; +/// Runtime relocation: word32,64 = B + A +pub const R_RISCV_RELATIVE: u32 = 3; +/// Runtime relocation: must be in executable, not allowed in shared library +pub const R_RISCV_COPY: u32 = 4; +/// Runtime relocation: word32,64 = S; handled by PLT unless LD_BIND_NOW +pub const R_RISCV_JUMP_SLOT: u32 = 5; +/// TLS relocation: word32 = S->TLSINDEX +pub const R_RISCV_TLS_DTPMOD32: u32 = 6; +/// TLS relocation: word64 = S->TLSINDEX +pub const R_RISCV_TLS_DTPMOD64: u32 = 7; +/// TLS relocation: word32 = TLS + S + A - TLS_TP_OFFSET +pub const R_RISCV_TLS_DTPREL32: u32 = 8; +/// TLS relocation: word64 = TLS + S + A - TLS_TP_OFFSET +pub const R_RISCV_TLS_DTPREL64: u32 = 9; +/// TLS relocation: word32 = TLS + S + A + S_TLS_OFFSET - TLS_DTV_OFFSET +pub const R_RISCV_TLS_TPREL32: u32 = 10; +/// TLS relocation: word64 = TLS + S + A + S_TLS_OFFSET - TLS_DTV_OFFSET +pub const R_RISCV_TLS_TPREL64: u32 = 11; +/// PC-relative branch (SB-Type) +pub const R_RISCV_BRANCH: u32 = 16; +/// PC-relative jump (UJ-Type) +pub const R_RISCV_JAL: u32 = 17; 
+/// PC-relative call: MACRO call,tail (auipc+jalr pair) +pub const R_RISCV_CALL: u32 = 18; +/// PC-relative call (PLT): MACRO call,tail (auipc+jalr pair) PIC +pub const R_RISCV_CALL_PLT: u32 = 19; +/// PC-relative GOT reference: MACRO la +pub const R_RISCV_GOT_HI20: u32 = 20; +/// PC-relative TLS IE GOT offset: MACRO la.tls.ie +pub const R_RISCV_TLS_GOT_HI20: u32 = 21; +/// PC-relative TLS GD reference: MACRO la.tls.gd +pub const R_RISCV_TLS_GD_HI20: u32 = 22; +/// PC-relative reference: %pcrel_hi(symbol) (U-Type) +pub const R_RISCV_PCREL_HI20: u32 = 23; +/// PC-relative reference: %pcrel_lo(symbol) (I-Type) +pub const R_RISCV_PCREL_LO12_I: u32 = 24; +/// PC-relative reference: %pcrel_lo(symbol) (S-Type) +pub const R_RISCV_PCREL_LO12_S: u32 = 25; +/// Absolute address: %hi(symbol) (U-Type) +pub const R_RISCV_HI20: u32 = 26; +/// Absolute address: %lo(symbol) (I-Type) +pub const R_RISCV_LO12_I: u32 = 27; +/// Absolute address: %lo(symbol) (S-Type) +pub const R_RISCV_LO12_S: u32 = 28; +/// TLS LE thread offset: %tprel_hi(symbol) (U-Type) +pub const R_RISCV_TPREL_HI20: u32 = 29; +/// TLS LE thread offset: %tprel_lo(symbol) (I-Type) +pub const R_RISCV_TPREL_LO12_I: u32 = 30; +/// TLS LE thread offset: %tprel_lo(symbol) (S-Type) +pub const R_RISCV_TPREL_LO12_S: u32 = 31; +/// TLS LE thread usage: %tprel_add(symbol) +pub const R_RISCV_TPREL_ADD: u32 = 32; +/// 8-bit label addition: word8 = S + A +pub const R_RISCV_ADD8: u32 = 33; +/// 16-bit label addition: word16 = S + A +pub const R_RISCV_ADD16: u32 = 34; +/// 32-bit label addition: word32 = S + A +pub const R_RISCV_ADD32: u32 = 35; +/// 64-bit label addition: word64 = S + A +pub const R_RISCV_ADD64: u32 = 36; +/// 8-bit label subtraction: word8 = S - A +pub const R_RISCV_SUB8: u32 = 37; +/// 16-bit label subtraction: word16 = S - A +pub const R_RISCV_SUB16: u32 = 38; +/// 32-bit label subtraction: word32 = S - A +pub const R_RISCV_SUB32: u32 = 39; +/// 64-bit label subtraction: word64 = S - A +pub const 
R_RISCV_SUB64: u32 = 40; +/// GNU C++ vtable hierarchy +pub const R_RISCV_GNU_VTINHERIT: u32 = 41; +/// GNU C++ vtable member usage +pub const R_RISCV_GNU_VTENTRY: u32 = 42; +/// Alignment statement +pub const R_RISCV_ALIGN: u32 = 43; +/// PC-relative branch offset (CB-Type) +pub const R_RISCV_RVC_BRANCH: u32 = 44; +/// PC-relative jump offset (CJ-Type) +pub const R_RISCV_RVC_JUMP: u32 = 45; +/// Absolute address (CI-Type) +pub const R_RISCV_RVC_LUI: u32 = 46; +/// GP-relative reference (I-Type) +pub const R_RISCV_GPREL_I: u32 = 47; +/// GP-relative reference (S-Type) +pub const R_RISCV_GPREL_S: u32 = 48; +/// TP-relative TLS LE load (I-Type) +pub const R_RISCV_TPREL_I: u32 = 49; +/// TP-relative TLS LE store (S-Type) +pub const R_RISCV_TPREL_S: u32 = 50; +/// Instruction pair can be relaxed +pub const R_RISCV_RELAX: u32 = 51; +/// Local label subtraction +pub const R_RISCV_SUB6: u32 = 52; +/// Local label subtraction +pub const R_RISCV_SET6: u32 = 53; +/// Local label subtraction +pub const R_RISCV_SET8: u32 = 54; +/// Local label subtraction +pub const R_RISCV_SET16: u32 = 55; +/// Local label subtraction +pub const R_RISCV_SET32: u32 = 56; + #[inline] pub fn r_to_str(typ: u32, machine: u16) -> &'static str { - use elf::header::*; + use crate::elf::header::*; match machine { // x86 EM_386 => { match typ { @@ -1243,6 +1354,64 @@ pub fn r_to_str(typ: u32, machine: u16) -> &'static str { R_MIPS_COPY => "R_MIPS_COPY", R_MIPS_JUMP_SLOT => "R_MIPS_JUMP_SLOT", _ => "R_UNKNOWN_MIPS", - }} _ => "R_UNKNOWN" + }}, + // RISC-V + EM_RISCV => { match typ { + R_RISCV_NONE => "R_RISCV_NONE", + R_RISCV_32 => "R_RISCV_32", + R_RISCV_64 => "R_RISCV_64", + R_RISCV_RELATIVE => "R_RISCV_RELATIVE", + R_RISCV_COPY => "R_RISCV_COPY", + R_RISCV_JUMP_SLOT => "R_RISCV_JUMP_SLOT", + R_RISCV_TLS_DTPMOD32 => "R_RISCV_TLS_DTPMOD32", + R_RISCV_TLS_DTPMOD64 => "R_RISCV_TLS_DTPMOD64", + R_RISCV_TLS_DTPREL32 => "R_RISCV_TLS_DTPREL32", + R_RISCV_TLS_DTPREL64 => "R_RISCV_TLS_DTPREL64", + 
R_RISCV_TLS_TPREL32 => "R_RISCV_TLS_TPREL32", + R_RISCV_TLS_TPREL64 => "R_RISCV_TLS_TPREL64", + R_RISCV_BRANCH => "R_RISCV_BRANCH", + R_RISCV_JAL => "R_RISCV_JAL", + R_RISCV_CALL => "R_RISCV_CALL", + R_RISCV_CALL_PLT => "R_RISCV_CALL_PLT", + R_RISCV_GOT_HI20 => "R_RISCV_GOT_HI20", + R_RISCV_TLS_GOT_HI20 => "R_RISCV_TLS_GOT_HI20", + R_RISCV_TLS_GD_HI20 => "R_RISCV_TLS_GD_HI20", + R_RISCV_PCREL_HI20 => "R_RISCV_PCREL_HI20", + R_RISCV_PCREL_LO12_I => "R_RISCV_PCREL_LO12_I", + R_RISCV_PCREL_LO12_S => "R_RISCV_PCREL_LO12_S", + R_RISCV_HI20 => "R_RISCV_HI20", + R_RISCV_LO12_I => "R_RISCV_LO12_I", + R_RISCV_LO12_S => "R_RISCV_LO12_S", + R_RISCV_TPREL_HI20 => "R_RISCV_TPREL_HI20", + R_RISCV_TPREL_LO12_I => "R_RISCV_TPREL_LO12_I", + R_RISCV_TPREL_LO12_S => "R_RISCV_TPREL_LO12_S", + R_RISCV_TPREL_ADD => "R_RISCV_TPREL_ADD", + R_RISCV_ADD8 => "R_RISCV_ADD8", + R_RISCV_ADD16 => "R_RISCV_ADD16", + R_RISCV_ADD32 => "R_RISCV_ADD32", + R_RISCV_ADD64 => "R_RISCV_ADD64", + R_RISCV_SUB8 => "R_RISCV_SUB8", + R_RISCV_SUB16 => "R_RISCV_SUB16", + R_RISCV_SUB32 => "R_RISCV_SUB32", + R_RISCV_SUB64 => "R_RISCV_SUB64", + R_RISCV_GNU_VTINHERIT => "R_RISCV_GNU_VTINHERIT", + R_RISCV_GNU_VTENTRY => "R_RISCV_GNU_VTENTRY", + R_RISCV_ALIGN => "R_RISCV_ALIGN", + R_RISCV_RVC_BRANCH => "R_RISCV_RVC_BRANCH", + R_RISCV_RVC_JUMP => "R_RISCV_RVC_JUMP", + R_RISCV_RVC_LUI => "R_RISCV_RVC_LUI", + R_RISCV_GPREL_I => "R_RISCV_GPREL_I", + R_RISCV_GPREL_S => "R_RISCV_GPREL_S", + R_RISCV_TPREL_I => "R_RISCV_TPREL_I", + R_RISCV_TPREL_S => "R_RISCV_TPREL_S", + R_RISCV_RELAX => "R_RISCV_RELAX", + R_RISCV_SUB6 => "R_RISCV_SUB6", + R_RISCV_SET6 => "R_RISCV_SET6", + R_RISCV_SET8 => "R_RISCV_SET8", + R_RISCV_SET16 => "R_RISCV_SET16", + R_RISCV_SET32 => "R_RISCV_SET32", + _ => "R_UNKNOWN_RISCV", + }}, + _ => "R_UNKNOWN", } } diff --git a/third_party/rust/goblin/src/elf/dyn.rs b/third_party/rust/goblin/src/elf/dynamic.rs similarity index 57% rename from third_party/rust/goblin/src/elf/dyn.rs rename to 
third_party/rust/goblin/src/elf/dynamic.rs index ff6d2936a4d7..52ca0bd0254d 100644 --- a/third_party/rust/goblin/src/elf/dyn.rs +++ b/third_party/rust/goblin/src/elf/dynamic.rs @@ -1,6 +1,8 @@ macro_rules! elf_dyn { ($size:ty) => { + #[cfg(feature = "alloc")] + use scroll::{Pread, Pwrite, SizeWith}; #[repr(C)] #[derive(Copy, Clone, PartialEq, Default)] #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] @@ -94,13 +96,13 @@ pub const DT_PREINIT_ARRAYSZ: u64 = 33; /// Number used pub const DT_NUM: u64 = 34; /// Start of OS-specific -pub const DT_LOOS: u64 = 0x6000000d; +pub const DT_LOOS: u64 = 0x6000_000d; /// End of OS-specific -pub const DT_HIOS: u64 = 0x6ffff000; +pub const DT_HIOS: u64 = 0x6fff_f000; /// Start of processor-specific -pub const DT_LOPROC: u64 = 0x70000000; +pub const DT_LOPROC: u64 = 0x7000_0000; /// End of processor-specific -pub const DT_HIPROC: u64 = 0x7fffffff; +pub const DT_HIPROC: u64 = 0x7fff_ffff; // Most used by any processor // pub const DT_PROCNUM: u64 = DT_MIPS_NUM; @@ -109,49 +111,49 @@ pub const DT_HIPROC: u64 = 0x7fffffff; /// /// If any adjustment is made to the ELF object after it has been /// built these entries will need to be adjusted. 
-pub const DT_ADDRRNGLO: u64 = 0x6ffffe00; +pub const DT_ADDRRNGLO: u64 = 0x6fff_fe00; /// GNU-style hash table -pub const DT_GNU_HASH: u64 = 0x6ffffef5; +pub const DT_GNU_HASH: u64 = 0x6fff_fef5; /// -pub const DT_TLSDESC_PLT: u64 = 0x6ffffef6; +pub const DT_TLSDESC_PLT: u64 = 0x6fff_fef6; /// -pub const DT_TLSDESC_GOT: u64 = 0x6ffffef7; +pub const DT_TLSDESC_GOT: u64 = 0x6fff_fef7; /// Start of conflict section -pub const DT_GNU_CONFLICT: u64 = 0x6ffffef8; +pub const DT_GNU_CONFLICT: u64 = 0x6fff_fef8; /// Library list -pub const DT_GNU_LIBLIST: u64 = 0x6ffffef9; +pub const DT_GNU_LIBLIST: u64 = 0x6fff_fef9; /// Configuration information -pub const DT_CONFIG: u64 = 0x6ffffefa; +pub const DT_CONFIG: u64 = 0x6fff_fefa; /// Dependency auditing -pub const DT_DEPAUDIT: u64 = 0x6ffffefb; +pub const DT_DEPAUDIT: u64 = 0x6fff_fefb; /// Object auditing -pub const DT_AUDIT: u64 = 0x6ffffefc; +pub const DT_AUDIT: u64 = 0x6fff_fefc; /// PLT padding -pub const DT_PLTPAD: u64 = 0x6ffffefd; +pub const DT_PLTPAD: u64 = 0x6fff_fefd; /// Move table -pub const DT_MOVETAB: u64 = 0x6ffffefe; +pub const DT_MOVETAB: u64 = 0x6fff_fefe; /// Syminfo table -pub const DT_SYMINFO: u64 = 0x6ffffeff; +pub const DT_SYMINFO: u64 = 0x6fff_feff; /// -pub const DT_ADDRRNGHI: u64 = 0x6ffffeff; +pub const DT_ADDRRNGHI: u64 = 0x6fff_feff; //DT_ADDRTAGIDX(tag) (DT_ADDRRNGHI - (tag)) /* Reverse order! */ pub const DT_ADDRNUM: u64 = 11; /// The versioning entry types. 
The next are defined as part of the GNU extension -pub const DT_VERSYM: u64 = 0x6ffffff0; -pub const DT_RELACOUNT: u64 = 0x6ffffff9; -pub const DT_RELCOUNT: u64 = 0x6ffffffa; +pub const DT_VERSYM: u64 = 0x6fff_fff0; +pub const DT_RELACOUNT: u64 = 0x6fff_fff9; +pub const DT_RELCOUNT: u64 = 0x6fff_fffa; /// State flags, see DF_1_* below -pub const DT_FLAGS_1: u64 = 0x6ffffffb; +pub const DT_FLAGS_1: u64 = 0x6fff_fffb; /// Address of version definition table -pub const DT_VERDEF: u64 = 0x6ffffffc; +pub const DT_VERDEF: u64 = 0x6fff_fffc; /// Number of version definitions -pub const DT_VERDEFNUM: u64 = 0x6ffffffd; +pub const DT_VERDEFNUM: u64 = 0x6fff_fffd; /// Address of table with needed versions -pub const DT_VERNEED: u64 = 0x6ffffffe; +pub const DT_VERNEED: u64 = 0x6fff_fffe; /// Number of needed versions -pub const DT_VERNEEDNUM: u64 = 0x6fffffff; +pub const DT_VERNEEDNUM: u64 = 0x6fff_ffff; /// Converts a tag to its string representation. #[inline] @@ -210,75 +212,74 @@ pub fn tag_to_str(tag: u64) -> &'static str { // Values of `d_un.d_val` in the DT_FLAGS entry /// Object may use DF_ORIGIN. -pub const DF_ORIGIN: u64 = 0x00000001; +pub const DF_ORIGIN: u64 = 0x0000_0001; /// Symbol resolutions starts here. -pub const DF_SYMBOLIC: u64 = 0x00000002; +pub const DF_SYMBOLIC: u64 = 0x0000_0002; /// Object contains text relocations. -pub const DF_TEXTREL: u64 = 0x00000004; +pub const DF_TEXTREL: u64 = 0x0000_0004; /// No lazy binding for this object. -pub const DF_BIND_NOW: u64 = 0x00000008; +pub const DF_BIND_NOW: u64 = 0x0000_0008; /// Module uses the static TLS model. -pub const DF_STATIC_TLS: u64 = 0x00000010; +pub const DF_STATIC_TLS: u64 = 0x0000_0010; /// === State flags === /// selectable in the `d_un.d_val` element of the DT_FLAGS_1 entry in the dynamic section. /// /// Set RTLD_NOW for this object. -pub const DF_1_NOW: u64 = 0x00000001; +pub const DF_1_NOW: u64 = 0x0000_0001; /// Set RTLD_GLOBAL for this object. 
-pub const DF_1_GLOBAL: u64 = 0x00000002; +pub const DF_1_GLOBAL: u64 = 0x0000_0002; /// Set RTLD_GROUP for this object. -pub const DF_1_GROUP: u64 = 0x00000004; +pub const DF_1_GROUP: u64 = 0x0000_0004; /// Set RTLD_NODELETE for this object. -pub const DF_1_NODELETE: u64 = 0x00000008; +pub const DF_1_NODELETE: u64 = 0x0000_0008; /// Trigger filtee loading at runtime. -pub const DF_1_LOADFLTR: u64 = 0x00000010; +pub const DF_1_LOADFLTR: u64 = 0x0000_0010; /// Set RTLD_INITFIRST for this object. -pub const DF_1_INITFIRST: u64 = 0x00000020; +pub const DF_1_INITFIRST: u64 = 0x0000_0020; /// Set RTLD_NOOPEN for this object. -pub const DF_1_NOOPEN: u64 = 0x00000040; +pub const DF_1_NOOPEN: u64 = 0x0000_0040; /// $ORIGIN must be handled. -pub const DF_1_ORIGIN: u64 = 0x00000080; +pub const DF_1_ORIGIN: u64 = 0x0000_0080; /// Direct binding enabled. -pub const DF_1_DIRECT: u64 = 0x00000100; -pub const DF_1_TRANS: u64 = 0x00000200; +pub const DF_1_DIRECT: u64 = 0x0000_0100; +pub const DF_1_TRANS: u64 = 0x0000_0200; /// Object is used to interpose. -pub const DF_1_INTERPOSE: u64 = 0x00000400; +pub const DF_1_INTERPOSE: u64 = 0x0000_0400; /// Ignore default lib search path. -pub const DF_1_NODEFLIB: u64 = 0x00000800; +pub const DF_1_NODEFLIB: u64 = 0x0000_0800; /// Object can't be dldump'ed. -pub const DF_1_NODUMP: u64 = 0x00001000; +pub const DF_1_NODUMP: u64 = 0x0000_1000; /// Configuration alternative created. -pub const DF_1_CONFALT: u64 = 0x00002000; +pub const DF_1_CONFALT: u64 = 0x0000_2000; /// Filtee terminates filters search. -pub const DF_1_ENDFILTEE: u64 = 0x00004000; +pub const DF_1_ENDFILTEE: u64 = 0x0000_4000; /// Disp reloc applied at build time. -pub const DF_1_DISPRELDNE: u64 = 0x00008000; +pub const DF_1_DISPRELDNE: u64 = 0x0000_8000; /// Disp reloc applied at run-time. -pub const DF_1_DISPRELPND: u64 = 0x00010000; +pub const DF_1_DISPRELPND: u64 = 0x0001_0000; /// Object has no-direct binding. 
-pub const DF_1_NODIRECT: u64 = 0x00020000; -pub const DF_1_IGNMULDEF: u64 = 0x00040000; -pub const DF_1_NOKSYMS: u64 = 0x00080000; -pub const DF_1_NOHDR: u64 = 0x00100000; +pub const DF_1_NODIRECT: u64 = 0x0002_0000; +pub const DF_1_IGNMULDEF: u64 = 0x0004_0000; +pub const DF_1_NOKSYMS: u64 = 0x0008_0000; +pub const DF_1_NOHDR: u64 = 0x0010_0000; /// Object is modified after built. -pub const DF_1_EDITED: u64 = 0x00200000; -pub const DF_1_NORELOC: u64 = 0x00400000; +pub const DF_1_EDITED: u64 = 0x0020_0000; +pub const DF_1_NORELOC: u64 = 0x0040_0000; /// Object has individual interposers. -pub const DF_1_SYMINTPOSE: u64 = 0x00800000; +pub const DF_1_SYMINTPOSE: u64 = 0x0080_0000; /// Global auditing required. -pub const DF_1_GLOBAUDIT: u64 = 0x01000000; +pub const DF_1_GLOBAUDIT: u64 = 0x0100_0000; /// Singleton dyn are used. -pub const DF_1_SINGLETON: u64 = 0x02000000; +pub const DF_1_SINGLETON: u64 = 0x0200_0000; if_alloc! { use core::fmt; use scroll::ctx; use core::result; - use container::{Ctx, Container}; - use strtab::Strtab; - use self::dyn32::{DynamicInfo}; - use alloc::vec::Vec; + use crate::container::{Ctx, Container}; + use crate::strtab::Strtab; + use crate::alloc::vec::Vec; #[derive(Default, PartialEq, Clone)] pub struct Dyn { @@ -296,10 +297,10 @@ if_alloc! { impl fmt::Debug for Dyn { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "d_tag: {} d_val: 0x{:x}", - tag_to_str(self.d_tag as u64), - self.d_val) + f.debug_struct("Dyn") + .field("d_tag", &tag_to_str(self.d_tag)) + .field("d_val", &format_args!("0x{:x}", self.d_val)) + .finish() } } @@ -318,11 +319,11 @@ if_alloc! 
{ } impl<'a> ctx::TryFromCtx<'a, Ctx> for Dyn { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], Ctx { container, le}: Ctx) -> result::Result<(Self, Self::Size), Self::Error> { use scroll::Pread; - let dyn = match container { + let dynamic = match container { Container::Little => { (bytes.pread_with::(0, le)?.into(), dyn32::SIZEOF_DYN) }, @@ -330,23 +331,23 @@ if_alloc! { (bytes.pread_with::(0, le)?.into(), dyn64::SIZEOF_DYN) } }; - Ok(dyn) + Ok(dynamic) } } impl ctx::TryIntoCtx for Dyn { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], Ctx { container, le}: Ctx) -> result::Result { use scroll::Pwrite; match container { Container::Little => { - let dyn: dyn32::Dyn = self.into(); - Ok(bytes.pwrite_with(dyn, 0, le)?) + let dynamic: dyn32::Dyn = self.into(); + Ok(bytes.pwrite_with(dynamic, 0, le)?) }, Container::Big => { - let dyn: dyn64::Dyn = self.into(); - Ok(bytes.pwrite_with(dyn, 0, le)?) + let dynamic: dyn64::Dyn = self.into(); + Ok(bytes.pwrite_with(dynamic, 0, le)?) } } } @@ -362,27 +363,32 @@ if_alloc! { impl Dynamic { #[cfg(feature = "endian_fd")] /// Returns a vector of dynamic entries from the underlying byte `bytes`, with `endianness`, using the provided `phdrs` - pub fn parse(bytes: &[u8], phdrs: &[::elf::program_header::ProgramHeader], bias: usize, ctx: Ctx) -> ::error::Result> { + pub fn parse(bytes: &[u8], phdrs: &[crate::elf::program_header::ProgramHeader], ctx: Ctx) -> crate::error::Result> { use scroll::ctx::SizeWith; use scroll::Pread; - use elf::program_header; + use crate::elf::program_header; for phdr in phdrs { if phdr.p_type == program_header::PT_DYNAMIC { + let offset = phdr.p_offset as usize; let filesz = phdr.p_filesz as usize; + // Ensure offset and filesz are valid. 
+ let bytes = bytes + .pread_with::<&[u8]>(offset, filesz) + .map_err(|_| crate::error::Error::Malformed(format!("Invalid PT_DYNAMIC size (offset {:#x}, filesz {:#x})", + offset, filesz)))?; let size = Dyn::size_with(&ctx); let count = filesz / size; let mut dyns = Vec::with_capacity(count); - let mut offset = phdr.p_offset as usize; + let mut offset = 0; for _ in 0..count { - let dyn = bytes.gread_with::(&mut offset, ctx)?; - let tag = dyn.d_tag; - dyns.push(dyn); + let dynamic = bytes.gread_with::(&mut offset, ctx)?; + let tag = dynamic.d_tag; + dyns.push(dynamic); if tag == DT_NULL { break } } let mut info = DynamicInfo::default(); - for dyn in &dyns { - let dyn: dyn32::Dyn = dyn.clone().into(); - info.update(bias, &dyn); + for dynamic in &dyns { + info.update(phdrs, dynamic); } let count = dyns.len(); return Ok(Some(Dynamic { dyns: dyns, info: info, count: count })); @@ -392,14 +398,15 @@ if_alloc! { } pub fn get_libraries<'a>(&self, strtab: &Strtab<'a>) -> Vec<&'a str> { + use log::warn; let count = self.info.needed_count; let mut needed = Vec::with_capacity(count); - for dyn in &self.dyns { - if dyn.d_tag as u64 == DT_NEEDED { - match strtab.get(dyn.d_val as usize) { - Some(Ok(lib)) => needed.push(lib), - // FIXME: warn! here - _ => (), + for dynamic in &self.dyns { + if dynamic.d_tag as u64 == DT_NEEDED { + if let Some(Ok(lib)) = strtab.get(dynamic.d_val as usize) { + needed.push(lib) + } else { + warn!("Invalid DT_NEEDED {}", dynamic.d_val) } } } @@ -412,7 +419,7 @@ macro_rules! elf_dyn_std_impl { ($size:ident, $phdr:ty) => { #[cfg(test)] - mod test { + mod tests { use super::*; #[test] fn size_of() { @@ -423,73 +430,43 @@ macro_rules! elf_dyn_std_impl { if_alloc! 
{ use core::fmt; use core::slice; - use alloc::vec::Vec; + use crate::alloc::vec::Vec; - use elf::program_header::{PT_DYNAMIC}; - use strtab::Strtab; + use crate::elf::program_header::{PT_DYNAMIC}; + use crate::strtab::Strtab; - use elf::dyn::Dyn as ElfDyn; + use crate::elf::dynamic::Dyn as ElfDyn; if_std! { use std::fs::File; use std::io::{Read, Seek}; use std::io::SeekFrom::Start; - use error::Result; + use crate::error::Result; } impl From for Dyn { - fn from(dyn: ElfDyn) -> Self { + fn from(dynamic: ElfDyn) -> Self { Dyn { - d_tag: dyn.d_tag as $size, - d_val: dyn.d_val as $size, + d_tag: dynamic.d_tag as $size, + d_val: dynamic.d_val as $size, } } } impl From for ElfDyn { - fn from(dyn: Dyn) -> Self { + fn from(dynamic: Dyn) -> Self { ElfDyn { - d_tag: dyn.d_tag as u64, - d_val: dyn.d_val as u64, + d_tag: u64::from(dynamic.d_tag), + d_val: u64::from(dynamic.d_val), } } } impl fmt::Debug for Dyn { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "d_tag: {} d_val: 0x{:x}", - tag_to_str(self.d_tag as u64), - self.d_val) - } - } - - impl fmt::Debug for DynamicInfo { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let gnu_hash = if let Some(addr) = self.gnu_hash { addr } else { 0 }; - let hash = if let Some(addr) = self.hash { addr } else { 0 }; - let pltgot = if let Some(addr) = self.pltgot { addr } else { 0 }; - write!(f, "rela: 0x{:x} relasz: {} relaent: {} relacount: {} gnu_hash: 0x{:x} hash: 0x{:x} strtab: 0x{:x} strsz: {} symtab: 0x{:x} syment: {} pltgot: 0x{:x} pltrelsz: {} pltrel: {} jmprel: 0x{:x} verneed: 0x{:x} verneednum: {} versym: 0x{:x} init: 0x{:x} fini: 0x{:x} needed_count: {}", - self.rela, - self.relasz, - self.relaent, - self.relacount, - gnu_hash, - hash, - self.strtab, - self.strsz, - self.symtab, - self.syment, - pltgot, - self.pltrelsz, - self.pltrel, - self.jmprel, - self.verneed, - self.verneednum, - self.versym, - self.init, - self.fini, - self.needed_count, - ) + f.debug_struct("Dyn") + .field("d_tag", 
&tag_to_str(u64::from(self.d_tag))) + .field("d_val", &format_args!("0x{:x}", self.d_val)) + .finish() } } @@ -498,12 +475,13 @@ macro_rules! elf_dyn_std_impl { pub fn from_fd(mut fd: &File, phdrs: &[$phdr]) -> Result>> { for phdr in phdrs { if phdr.p_type == PT_DYNAMIC { + // FIXME: validate filesz before allocating let filesz = phdr.p_filesz as usize; let dync = filesz / SIZEOF_DYN; let mut dyns = vec![Dyn::default(); dync]; - try!(fd.seek(Start(phdr.p_offset as u64))); + fd.seek(Start(u64::from(phdr.p_offset)))?; unsafe { - try!(fd.read(plain::as_mut_bytes(&mut *dyns))); + fd.read_exact(plain::as_mut_bytes(&mut *dyns))?; } dyns.dedup(); return Ok(Some(dyns)); @@ -516,7 +494,7 @@ macro_rules! elf_dyn_std_impl { pub unsafe fn from_raw<'a>(bias: usize, vaddr: usize) -> &'a [Dyn] { let dynp = vaddr.wrapping_add(bias) as *const Dyn; let mut idx = 0; - while (*dynp.offset(idx)).d_tag as u64 != DT_NULL { + while u64::from((*dynp.offset(idx)).d_tag) != DT_NULL { idx += 1; } slice::from_raw_parts(dynp, idx as usize) @@ -538,15 +516,32 @@ macro_rules! elf_dyn_std_impl { /// Gets the needed libraries from the `_DYNAMIC` array, with the str slices lifetime tied to the dynamic array/strtab's lifetime(s) pub unsafe fn get_needed<'a>(dyns: &[Dyn], strtab: *const Strtab<'a>, count: usize) -> Vec<&'a str> { let mut needed = Vec::with_capacity(count); - for dyn in dyns { - if dyn.d_tag as u64 == DT_NEEDED { - let lib = &(*strtab)[dyn.d_val as usize]; + for dynamic in dyns { + if u64::from(dynamic.d_tag) == DT_NEEDED { + let lib = &(*strtab)[dynamic.d_val as usize]; needed.push(lib); } } needed } } + }; +} + +macro_rules! 
elf_dynamic_info_std_impl { + ($size:ident, $phdr:ty) => { + /// Convert a virtual memory address to a file offset + fn vm_to_offset(phdrs: &[$phdr], address: $size) -> Option<$size> { + for ph in phdrs { + if address >= ph.p_vaddr { + let offset = address - ph.p_vaddr; + if offset < ph.p_memsz { + return ph.p_offset.checked_add(offset ); + } + } + } + None + } /// Important dynamic linking info generated via a single pass through the `_DYNAMIC` array #[derive(Default)] @@ -587,73 +582,108 @@ macro_rules! elf_dyn_std_impl { impl DynamicInfo { #[inline] - pub fn update(&mut self, bias: usize, dyn: &Dyn) { - match dyn.d_tag as u64 { - DT_RELA => self.rela = dyn.d_val.wrapping_add(bias as _) as usize, // .rela.dyn - DT_RELASZ => self.relasz = dyn.d_val as usize, - DT_RELAENT => self.relaent = dyn.d_val as _, - DT_RELACOUNT => self.relacount = dyn.d_val as usize, - DT_REL => self.rel = dyn.d_val.wrapping_add(bias as _) as usize, // .rel.dyn - DT_RELSZ => self.relsz = dyn.d_val as usize, - DT_RELENT => self.relent = dyn.d_val as _, - DT_RELCOUNT => self.relcount = dyn.d_val as usize, - DT_GNU_HASH => self.gnu_hash = Some(dyn.d_val.wrapping_add(bias as _)), - DT_HASH => self.hash = Some(dyn.d_val.wrapping_add(bias as _)) as _, - DT_STRTAB => self.strtab = dyn.d_val.wrapping_add(bias as _) as usize, - DT_STRSZ => self.strsz = dyn.d_val as usize, - DT_SYMTAB => self.symtab = dyn.d_val.wrapping_add(bias as _) as usize, - DT_SYMENT => self.syment = dyn.d_val as usize, - DT_PLTGOT => self.pltgot = Some(dyn.d_val.wrapping_add(bias as _)) as _, - DT_PLTRELSZ => self.pltrelsz = dyn.d_val as usize, - DT_PLTREL => self.pltrel = dyn.d_val as _, - DT_JMPREL => self.jmprel = dyn.d_val.wrapping_add(bias as _) as usize, // .rela.plt - DT_VERNEED => self.verneed = dyn.d_val.wrapping_add(bias as _) as _, - DT_VERNEEDNUM => self.verneednum = dyn.d_val as _, - DT_VERSYM => self.versym = dyn.d_val.wrapping_add(bias as _) as _, - DT_INIT => self.init = dyn.d_val.wrapping_add(bias as _) as _, 
- DT_FINI => self.fini = dyn.d_val.wrapping_add(bias as _) as _, - DT_INIT_ARRAY => self.init_array = dyn.d_val.wrapping_add(bias as _) as _, - DT_INIT_ARRAYSZ => self.init_arraysz = dyn.d_val as _, - DT_FINI_ARRAY => self.fini_array = dyn.d_val.wrapping_add(bias as _) as _, - DT_FINI_ARRAYSZ => self.fini_arraysz = dyn.d_val as _, + pub fn update(&mut self, phdrs: &[$phdr], dynamic: &Dyn) { + match u64::from(dynamic.d_tag) { + DT_RELA => self.rela = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize, // .rela.dyn + DT_RELASZ => self.relasz = dynamic.d_val as usize, + DT_RELAENT => self.relaent = dynamic.d_val as _, + DT_RELACOUNT => self.relacount = dynamic.d_val as usize, + DT_REL => self.rel = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize, // .rel.dyn + DT_RELSZ => self.relsz = dynamic.d_val as usize, + DT_RELENT => self.relent = dynamic.d_val as _, + DT_RELCOUNT => self.relcount = dynamic.d_val as usize, + DT_GNU_HASH => self.gnu_hash = vm_to_offset(phdrs, dynamic.d_val), + DT_HASH => self.hash = vm_to_offset(phdrs, dynamic.d_val), + DT_STRTAB => self.strtab = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize, + DT_STRSZ => self.strsz = dynamic.d_val as usize, + DT_SYMTAB => self.symtab = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize, + DT_SYMENT => self.syment = dynamic.d_val as usize, + DT_PLTGOT => self.pltgot = vm_to_offset(phdrs, dynamic.d_val), + DT_PLTRELSZ => self.pltrelsz = dynamic.d_val as usize, + DT_PLTREL => self.pltrel = dynamic.d_val as _, + DT_JMPREL => self.jmprel = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize, // .rela.plt + DT_VERNEED => self.verneed = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0), + DT_VERNEEDNUM => self.verneednum = dynamic.d_val as _, + DT_VERSYM => self.versym = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0), + DT_INIT => self.init = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0), + DT_FINI => self.fini = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0), + DT_INIT_ARRAY => 
self.init_array = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0), + DT_INIT_ARRAYSZ => self.init_arraysz = dynamic.d_val as _, + DT_FINI_ARRAY => self.fini_array = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0), + DT_FINI_ARRAYSZ => self.fini_arraysz = dynamic.d_val as _, DT_NEEDED => self.needed_count += 1, - DT_FLAGS => self.flags = dyn.d_val as _, - DT_FLAGS_1 => self.flags_1 = dyn.d_val as _, - DT_SONAME => self.soname = dyn.d_val as _, + DT_FLAGS => self.flags = dynamic.d_val as _, + DT_FLAGS_1 => self.flags_1 = dynamic.d_val as _, + DT_SONAME => self.soname = dynamic.d_val as _, DT_TEXTREL => self.textrel = true, _ => (), } } - pub fn new(dynamic: &[Dyn], bias: usize) -> DynamicInfo { + pub fn new(dynamic: &[Dyn], phdrs: &[$phdr]) -> DynamicInfo { let mut info = DynamicInfo::default(); - for dyn in dynamic { - info.update(bias, &dyn); + for dyna in dynamic { + info.update(phdrs, &dyna); } info } - } // end if_std + } + + if_alloc! { + impl fmt::Debug for DynamicInfo { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let gnu_hash = self.gnu_hash.unwrap_or(0); + let hash = self.hash.unwrap_or(0); + let pltgot = self.pltgot.unwrap_or(0); + f.debug_struct("DynamicInfo") + .field("rela", &format_args!("0x{:x}", self.rela)) + .field("relasz", &self.relasz) + .field("relaent", &self.relaent) + .field("relacount", &self.relacount) + .field("gnu_hash", &format_args!("0x{:x}", gnu_hash)) + .field("hash", &format_args!("0x{:x}", hash)) + .field("strtab", &format_args!("0x{:x}", self.strtab)) + .field("strsz", &self.strsz) + .field("symtab", &format_args!("0x{:x}", self.symtab)) + .field("syment", &self.syment) + .field("pltgot", &format_args!("0x{:x}", pltgot)) + .field("pltrelsz", &self.pltrelsz) + .field("pltrel", &self.pltrel) + .field("jmprel", &format_args!("0x{:x}", self.jmprel)) + .field("verneed", &format_args!("0x{:x}", self.verneed)) + .field("verneednum", &self.verneednum) + .field("versym", &format_args!("0x{:x}", self.versym)) + .field("init", 
&format_args!("0x{:x}", self.init)) + .field("fini", &format_args!("0x{:x}", self.fini)) + .field("needed_count", &self.needed_count) + .finish() + } + } + } }; } +if_alloc! { + elf_dynamic_info_std_impl!(u64, crate::elf::program_header::ProgramHeader); +} pub mod dyn32 { - pub use elf::dyn::*; + pub use crate::elf::dynamic::*; elf_dyn!(u32); pub const SIZEOF_DYN: usize = 8; - elf_dyn_std_impl!(u32, ::elf32::program_header::ProgramHeader); - + elf_dyn_std_impl!(u32, crate::elf32::program_header::ProgramHeader); + elf_dynamic_info_std_impl!(u32, crate::elf::program_header::program_header32::ProgramHeader); } - pub mod dyn64 { - pub use elf::dyn::*; + pub use crate::elf::dynamic::*; elf_dyn!(u64); pub const SIZEOF_DYN: usize = 16; - elf_dyn_std_impl!(u64, ::elf64::program_header::ProgramHeader); + elf_dyn_std_impl!(u64, crate::elf64::program_header::ProgramHeader); + elf_dynamic_info_std_impl!(u64, crate::elf::program_header::program_header64::ProgramHeader); } diff --git a/third_party/rust/goblin/src/elf/gnu_hash.rs b/third_party/rust/goblin/src/elf/gnu_hash.rs index 3cc5cdf969fe..9f050e2638a5 100644 --- a/third_party/rust/goblin/src/elf/gnu_hash.rs +++ b/third_party/rust/goblin/src/elf/gnu_hash.rs @@ -12,27 +12,41 @@ //! 3. maskwords //! 4. shift2 //! -//! See: https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections +//! 
See: https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2 + +/// GNU hash function: takes a string and returns the u32 hash of that string +pub fn hash(symbol: &str) -> u32 { + const HASH_SEED: u32 = 5381; + let mut hash = HASH_SEED; + for b in symbol.as_bytes() { + hash = hash + .wrapping_mul(33) + .wrapping_add(u32::from(*b)); + } + hash +} + +#[cfg(test)] +mod tests { + use super::hash; + #[test] + fn test_hash() { + assert_eq!(hash("") , 0x00001505); + assert_eq!(hash("printf") , 0x156b2bb8); + assert_eq!(hash("exit") , 0x7c967e3f); + assert_eq!(hash("syscall") , 0xbac212a0); + assert_eq!(hash("flapenguin.me"), 0x8ae9f18e); + } +} macro_rules! elf_gnu_hash_impl { ($size:ty) => { use core::slice; use core::mem; - use strtab::Strtab; + use crate::strtab::Strtab; use super::sym; - /// GNU hash function: takes a string and returns the u32 hash of that string - pub fn hash(symbol: &str) -> u32 { - let bytes = symbol.as_bytes(); - const HASH_SEED: u32 = 5381; - let mut hash = HASH_SEED; - for b in bytes { - hash = hash.wrapping_mul(32).wrapping_add(*b as u32).wrapping_add(hash); - } - hash - } - pub struct GnuHash<'process> { nbuckets: u32, symindex: usize, @@ -48,25 +62,25 @@ macro_rules! 
elf_gnu_hash_impl { impl<'process> GnuHash<'process> { pub unsafe fn new(hashtab: *const u32, total_dynsyms: usize, symtab: &'process [sym::Sym]) -> GnuHash<'process> { let nbuckets = *hashtab; - let symindex = *hashtab.offset(1) as usize; - let maskwords = *hashtab.offset(2) as usize; // how many words our bloom filter mask has - let shift2 = *hashtab.offset(3); - let bloomwords_ptr = hashtab.offset(4) as *const $size; - let buckets_ptr = bloomwords_ptr.offset(maskwords as isize) as *const u32; + let symindex = *hashtab.add(1) as usize; + let maskwords = *hashtab.add(2) as usize; // how many words our bloom filter mask has + let shift2 = *hashtab.add(3); + let bloomwords_ptr = hashtab.add(4) as *const $size; + let buckets_ptr = bloomwords_ptr.add(maskwords) as *const u32; let buckets = slice::from_raw_parts(buckets_ptr, nbuckets as usize); - let hashvalues_ptr = buckets_ptr.offset(nbuckets as isize); + let hashvalues_ptr = buckets_ptr.add(nbuckets as usize); let hashvalues = slice::from_raw_parts(hashvalues_ptr, total_dynsyms - symindex); let bloomwords = slice::from_raw_parts(bloomwords_ptr, maskwords); GnuHash { - nbuckets: nbuckets, - symindex: symindex, - shift2: shift2, + nbuckets, + symindex, + shift2, maskbits: mem::size_of::() as u32, - bloomwords: bloomwords, - hashvalues: hashvalues, - buckets: buckets, + bloomwords, + hashvalues, + buckets, maskwords_bitmask: ((maskwords as i32) - 1) as u32, - symtab: symtab, + symtab, } } diff --git a/third_party/rust/goblin/src/elf/header.rs b/third_party/rust/goblin/src/elf/header.rs index f020ff40f521..046ba84e7462 100644 --- a/third_party/rust/goblin/src/elf/header.rs +++ b/third_party/rust/goblin/src/elf/header.rs @@ -51,24 +51,22 @@ macro_rules! 
elf_header { } impl fmt::Debug for Header { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "e_ident: {:?} e_type: {} e_machine: 0x{:x} e_version: 0x{:x} e_entry: 0x{:x} \ - e_phoff: 0x{:x} e_shoff: 0x{:x} e_flags: {:x} e_ehsize: {} e_phentsize: {} \ - e_phnum: {} e_shentsize: {} e_shnum: {} e_shstrndx: {}", - self.e_ident, - et_to_str(self.e_type), - self.e_machine, - self.e_version, - self.e_entry, - self.e_phoff, - self.e_shoff, - self.e_flags, - self.e_ehsize, - self.e_phentsize, - self.e_phnum, - self.e_shentsize, - self.e_shnum, - self.e_shstrndx) + f.debug_struct("Header") + .field("e_ident", &format_args!("{:?}", self.e_ident)) + .field("e_type", &et_to_str(self.e_type)) + .field("e_machine", &format_args!("0x{:x}", self.e_machine)) + .field("e_version", &format_args!("0x{:x}", self.e_version)) + .field("e_entry", &format_args!("0x{:x}", self.e_entry)) + .field("e_phoff", &format_args!("0x{:x}", self.e_phoff)) + .field("e_shoff", &format_args!("0x{:x}", self.e_shoff)) + .field("e_flags", &format_args!("{:x}", self.e_flags)) + .field("e_ehsize", &self.e_ehsize) + .field("e_phentsize", &self.e_phentsize) + .field("e_phnum", &self.e_phnum) + .field("e_shentsize", &self.e_shentsize) + .field("e_shnum", &self.e_shnum) + .field("e_shstrndx", &self.e_shstrndx) + .finish() } } } @@ -88,7 +86,7 @@ pub const ET_CORE: u16 = 4; pub const ET_NUM: u16 = 5; /// The ELF magic number. -pub const ELFMAG: &'static [u8; 4] = b"\x7FELF"; +pub const ELFMAG: &[u8; 4] = b"\x7FELF"; /// Sizeof ELF magic number. pub const SELFMAG: usize = 4; @@ -111,6 +109,20 @@ pub const ELFDATANONE: u8 = 0; pub const ELFDATA2LSB: u8 = 1; /// 2's complement, big endian. pub const ELFDATA2MSB: u8 = 2; + +/// File version byte index. +pub const EI_VERSION: usize = 6; +/// Current ELF version. +pub const EV_CURRENT: u8 = 1; + +/// OS ABI byte index. +pub const EI_OSABI: usize = 7; +/// UNIX System V ABI. +pub const ELFOSABI_NONE: u8 = 0; + +/// ABI version byte index. 
+pub const EI_ABIVERSION: usize = 8; + /// Number of bytes in an identifier. pub const SIZEOF_IDENT: usize = 16; @@ -140,11 +152,11 @@ pub fn et_to_str(et: u16) -> &'static str { } if_alloc! { - use error::{self}; - use scroll::{self, ctx, Endian}; + use crate::error; + use scroll::{ctx, Endian}; use core::fmt; - use container::{Ctx, Container}; - use alloc::string::ToString; + use crate::container::{Ctx, Container}; + use crate::alloc::string::ToString; #[derive(Copy, Clone, PartialEq)] /// An ELF header @@ -168,13 +180,13 @@ if_alloc! { impl Header { /// Return the size of the underlying program header, given a `container` #[inline] - pub fn size(ctx: &Ctx) -> usize { + pub fn size(ctx: Ctx) -> usize { use scroll::ctx::SizeWith; - Self::size_with(ctx) + Self::size_with(&ctx) } /// Returns the container type this header specifies pub fn container(&self) -> error::Result { - use error::Error; + use crate::error::Error; match self.e_ident[EI_CLASS] { ELFCLASS32 => { Ok(Container::Little) }, ELFCLASS64 => { Ok(Container::Big) }, @@ -183,7 +195,7 @@ if_alloc! { } /// Returns the byte order this header specifies pub fn endianness(&self) -> error::Result { - use error::Error; + use crate::error::Error; match self.e_ident[EI_DATA] { ELFDATA2LSB => { Ok(scroll::LE) }, ELFDATA2MSB => { Ok(scroll::BE) }, @@ -191,8 +203,8 @@ if_alloc! { } } pub fn new(ctx: Ctx) -> Self { - use elf32; - use elf64; + use crate::elf32; + use crate::elf64; let (typ, ehsize, phentsize, shentsize) = match ctx.container { Container::Little => { (ELFCLASS32, header32::SIZEOF_EHDR, @@ -214,8 +226,8 @@ if_alloc! { 70, typ, byteorder, - 1, - 0, + EV_CURRENT, + ELFOSABI_NONE, 0, 0, 0, @@ -244,30 +256,28 @@ if_alloc! 
{ impl fmt::Debug for Header { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "e_ident: {:?} e_type: {} e_machine: 0x{:x} e_version: 0x{:x} e_entry: 0x{:x} \ - e_phoff: 0x{:x} e_shoff: 0x{:x} e_flags: {:x} e_ehsize: {} e_phentsize: {} \ - e_phnum: {} e_shentsize: {} e_shnum: {} e_shstrndx: {}", - self.e_ident, - et_to_str(self.e_type), - self.e_machine, - self.e_version, - self.e_entry, - self.e_phoff, - self.e_shoff, - self.e_flags, - self.e_ehsize, - self.e_phentsize, - self.e_phnum, - self.e_shentsize, - self.e_shnum, - self.e_shstrndx) + f.debug_struct("Header") + .field("e_ident", &format_args!("{:?}", self.e_ident)) + .field("e_type", &et_to_str(self.e_type)) + .field("e_machine", &format_args!("0x{:x}", self.e_machine)) + .field("e_version", &format_args!("0x{:x}", self.e_version)) + .field("e_entry", &format_args!("0x{:x}", self.e_entry)) + .field("e_phoff", &format_args!("0x{:x}", self.e_phoff)) + .field("e_shoff", &format_args!("0x{:x}", self.e_shoff)) + .field("e_flags", &format_args!("{:x}", self.e_flags)) + .field("e_ehsize", &self.e_ehsize) + .field("e_phentsize", &self.e_phentsize) + .field("e_phnum", &self.e_phnum) + .field("e_shentsize", &self.e_shentsize) + .field("e_shnum", &self.e_shnum) + .field("e_shstrndx", &self.e_shstrndx) + .finish() } } - impl ctx::SizeWith<::container::Ctx> for Header { + impl ctx::SizeWith for Header { type Units = usize; - fn size_with(ctx: &::container::Ctx) -> usize { + fn size_with(ctx: &crate::container::Ctx) -> usize { match ctx.container { Container::Little => { header32::SIZEOF_EHDR @@ -280,7 +290,7 @@ if_alloc! { } impl<'a> ctx::TryFromCtx<'a, scroll::Endian> for Header { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], _ctx: scroll::Endian) -> error::Result<(Self, Self::Size)> { use scroll::Pread; @@ -290,7 +300,7 @@ if_alloc! 
{ let ident: &[u8] = &bytes[..SIZEOF_IDENT]; if &ident[0..SELFMAG] != ELFMAG { let magic: u64 = ident.pread_with(0, scroll::LE)?; - return Err(error::Error::BadMagic(magic).into()); + return Err(error::Error::BadMagic(magic)); } let class = ident[EI_CLASS]; match class { @@ -301,7 +311,7 @@ if_alloc! { Ok((Header::from(bytes.pread::(0)?), header64::SIZEOF_EHDR)) }, _ => { - Err(error::Error::Malformed(format!("invalid ELF class {:x}", class)).into()) + Err(error::Error::Malformed(format!("invalid ELF class {:x}", class))) } } } @@ -309,7 +319,7 @@ if_alloc! { // TODO: i think we should remove this forcing of the information in the header, it causes too many conflicts impl ctx::TryIntoCtx for Header { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], _ctx: scroll::Endian) -> Result { use scroll::Pwrite; @@ -323,8 +333,8 @@ if_alloc! { } } } - impl ctx::IntoCtx<::container::Ctx> for Header { - fn into_ctx(self, bytes: &mut [u8], ctx: ::container::Ctx) -> () { + impl ctx::IntoCtx for Header { + fn into_ctx(self, bytes: &mut [u8], ctx: crate::container::Ctx) { use scroll::Pwrite; match ctx.container { Container::Little => { @@ -342,12 +352,12 @@ macro_rules! elf_header_std_impl { ($size:expr, $width:ty) => { if_alloc! { - use elf::header::Header as ElfHeader; - use error::Error; + use crate::elf::header::Header as ElfHeader; + use crate::error::Error; #[cfg(any(feature = "std", feature = "endian_fd"))] - use error::Result; + use crate::error::Result; - use scroll::{self, ctx, Pread}; + use scroll::{ctx, Pread}; use core::result; @@ -384,9 +394,9 @@ macro_rules! 
elf_header_std_impl { e_type: eh.e_type, e_machine: eh.e_machine, e_version: eh.e_version, - e_entry: eh.e_entry as u64, - e_phoff: eh.e_phoff as u64, - e_shoff: eh.e_shoff as u64, + e_entry: u64::from(eh.e_entry), + e_phoff: u64::from(eh.e_phoff), + e_shoff: u64::from(eh.e_shoff), e_flags: eh.e_flags, e_ehsize: eh.e_ehsize, e_phentsize: eh.e_phentsize, @@ -399,7 +409,7 @@ macro_rules! elf_header_std_impl { } impl<'a> ctx::TryFromCtx<'a, scroll::Endian> for Header { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], _: scroll::Endian) -> result::Result<(Self, Self::Size), Self::Error> { let mut elf_header = Header::default(); @@ -429,7 +439,7 @@ macro_rules! elf_header_std_impl { } impl ctx::TryIntoCtx for Header { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; /// a Pwrite impl for Header: **note** we use the endianness value in the header, and not a parameter fn try_into_ctx(self, bytes: &mut [u8], _endianness: scroll::Endian) -> result::Result { @@ -466,7 +476,7 @@ macro_rules! elf_header_std_impl { #[cfg(feature = "std")] pub fn from_fd(bytes: &mut File) -> Result
{ let mut elf_header = [0; $size]; - bytes.read(&mut elf_header)?; + bytes.read_exact(&mut elf_header)?; Ok(*Header::from_bytes(&elf_header)) } @@ -511,12 +521,12 @@ macro_rules! elf_header_std_impl { macro_rules! elf_header_test { ($class:expr) => { #[cfg(test)] - mod test { + mod tests { use scroll::{Pwrite, Pread}; - use elf::header::Header as ElfHeader; + use crate::elf::header::Header as ElfHeader; use super::*; - use container::{Ctx, Container}; - use alloc::vec::Vec; + use crate::container::{Ctx, Container}; + use crate::alloc::vec::Vec; #[test] fn size_of() { assert_eq!(::std::mem::size_of::
(), SIZEOF_EHDR); diff --git a/third_party/rust/goblin/src/elf/mod.rs b/third_party/rust/goblin/src/elf/mod.rs index d93040f24254..0b2ab7f167c5 100644 --- a/third_party/rust/goblin/src/elf/mod.rs +++ b/third_party/rust/goblin/src/elf/mod.rs @@ -13,6 +13,7 @@ //! let entry = binary.entry; //! for ph in binary.program_headers { //! if ph.p_type == goblin::elf::program_header::PT_LOAD { +//! // TODO: you should validate p_filesz before allocating. //! let mut _buf = vec![0u8; ph.p_filesz as usize]; //! // read responsibly //! } @@ -37,7 +38,7 @@ //! `endian_fd` features if you disable `default`. #[macro_use] -mod gnu_hash; +pub(crate) mod gnu_hash; // These are shareable values for the 32/64 bit implementations. // @@ -48,7 +49,7 @@ pub mod section_header; pub mod compression_header; #[macro_use] pub mod sym; -pub mod dyn; +pub mod dynamic; #[macro_use] pub mod reloc; pub mod note; @@ -61,20 +62,22 @@ macro_rules! if_sylvan { } if_sylvan! { - use scroll::{self, ctx, Pread, Endian}; - use strtab::Strtab; - use error; - use container::{Container, Ctx}; - use alloc::vec::Vec; + use scroll::{ctx, Pread, Endian}; + use crate::strtab::Strtab; + use crate::error; + use crate::container::{Container, Ctx}; + use crate::alloc::vec::Vec; + use core::cmp; pub type Header = header::Header; pub type ProgramHeader = program_header::ProgramHeader; pub type SectionHeader = section_header::SectionHeader; pub type Symtab<'a> = sym::Symtab<'a>; pub type Sym = sym::Sym; - pub type Dyn = dyn::Dyn; - pub type Dynamic = dyn::Dynamic; + pub type Dyn = dynamic::Dyn; + pub type Dynamic = dynamic::Dynamic; pub type Reloc = reloc::Reloc; + pub type RelocSection<'a> = reloc::RelocSection<'a>; pub type ProgramHeaders = Vec; pub type SectionHeaders = Vec; @@ -106,13 +109,13 @@ if_sylvan! { /// Contains dynamic linking information, with the _DYNAMIC array + a preprocessed DynamicInfo for that array pub dynamic: Option, /// The dynamic relocation entries (strings, copy-data, etc.) 
with an addend - pub dynrelas: Vec, + pub dynrelas: RelocSection<'a>, /// The dynamic relocation entries without an addend - pub dynrels: Vec, + pub dynrels: RelocSection<'a>, /// The plt relocation entries (procedure linkage table). For 32-bit binaries these are usually Rel (no addend) - pub pltrelocs: Vec, + pub pltrelocs: RelocSection<'a>, /// Section relocations by section index (only present if this is a relocatable object file) - pub shdr_relocs: Vec<(ShdrIdx, Vec)>, + pub shdr_relocs: Vec<(ShdrIdx, RelocSection<'a>)>, /// The binary's soname, if it has one pub soname: Option<&'a str>, /// The binary's program interpreter (e.g., dynamic linker), if it has one @@ -124,8 +127,6 @@ if_sylvan! { pub is_lib: bool, /// The binaries entry point address, if it has one pub entry: u64, - /// The bias used to overflow virtual memory addresses into physical byte offsets into the binary - pub bias: u64, /// Whether the binary is little endian or not pub little_endian: bool, ctx: Ctx, @@ -211,7 +212,7 @@ if_sylvan! { if class != header::ELFCLASS64 && class != header::ELFCLASS32 { return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}", class, - header.e_ident[header::EI_DATA])).into()); + header.e_ident[header::EI_DATA]))); } let is_64 = class == header::ELFCLASS64; let container = if is_64 { Container::Big } else { Container::Little }; @@ -219,27 +220,6 @@ if_sylvan! { let program_headers = ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?; - let mut bias: usize = 0; - for ph in &program_headers { - if ph.p_type == program_header::PT_LOAD { - // NB this _only_ works on the first load address, and the GOT values (usually at base + 2000) will be incorrect binary offsets... 
- // this is an overflow hack that allows us to use virtual memory addresses - // as though they're in the file by generating a fake load bias which is then - // used to overflow the values in the dynamic array, and in a few other places - // (see Dyn::DynamicInfo), to generate actual file offsets; you may have to - // marinate a bit on why this works. i am unsure whether it works in every - // conceivable case. i learned this trick from reading too much dynamic linker - // C code (a whole other class of C code) and having to deal with broken older - // kernels on VMs. enjoi - bias = match container { - Container::Little => (::core::u32::MAX - (ph.p_vaddr as u32)).wrapping_add(1) as usize, - Container::Big => (::core::u64::MAX - ph.p_vaddr).wrapping_add(1) as usize, - }; - // we must grab only the first one, otherwise the bias will be incorrect - break; - } - } - let mut interpreter = None; for ph in &program_headers { if ph.p_type == program_header::PT_INTERP && ph.p_filesz != 0 { @@ -279,11 +259,11 @@ if_sylvan! { let mut soname = None; let mut libraries = vec![]; let mut dynsyms = Symtab::default(); - let mut dynrelas = vec![]; - let mut dynrels = vec![]; - let mut pltrelocs = vec![]; + let mut dynrelas = RelocSection::default(); + let mut dynrels = RelocSection::default(); + let mut pltrelocs = RelocSection::default(); let mut dynstrtab = Strtab::default(); - let dynamic = Dynamic::parse(bytes, &program_headers, bias, ctx)?; + let dynamic = Dynamic::parse(bytes, &program_headers, ctx)?; if let Some(ref dynamic) = dynamic { let dyn_info = &dynamic.info; dynstrtab = Strtab::parse(bytes, @@ -298,55 +278,59 @@ if_sylvan! 
{ if dyn_info.needed_count > 0 { libraries = dynamic.get_libraries(&dynstrtab); } - let num_syms = if dyn_info.syment == 0 { 0 } else { if dyn_info.strtab <= dyn_info.symtab { 0 } else { (dyn_info.strtab - dyn_info.symtab) / dyn_info.syment }}; - dynsyms = Symtab::parse(bytes, dyn_info.symtab, num_syms, ctx)?; // parse the dynamic relocations - dynrelas = Reloc::parse(bytes, dyn_info.rela, dyn_info.relasz, true, ctx)?; - dynrels = Reloc::parse(bytes, dyn_info.rel, dyn_info.relsz, false, ctx)?; - let is_rela = dyn_info.pltrel as u64 == dyn::DT_RELA; - pltrelocs = Reloc::parse(bytes, dyn_info.jmprel, dyn_info.pltrelsz, is_rela, ctx)?; + dynrelas = RelocSection::parse(bytes, dyn_info.rela, dyn_info.relasz, true, ctx)?; + dynrels = RelocSection::parse(bytes, dyn_info.rel, dyn_info.relsz, false, ctx)?; + let is_rela = dyn_info.pltrel as u64 == dynamic::DT_RELA; + pltrelocs = RelocSection::parse(bytes, dyn_info.jmprel, dyn_info.pltrelsz, is_rela, ctx)?; + + let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash { + gnu_hash_len(bytes, gnu_hash as usize, ctx)? + } else if let Some(hash) = dyn_info.hash { + hash_len(bytes, hash as usize, header.e_machine, ctx)? 
+ } else { + 0 + }; + let max_reloc_sym = dynrelas.iter() + .chain(dynrels.iter()) + .chain(pltrelocs.iter()) + .fold(0, |num, reloc| cmp::max(num, reloc.r_sym)); + if max_reloc_sym != 0 { + num_syms = cmp::max(num_syms, max_reloc_sym + 1); + } + dynsyms = Symtab::parse(bytes, dyn_info.symtab, num_syms, ctx)?; } - // iterate through shdrs again iff we're an ET_REL - let shdr_relocs = { - let mut relocs = vec![]; - if header.e_type == header::ET_REL { - for (idx, section) in section_headers.iter().enumerate() { - if section.sh_type == section_header::SHT_REL { - section.check_size(bytes.len())?; - let sh_relocs = Reloc::parse(bytes, section.sh_offset as usize, section.sh_size as usize, false, ctx)?; - relocs.push((idx, sh_relocs)); - } - if section.sh_type == section_header::SHT_RELA { - section.check_size(bytes.len())?; - let sh_relocs = Reloc::parse(bytes, section.sh_offset as usize, section.sh_size as usize, true, ctx)?; - relocs.push((idx, sh_relocs)); - } - } + let mut shdr_relocs = vec![]; + for (idx, section) in section_headers.iter().enumerate() { + let is_rela = section.sh_type == section_header::SHT_RELA; + if is_rela || section.sh_type == section_header::SHT_REL { + section.check_size(bytes.len())?; + let sh_relocs = RelocSection::parse(bytes, section.sh_offset as usize, section.sh_size as usize, is_rela, ctx)?; + shdr_relocs.push((idx, sh_relocs)); } - relocs - }; + } + Ok(Elf { - header: header, - program_headers: program_headers, - section_headers: section_headers, - shdr_strtab: shdr_strtab, - dynamic: dynamic, - dynsyms: dynsyms, - dynstrtab: dynstrtab, - syms: syms, - strtab: strtab, - dynrelas: dynrelas, - dynrels: dynrels, - pltrelocs: pltrelocs, - shdr_relocs: shdr_relocs, - soname: soname, - interpreter: interpreter, - libraries: libraries, - is_64: is_64, - is_lib: is_lib, + header, + program_headers, + section_headers, + shdr_strtab, + dynamic, + dynsyms, + dynstrtab, + syms, + strtab, + dynrelas, + dynrels, + pltrelocs, + shdr_relocs, + 
soname, + interpreter, + libraries, + is_64, + is_lib, entry: entry as u64, - bias: bias as u64, little_endian: is_lsb, ctx, }) @@ -354,13 +338,56 @@ if_sylvan! { } impl<'a> ctx::TryFromCtx<'a, (usize, Endian)> for Elf<'a> { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(src: &'a [u8], (_, _): (usize, Endian)) -> Result<(Elf<'a>, Self::Size), Self::Error> { let elf = Elf::parse(src)?; Ok((elf, src.len())) } } + + fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> error::Result { + let buckets_num = bytes.pread_with::(offset, ctx.le)? as usize; + let min_chain = bytes.pread_with::(offset + 4, ctx.le)? as usize; + let bloom_size = bytes.pread_with::(offset + 8, ctx.le)? as usize; + // We could handle min_chain==0 if we really had to, but it shouldn't happen. + if buckets_num == 0 || min_chain == 0 || bloom_size == 0 { + return Err(error::Error::Malformed(format!("Invalid DT_GNU_HASH: buckets_num={} min_chain={} bloom_size={}", + buckets_num, min_chain, bloom_size))); + } + // Find the last bucket. + let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 }; + let mut max_chain = 0; + for bucket in 0..buckets_num { + let chain = bytes.pread_with::(buckets_offset + bucket * 4, ctx.le)? as usize; + if max_chain < chain { + max_chain = chain; + } + } + if max_chain < min_chain { + return Ok(0); + } + // Find the last chain within the bucket. + let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4; + loop { + let hash = bytes.pread_with::(chain_offset, ctx.le)?; + max_chain += 1; + chain_offset += 4; + if hash & 1 != 0 { + return Ok(max_chain); + } + } + } + + fn hash_len(bytes: &[u8], offset: usize, machine: u16, ctx: Ctx) -> error::Result { + // Based on readelf code. + let nchain = if (machine == header::EM_FAKE_ALPHA || machine == header::EM_S390) && ctx.container.is_big() { + bytes.pread_with::(offset + 4, ctx.le)? 
as usize + } else { + bytes.pread_with::(offset + 4, ctx.le)? as usize + }; + Ok(nchain) + } } #[cfg(test)] @@ -375,26 +402,22 @@ mod tests { assert!(binary.is_64); assert!(!binary.is_lib); assert_eq!(binary.entry, 0); - assert_eq!(binary.bias, 0); assert!(binary.syms.get(1000).is_none()); assert!(binary.syms.get(5).is_some()); let syms = binary.syms.to_vec(); - let mut i = 0; - assert!(binary.section_headers.len() != 0); - for sym in &syms { + assert!(!binary.section_headers.is_empty()); + for (i, sym) in syms.iter().enumerate() { if i == 11 { let symtab = binary.strtab; println!("sym: {:?}", &sym); assert_eq!(&symtab[sym.st_name], "_start"); break; } - i += 1; } - assert!(syms.len() != 0); + assert!(!syms.is_empty()); }, Err (err) => { - println!("failed: {}", err); - assert!(false) + panic!("failed: {}", err); } } } @@ -407,26 +430,22 @@ mod tests { assert!(!binary.is_64); assert!(!binary.is_lib); assert_eq!(binary.entry, 0); - assert_eq!(binary.bias, 0); assert!(binary.syms.get(1000).is_none()); assert!(binary.syms.get(5).is_some()); let syms = binary.syms.to_vec(); - let mut i = 0; - assert!(binary.section_headers.len() != 0); - for sym in &syms { + assert!(!binary.section_headers.is_empty()); + for (i, sym) in syms.iter().enumerate() { if i == 11 { let symtab = binary.strtab; println!("sym: {:?}", &sym); assert_eq!(&symtab[sym.st_name], "__libc_csu_fini"); break; } - i += 1; } - assert!(syms.len() != 0); + assert!(!syms.is_empty()); }, Err (err) => { - println!("failed: {}", err); - assert!(false) + panic!("failed: {}", err); } } } diff --git a/third_party/rust/goblin/src/elf/note.rs b/third_party/rust/goblin/src/elf/note.rs index 2ea894cba058..ea10a8100159 100644 --- a/third_party/rust/goblin/src/elf/note.rs +++ b/third_party/rust/goblin/src/elf/note.rs @@ -1,40 +1,52 @@ // Defined note types for GNU systems. -// ABI information. 
The descriptor consists of words: -// word 0: OS descriptor -// word 1: major version of the ABI -// word 2: minor version of the ABI -// word 3: subminor version of the ABI +#[cfg(feature = "log")] +use log::debug; +#[cfg(feature = "alloc")] +use scroll::{Pread, Pwrite, IOread, IOwrite, SizeWith}; +/// ABI information. +/// +/// The descriptor consists of words: +/// * word 0: OS descriptor +/// * word 1: major version of the ABI +/// * word 2: minor version of the ABI +/// * word 3: subminor version of the ABI pub const NT_GNU_ABI_TAG: u32 = 1; -// Old name + +/// Old name pub const ELF_NOTE_ABI: u32 = NT_GNU_ABI_TAG; // Known OSes. These values can appear in word 0 of an -// NT_GNU_ABI_TAG note section entry. +// `NT_GNU_ABI_TAG` note section entry. pub const ELF_NOTE_OS_LINUX: u32 = 0; pub const ELF_NOTE_OS_GNU: u32 = 1; pub const ELF_NOTE_OS_SOLARIS2: u32 = 2; pub const ELF_NOTE_OS_FREEBSD: u32 = 3; -// Synthetic hwcap information. The descriptor begins with two words: -// word 0: number of entries -// word 1: bitmask of enabled entries -// Then follow variable-length entries, one byte followed by a -// '\0'-terminated hwcap name string. The byte gives the bit -// number to test if enabled, (1U << bit) & bitmask. +/// Synthetic `hwcap` information. +/// +/// The descriptor begins with two words: +/// * word 0: number of entries +/// * word 1: bitmask of enabled entries +/// +/// Then follow variable-length entries, one byte followed by a '\0'-terminated +/// `hwcap` name string. The byte gives the bit number to test if enabled, +/// `(1U << bit) & bitmask`. pub const NT_GNU_HWCAP: u32 = 2; -// Build ID bits as generated by ld --build-id. -// The descriptor consists of any nonzero number of bytes. +/// Build ID bits as generated by ld --build-id. +/// +/// The descriptor consists of any nonzero number of bytes. pub const NT_GNU_BUILD_ID: u32 = 3; -// Version note generated by GNU gold containing a version string. 
+/// Version note generated by GNU gold containing a version string. pub const NT_GNU_GOLD_VERSION: u32 = 4; #[derive(Clone, Copy, Debug)] #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, IOread, IOwrite, SizeWith))] #[repr(C)] -/// Note section contents. Each entry in the note section begins with a header of a fixed form. +/// Note section contents. Each entry in the note section begins with a header +/// of a fixed form. pub struct Nhdr32 { /// Length of the note's name (includes the terminator) pub n_namesz: u32, @@ -47,7 +59,8 @@ pub struct Nhdr32 { #[derive(Clone, Copy, Debug)] #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, IOread, IOwrite, SizeWith))] #[repr(C)] -/// Note section contents. Each entry in the note section begins with a header of a fixed form. +/// Note section contents. Each entry in the note section begins with a header +/// of a fixed form. pub struct Nhdr64 { /// Length of the note's name (includes the terminator) pub n_namesz: u64, @@ -58,10 +71,10 @@ pub struct Nhdr64 { } if_alloc! { - use error; - use container; - use scroll::{ctx, Pread}; - use alloc::vec::Vec; + use crate::error; + use crate::container; + use scroll::ctx; + use crate::alloc::vec::Vec; /// An iterator over ELF binary notes in a note section or segment pub struct NoteDataIterator<'a> { @@ -80,7 +93,7 @@ if_alloc! { debug!("NoteIterator - {:#x}", self.offset); match self.data.gread_with(&mut self.offset, self.ctx) { Ok(res) => Some(Ok(res)), - Err(e) => Some(Err(e.into())) + Err(e) => Some(Err(e)) } } } diff --git a/third_party/rust/goblin/src/elf/program_header.rs b/third_party/rust/goblin/src/elf/program_header.rs index 69a907f4d4b5..f8ab76381e29 100644 --- a/third_party/rust/goblin/src/elf/program_header.rs +++ b/third_party/rust/goblin/src/elf/program_header.rs @@ -1,3 +1,5 @@ +/* Legal values for p_type (segment type). 
*/ + /// Program header table entry unused pub const PT_NULL: u32 = 0; /// Loadable program segment @@ -17,36 +19,36 @@ pub const PT_TLS: u32 = 7; /// Number of defined types pub const PT_NUM: u32 = 8; /// Start of OS-specific -pub const PT_LOOS: u32 = 0x60000000; +pub const PT_LOOS: u32 = 0x6000_0000; /// GCC .eh_frame_hdr segment -pub const PT_GNU_EH_FRAME: u32 = 0x6474e550; +pub const PT_GNU_EH_FRAME: u32 = 0x6474_e550; /// Indicates stack executability -pub const PT_GNU_STACK: u32 = 0x6474e551; +pub const PT_GNU_STACK: u32 = 0x6474_e551; /// Read-only after relocation -pub const PT_GNU_RELRO: u32 = 0x6474e552; +pub const PT_GNU_RELRO: u32 = 0x6474_e552; /// Sun Specific segment -pub const PT_LOSUNW: u32 = 0x6ffffffa; +pub const PT_LOSUNW: u32 = 0x6fff_fffa; /// Sun Specific segment -pub const PT_SUNWBSS: u32 = 0x6ffffffa; +pub const PT_SUNWBSS: u32 = 0x6fff_fffa; /// Stack segment -pub const PT_SUNWSTACK: u32 = 0x6ffffffb; +pub const PT_SUNWSTACK: u32 = 0x6fff_fffb; /// End of OS-specific -pub const PT_HISUNW: u32 = 0x6fffffff; +pub const PT_HISUNW: u32 = 0x6fff_ffff; /// End of OS-specific -pub const PT_HIOS: u32 = 0x6fffffff; +pub const PT_HIOS: u32 = 0x6fff_ffff; /// Start of processor-specific -pub const PT_LOPROC: u32 = 0x70000000; +pub const PT_LOPROC: u32 = 0x7000_0000; /// ARM unwind segment -pub const PT_ARM_EXIDX: u32 = 0x70000001; +pub const PT_ARM_EXIDX: u32 = 0x7000_0001; /// End of processor-specific -pub const PT_HIPROC: u32 = 0x7fffffff; +pub const PT_HIPROC: u32 = 0x7fff_ffff; + +/* Legal values for p_flags (segment flags). */ /// Segment is executable -pub const PF_X: u32 = 1 << 0; - +pub const PF_X: u32 = 1; /// Segment is writable pub const PF_W: u32 = 1 << 1; - /// Segment is readable pub const PF_R: u32 = 1 << 2; @@ -80,8 +82,8 @@ if_alloc! 
{ use scroll::ctx; use core::result; use core::ops::Range; - use container::{Ctx, Container}; - use alloc::vec::Vec; + use crate::container::{Ctx, Container}; + use crate::alloc::vec::Vec; #[derive(Default, PartialEq, Clone)] /// A unified ProgramHeader - convertable to and from 32-bit and 64-bit variants @@ -99,9 +101,9 @@ if_alloc! { impl ProgramHeader { /// Return the size of the underlying program header, given a `Ctx` #[inline] - pub fn size(ctx: &Ctx) -> usize { + pub fn size(ctx: Ctx) -> usize { use scroll::ctx::SizeWith; - Self::size_with(ctx) + Self::size_with(&ctx) } /// Create a new `PT_LOAD` ELF program header pub fn new() -> Self { @@ -150,7 +152,7 @@ if_alloc! { self.p_flags & PF_W != 0 } #[cfg(feature = "endian_fd")] - pub fn parse(bytes: &[u8], mut offset: usize, count: usize, ctx: Ctx) -> ::error::Result> { + pub fn parse(bytes: &[u8], mut offset: usize, count: usize, ctx: Ctx) -> crate::error::Result> { use scroll::Pread; let mut program_headers = Vec::with_capacity(count); for _ in 0..count { @@ -163,17 +165,16 @@ if_alloc! { impl fmt::Debug for ProgramHeader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "p_type: {} p_flags 0x{:x} p_offset: 0x{:x} p_vaddr: 0x{:x} p_paddr: 0x{:x} \ - p_filesz: 0x{:x} p_memsz: 0x{:x} p_align: {}", - pt_to_str(self.p_type), - self.p_flags, - self.p_offset, - self.p_vaddr, - self.p_paddr, - self.p_filesz, - self.p_memsz, - self.p_align) + f.debug_struct("ProgramHeader") + .field("p_type", &pt_to_str(self.p_type)) + .field("p_flags", &format_args!("0x{:x}", self.p_flags)) + .field("p_offset", &format_args!("0x{:x}", self.p_offset)) + .field("p_vaddr", &format_args!("0x{:x}", self.p_vaddr)) + .field("p_paddr", &format_args!("0x{:x}", self.p_paddr)) + .field("p_filesz", &format_args!("0x{:x}", self.p_filesz)) + .field("p_memsz", &format_args!("0x{:x}", self.p_memsz)) + .field("p_align", &self.p_align) + .finish() } } @@ -192,7 +193,7 @@ if_alloc! 
{ } impl<'a> ctx::TryFromCtx<'a, Ctx> for ProgramHeader { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], Ctx { container, le}: Ctx) -> result::Result<(Self, Self::Size), Self::Error> { use scroll::Pread; @@ -209,7 +210,7 @@ if_alloc! { } impl ctx::TryIntoCtx for ProgramHeader { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result { use scroll::Pwrite; @@ -230,7 +231,7 @@ if_alloc! { macro_rules! elf_program_header_std_impl { ($size:ty) => { #[cfg(test)] - mod test { + mod tests { use super::*; #[test] fn size_of() { @@ -240,9 +241,9 @@ macro_rules! elf_program_header_std_impl { ($size:ty) => { if_alloc! { - use elf::program_header::ProgramHeader as ElfProgramHeader; + use crate::elf::program_header::ProgramHeader as ElfProgramHeader; #[cfg(any(feature = "std", feature = "endian_fd"))] - use error::Result; + use crate::error::Result; use core::slice; use core::fmt; @@ -260,12 +261,12 @@ macro_rules! elf_program_header_std_impl { ($size:ty) => { ElfProgramHeader { p_type : ph.p_type, p_flags : ph.p_flags, - p_offset : ph.p_offset as u64, - p_vaddr : ph.p_vaddr as u64, - p_paddr : ph.p_paddr as u64, - p_filesz : ph.p_filesz as u64, - p_memsz : ph.p_memsz as u64, - p_align : ph.p_align as u64, + p_offset : u64::from(ph.p_offset), + p_vaddr : u64::from(ph.p_vaddr), + p_paddr : u64::from(ph.p_paddr), + p_filesz : u64::from(ph.p_filesz), + p_memsz : u64::from(ph.p_memsz), + p_align : u64::from(ph.p_align), } } } @@ -287,17 +288,16 @@ macro_rules! 
elf_program_header_std_impl { ($size:ty) => { impl fmt::Debug for ProgramHeader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "p_type: {} p_flags 0x{:x} p_offset: 0x{:x} p_vaddr: 0x{:x} p_paddr: 0x{:x} \ - p_filesz: 0x{:x} p_memsz: 0x{:x} p_align: {}", - pt_to_str(self.p_type), - self.p_flags, - self.p_offset, - self.p_vaddr, - self.p_paddr, - self.p_filesz, - self.p_memsz, - self.p_align) + f.debug_struct("ProgramHeader") + .field("p_type", &pt_to_str(self.p_type)) + .field("p_flags", &format_args!("0x{:x}", self.p_flags)) + .field("p_offset", &format_args!("0x{:x}", self.p_offset)) + .field("p_vaddr", &format_args!("0x{:x}", self.p_vaddr)) + .field("p_paddr", &format_args!("0x{:x}", self.p_paddr)) + .field("p_filesz", &format_args!("0x{:x}", self.p_filesz)) + .field("p_memsz", &format_args!("0x{:x}", self.p_memsz)) + .field("p_align", &self.p_align) + .finish() } } @@ -326,9 +326,9 @@ macro_rules! elf_program_header_std_impl { ($size:ty) => { #[cfg(feature = "std")] pub fn from_fd(fd: &mut File, offset: u64, count: usize) -> Result> { let mut phdrs = vec![ProgramHeader::default(); count]; - try!(fd.seek(Start(offset))); + fd.seek(Start(offset))?; unsafe { - try!(fd.read(plain::as_mut_bytes(&mut *phdrs))); + fd.read_exact(plain::as_mut_bytes(&mut *phdrs))?; } Ok(phdrs) } @@ -336,9 +336,11 @@ macro_rules! 
elf_program_header_std_impl { ($size:ty) => { } // end if_alloc };} +#[cfg(feature = "alloc")] +use scroll::{Pread, Pwrite, SizeWith}; pub mod program_header32 { - pub use elf::program_header::*; + pub use crate::elf::program_header::*; #[repr(C)] #[derive(Copy, Clone, PartialEq, Default)] @@ -374,7 +376,7 @@ pub mod program_header32 { pub mod program_header64 { - pub use elf::program_header::*; + pub use crate::elf::program_header::*; #[repr(C)] #[derive(Copy, Clone, PartialEq, Default)] diff --git a/third_party/rust/goblin/src/elf/reloc.rs b/third_party/rust/goblin/src/elf/reloc.rs index a96c664bd834..512b8f62ea31 100644 --- a/third_party/rust/goblin/src/elf/reloc.rs +++ b/third_party/rust/goblin/src/elf/reloc.rs @@ -1,41 +1,60 @@ //! # Relocation computations +//! +//! The following notation is used to describe relocation computations +//! specific to x86_64 ELF. +//! +//! * A: The addend used to compute the value of the relocatable field. +//! * B: The base address at which a shared object is loaded into memory +//! during execution. Generally, a shared object file is built with a +//! base virtual address of 0. However, the execution address of the +//! shared object is different. +//! * G: The offset into the global offset table at which the address of +//! the relocation entry's symbol resides during execution. +//! * GOT: The address of the global offset table. +//! * L: The section offset or address of the procedure linkage table entry +//! for a symbol. +//! * P: The section offset or address of the storage unit being relocated, +//! computed using r_offset. +//! * S: The value of the symbol whose index resides in the relocation entry. +//! * Z: The size of the symbol whose index resides in the relocation entry. +//! //! Below are some common x86_64 relocation computations you might find useful: //! -//! | Relocation | Value | Size | Formula | -//! |:-----------|:------|:-----|:-------| -//! | R_X86_64_NONE | 0 | none | none | -//! 
| R_X86_64_64 | 1 | word64 | S + A | -//! | R_X86_64_PC32 | 2 | word32 | S + A - P | -//! | R_X86_64_GOT32 | 3 | word32 | G + A | -//! | R_X86_64_PLT32 | 4 | word32 | L + A - P | -//! | R_X86_64_COPY | 5 | none | none | -//! | R_X86_64_GLOB_DAT | 6 | word64 | S | -//! | R_X86_64_JUMP_SLOT | 7 | word64 | S | -//! | R_X86_64_RELATIVE | 8 | word64 | B + A | -//! | R_X86_64_GOTPCREL | 9 | word32 | G + GOT + A - P | -//! | R_X86_64_32 | 10 | word32 | S + A | -//! | R_X86_64_32S | 11 | word32 | S + A | -//! | R_X86_64_16 | 12 | word16 | S + A | -//! | R_X86_64_PC16 | 13 | word16 | S + A - P | -//! | R_X86_64_8 | 14 | word8 | S + A | -//! | R_X86_64_PC8 | 15 | word8 | S + A - P | -//! | R_X86_64_DTPMOD64 | 16 | word64 | | -//! | R_X86_64_DTPOFF64 | 17 | word64 | | -//! | R_X86_64_TPOFF64 | 18 | word64 | | -//! | R_X86_64_TLSGD | 19 | word32 | | -//! | R_X86_64_TLSLD | 20 | word32 | | -//! | R_X86_64_DTPOFF32 | 21 | word32 | | -//! | R_X86_64_GOTTPOFF | 22 | word32 | | -//! | R_X86_64_TPOFF32 | 23 | word32 | | -//! | R_X86_64_PC64 | 24 | word64 | S + A - P | -//! | R_X86_64_GOTOFF64 | 25 | word64 | S + A - GOT | -//! | R_X86_64_GOTPC32 | 26 | word32 | GOT + A - P | -//! | R_X86_64_SIZE32 | 32 | word32 | Z + A | -//! | R_X86_64_SIZE64 | 33 | word64 | Z + A | -//! | R_X86_64_GOTPC32_TLSDESC | 34 | word32 | | -//! | R_X86_64_TLSDESC_CALL | 35 | none| | -//! | R_X86_64_TLSDESC | 36 | word64×2 | | -//! | R_X86_64_IRELATIVE | 37 | word64 | indirect (B + A) | +//! | Relocation | Value | Size | Formula | +//! |:--------------------------|:------|:----------|:------------------| +//! | `R_X86_64_NONE` | 0 | NONE | NONE | +//! | `R_X86_64_64` | 1 | 64 | S + A | +//! | `R_X86_64_PC32` | 2 | 32 | S + A - P | +//! | `R_X86_64_GOT32` | 3 | 32 | G + A | +//! | `R_X86_64_PLT32` | 4 | 32 | L + A - P | +//! | `R_X86_64_COPY` | 5 | NONE | NONE | +//! | `R_X86_64_GLOB_DAT` | 6 | 64 | S | +//! | `R_X86_64_JUMP_SLOT` | 7 | 64 | S | +//! | `R_X86_64_RELATIVE` | 8 | 64 | B + A | +//! 
| `R_X86_64_GOTPCREL` | 9 | 32 | G + GOT + A - P | +//! | `R_X86_64_32` | 10 | 32 | S + A | +//! | `R_X86_64_32S` | 11 | 32 | S + A | +//! | `R_X86_64_16` | 12 | 16 | S + A | +//! | `R_X86_64_PC16` | 13 | 16 | S + A - P | +//! | `R_X86_64_8` | 14 | 8 | S + A | +//! | `R_X86_64_PC8` | 15 | 8 | S + A - P | +//! | `R_X86_64_DTPMOD64` | 16 | 64 | | +//! | `R_X86_64_DTPOFF64` | 17 | 64 | | +//! | `R_X86_64_TPOFF64` | 18 | 64 | | +//! | `R_X86_64_TLSGD` | 19 | 32 | | +//! | `R_X86_64_TLSLD` | 20 | 32 | | +//! | `R_X86_64_DTPOFF32` | 21 | 32 | | +//! | `R_X86_64_GOTTPOFF` | 22 | 32 | | +//! | `R_X86_64_TPOFF32` | 23 | 32 | | +//! | `R_X86_64_PC64` | 24 | 64 | S + A - P | +//! | `R_X86_64_GOTOFF64` | 25 | 64 | S + A - GOT | +//! | `R_X86_64_GOTPC32` | 26 | 32 | GOT + A - P | +//! | `R_X86_64_SIZE32` | 32 | 32 | Z + A | +//! | `R_X86_64_SIZE64` | 33 | 64 | Z + A | +//! | `R_X86_64_GOTPC32_TLSDESC` 34 | 32 | | +//! | `R_X86_64_TLSDESC_CALL` | 35 | NONE | | +//! | `R_X86_64_TLSDESC` | 36 | 64 × 2 | | +//! | `R_X86_64_IRELATIVE` | 37 | 64 | indirect (B + A) | //! //! TLS information is at http://people.redhat.com/aoliva/writeups/TLS/RFC-TLSDESC-x86.txt //! @@ -43,12 +62,16 @@ //! the value used in this relocation is the program address returned by the function, //! which takes no arguments, at the address of the result of the corresponding //! `R_X86_64_RELATIVE` relocation. +//! +//! Read more https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-54839.html include!("constants_relocation.rs"); macro_rules! elf_reloc { ($size:ident, $isize:ty) => { use core::fmt; + #[cfg(feature = "alloc")] + use scroll::{Pread, Pwrite, SizeWith}; #[repr(C)] #[derive(Clone, Copy, PartialEq, Default)] #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] @@ -79,24 +102,25 @@ macro_rules! 
elf_reloc { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let sym = r_sym(self.r_info); let typ = r_type(self.r_info); - write!(f, - "r_offset: {:x} r_typ: {} r_sym: {} r_addend: {:x}", - self.r_offset, - typ, - sym, - self.r_addend) + f.debug_struct("Rela") + .field("r_offset", &format_args!("{:x}", self.r_offset)) + .field("r_info", &format_args!("{:x}", self.r_info)) + .field("r_addend", &format_args!("{:x}", self.r_addend)) + .field("r_typ", &typ) + .field("r_sym", &sym) + .finish() } } impl fmt::Debug for Rel { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let sym = r_sym(self.r_info); let typ = r_type(self.r_info); - write!(f, - "r_offset: {:x} r_typ: {} r_sym: {}", - self.r_offset, - typ, - sym - ) + f.debug_struct("Rel") + .field("r_offset", &format_args!("{:x}", self.r_offset)) + .field("r_info", &format_args!("{:x}", self.r_info)) + .field("r_typ", &typ) + .field("r_sym", &sym) + .finish() } } }; @@ -105,12 +129,12 @@ macro_rules! elf_reloc { macro_rules! elf_rela_std_impl { ($size:ident, $isize:ty) => { if_alloc! { - use elf::reloc::Reloc; + use crate::elf::reloc::Reloc; use core::slice; if_std! { - use error::Result; + use crate::error::Result; use std::fs::File; use std::io::{Read, Seek}; @@ -120,8 +144,8 @@ macro_rules! elf_rela_std_impl { ($size:ident, $isize:ty) => { impl From for Reloc { fn from(rela: Rela) -> Self { Reloc { - r_offset: rela.r_offset as u64, - r_addend: Some(rela.r_addend as i64), + r_offset: u64::from(rela.r_offset), + r_addend: Some(i64::from(rela.r_addend)), r_sym: r_sym(rela.r_info) as usize, r_type: r_type(rela.r_info), } @@ -131,7 +155,7 @@ macro_rules! elf_rela_std_impl { ($size:ident, $isize:ty) => { impl From for Reloc { fn from(rel: Rel) -> Self { Reloc { - r_offset: rel.r_offset as u64, + r_offset: u64::from(rel.r_offset), r_addend: None, r_sym: r_sym(rel.r_info) as usize, r_type: r_type(rel.r_info), @@ -141,7 +165,7 @@ macro_rules! 
elf_rela_std_impl { ($size:ident, $isize:ty) => { impl From for Rela { fn from(rela: Reloc) -> Self { - let r_info = r_info(rela.r_sym as $size, rela.r_type as $size); + let r_info = r_info(rela.r_sym as $size, $size::from(rela.r_type)); Rela { r_offset: rela.r_offset as $size, r_info: r_info, @@ -152,7 +176,7 @@ macro_rules! elf_rela_std_impl { ($size:ident, $isize:ty) => { impl From for Rel { fn from(rel: Reloc) -> Self { - let r_info = r_info(rel.r_sym as $size, rel.r_type as $size); + let r_info = r_info(rel.r_sym as $size, $size::from(rel.r_type)); Rel { r_offset: rel.r_offset as $size, r_info: r_info, @@ -186,7 +210,7 @@ macro_rules! elf_rela_std_impl { ($size:ident, $isize:ty) => { let mut relocs = vec![Rela::default(); count]; fd.seek(Start(offset as u64))?; unsafe { - fd.read(plain::as_mut_bytes(&mut *relocs))?; + fd.read_exact(plain::as_mut_bytes(&mut *relocs))?; } Ok(relocs) } @@ -197,7 +221,7 @@ macro_rules! elf_rela_std_impl { ($size:ident, $isize:ty) => { pub mod reloc32 { - pub use elf::reloc::*; + pub use crate::elf::reloc::*; elf_reloc!(u32, i32); @@ -224,7 +248,7 @@ pub mod reloc32 { pub mod reloc64 { - pub use elf::reloc::*; + pub use crate::elf::reloc::*; elf_reloc!(u64, i64); @@ -238,7 +262,7 @@ pub mod reloc64 { #[inline(always)] pub fn r_type(info: u64) -> u32 { - (info & 0xffffffff) as u32 + (info & 0xffff_ffff) as u32 } #[inline(always)] @@ -253,12 +277,13 @@ pub mod reloc64 { // Generic Reloc ///////////////////////////// if_alloc! { + use scroll::{ctx, Pread}; + use scroll::ctx::SizeWith; use core::fmt; use core::result; - use scroll::ctx; - use container::{Ctx, Container}; + use crate::container::{Ctx, Container}; #[cfg(feature = "endian_fd")] - use alloc::vec::Vec; + use crate::alloc::vec::Vec; #[derive(Clone, Copy, PartialEq, Default)] /// A unified ELF relocation structure @@ -278,18 +303,6 @@ if_alloc! 
{ use scroll::ctx::SizeWith; Reloc::size_with(&(is_rela, ctx)) } - #[cfg(feature = "endian_fd")] - pub fn parse(bytes: &[u8], mut offset: usize, filesz: usize, is_rela: bool, ctx: Ctx) -> ::error::Result> { - use scroll::Pread; - let count = filesz / Reloc::size(is_rela, ctx); - let mut relocs = Vec::with_capacity(count); - let offset = &mut offset; - for _ in 0..count { - let reloc = bytes.gread_with::(offset, (is_rela, ctx))?; - relocs.push(reloc); - } - Ok(relocs) - } } type RelocCtx = (bool, Ctx); @@ -309,7 +322,7 @@ if_alloc! { } impl<'a> ctx::TryFromCtx<'a, RelocCtx> for Reloc { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], (is_rela, Ctx { container, le }): RelocCtx) -> result::Result<(Self, Self::Size), Self::Error> { use scroll::Pread; @@ -334,7 +347,7 @@ if_alloc! { } impl ctx::TryIntoCtx for Reloc { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; // TODO: I think this is a bad idea /// Writes the relocation into `bytes` @@ -373,22 +386,140 @@ if_alloc! { impl fmt::Debug for Reloc { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if let Some(addend) = self.r_addend { - write!(f, - "r_offset: {:x} r_typ: {} r_sym: {} r_addend: {:x}", - self.r_offset, - self.r_type, - self.r_sym, - addend, - ) + f.debug_struct("Reloc") + .field("r_offset", &format_args!("{:x}", self.r_offset)) + .field("r_addend", &format_args!("{:x}", self.r_addend.unwrap_or(0))) + .field("r_sym", &self.r_sym) + .field("r_type", &self.r_type) + .finish() + } + } + + #[derive(Default)] + /// An ELF section containing relocations, allowing lazy iteration over symbols. 
+ pub struct RelocSection<'a> { + bytes: &'a [u8], + count: usize, + ctx: RelocCtx, + start: usize, + end: usize, + } + + impl<'a> fmt::Debug for RelocSection<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let len = self.bytes.len(); + fmt.debug_struct("RelocSection") + .field("bytes", &len) + .field("range", &format!("{:#x}..{:#x}", self.start, self.end)) + .field("count", &self.count) + .field("Relocations", &self.to_vec()) + .finish() + } + } + + impl<'a> RelocSection<'a> { + #[cfg(feature = "endian_fd")] + /// Parse a REL or RELA section of size `filesz` from `offset`. + pub fn parse(bytes: &'a [u8], offset: usize, filesz: usize, is_rela: bool, ctx: Ctx) -> crate::error::Result> { + // TODO: better error message when too large (see symtab implementation) + let bytes = bytes.pread_with(offset, filesz)?; + + Ok(RelocSection { + bytes: bytes, + count: filesz / Reloc::size(is_rela, ctx), + ctx: (is_rela, ctx), + start: offset, + end: offset + filesz, + }) + } + + /// Try to parse a single relocation from the binary, at `index`. + #[inline] + pub fn get(&self, index: usize) -> Option { + if index >= self.count { + None } else { - write!(f, - "r_offset: {:x} r_typ: {} r_sym: {}", - self.r_offset, - self.r_type, - self.r_sym, - ) + Some(self.bytes.pread_with(index * Reloc::size_with(&self.ctx), self.ctx).unwrap()) + } + } + + /// The number of relocations in the section. + #[inline] + pub fn len(&self) -> usize { + self.count + } + + /// Returns true if section has no relocations. + #[inline] + pub fn is_empty(&self) -> bool { + self.count == 0 + } + + /// Iterate over all relocations. + pub fn iter(&self) -> RelocIterator<'a> { + self.into_iter() + } + + /// Parse all relocations into a vector. 
+ pub fn to_vec(&self) -> Vec { + self.iter().collect() + } + } + + impl<'a, 'b> IntoIterator for &'b RelocSection<'a> { + type Item = as Iterator>::Item; + type IntoIter = RelocIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + RelocIterator { + bytes: self.bytes, + offset: 0, + index: 0, + count: self.count, + ctx: self.ctx, } } } + + pub struct RelocIterator<'a> { + bytes: &'a [u8], + offset: usize, + index: usize, + count: usize, + ctx: RelocCtx, + } + + impl<'a> fmt::Debug for RelocIterator<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("RelocIterator") + .field("bytes", &"<... redacted ...>") + .field("offset", &self.offset) + .field("index", &self.index) + .field("count", &self.count) + .field("ctx", &self.ctx) + .finish() + } + } + + impl<'a> Iterator for RelocIterator<'a> { + type Item = Reloc; + + #[inline] + fn next(&mut self) -> Option { + if self.index >= self.count { + None + } else { + self.index += 1; + Some(self.bytes.gread_with(&mut self.offset, self.ctx).unwrap()) + } + } + } + + impl<'a> ExactSizeIterator for RelocIterator<'a> { + #[inline] + fn len(&self) -> usize { + self.count - self.index + } + } } // end if_alloc diff --git a/third_party/rust/goblin/src/elf/section_header.rs b/third_party/rust/goblin/src/elf/section_header.rs index 2834609a5a2d..2eb9143c6b60 100644 --- a/third_party/rust/goblin/src/elf/section_header.rs +++ b/third_party/rust/goblin/src/elf/section_header.rs @@ -1,5 +1,7 @@ macro_rules! elf_section_header { ($size:ident) => { + #[cfg(feature = "alloc")] + use scroll::{Pread, Pwrite, SizeWith}; #[repr(C)] #[derive(Copy, Clone, Eq, PartialEq, Default)] #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] @@ -35,19 +37,18 @@ macro_rules! 
elf_section_header { impl ::core::fmt::Debug for SectionHeader { fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result { - write!(f, - "sh_name: {} sh_type {} sh_flags: 0x{:x} sh_addr: 0x{:x} sh_offset: 0x{:x} \ - sh_size: 0x{:x} sh_link: 0x{:x} sh_info: 0x{:x} sh_addralign 0x{:x} sh_entsize 0x{:x}", - self.sh_name, - sht_to_str(self.sh_type as u32), - self.sh_flags, - self.sh_addr, - self.sh_offset, - self.sh_size, - self.sh_link, - self.sh_info, - self.sh_addralign, - self.sh_entsize) + f.debug_struct("SectionHeader") + .field("sh_name", &self.sh_name) + .field("sh_type", &sht_to_str(self.sh_type)) + .field("sh_flags", &format_args!("0x{:x}", self.sh_flags)) + .field("sh_addr", &format_args!("0x{:x}", self.sh_addr)) + .field("sh_offset", &format_args!("0x{:x}", self.sh_offset)) + .field("sh_size", &format_args!("0x{:x}", self.sh_size)) + .field("sh_link", &format_args!("0x{:x}", self.sh_link)) + .field("sh_info", &format_args!("0x{:x}", self.sh_info)) + .field("sh_addralign", &format_args!("0x{:x}", self.sh_addralign)) + .field("sh_entsize", &format_args!("0x{:x}", self.sh_entsize)) + .finish() } } } @@ -116,38 +117,38 @@ pub const SHT_SYMTAB_SHNDX: u32 = 18; /// Number of defined types. pub const SHT_NUM: u32 = 19; /// Start OS-specific. -pub const SHT_LOOS: u32 = 0x60000000; +pub const SHT_LOOS: u32 = 0x6000_0000; /// Object attributes. -pub const SHT_GNU_ATTRIBUTES: u32 = 0x6ffffff5; +pub const SHT_GNU_ATTRIBUTES: u32 = 0x6fff_fff5; /// GNU-style hash table. -pub const SHT_GNU_HASH: u32 = 0x6ffffff6; +pub const SHT_GNU_HASH: u32 = 0x6fff_fff6; /// Prelink library list. -pub const SHT_GNU_LIBLIST: u32 = 0x6ffffff7; +pub const SHT_GNU_LIBLIST: u32 = 0x6fff_fff7; /// Checksum for DSO content. -pub const SHT_CHECKSUM: u32 = 0x6ffffff8; +pub const SHT_CHECKSUM: u32 = 0x6fff_fff8; /// Sun-specific low bound. 
-pub const SHT_LOSUNW: u32 = 0x6ffffffa; -pub const SHT_SUNW_MOVE: u32 = 0x6ffffffa; -pub const SHT_SUNW_COMDAT: u32 = 0x6ffffffb; -pub const SHT_SUNW_SYMINFO: u32 = 0x6ffffffc; +pub const SHT_LOSUNW: u32 = 0x6fff_fffa; +pub const SHT_SUNW_MOVE: u32 = 0x6fff_fffa; +pub const SHT_SUNW_COMDAT: u32 = 0x6fff_fffb; +pub const SHT_SUNW_SYMINFO: u32 = 0x6fff_fffc; /// Version definition section. -pub const SHT_GNU_VERDEF: u32 = 0x6ffffffd; +pub const SHT_GNU_VERDEF: u32 = 0x6fff_fffd; /// Version needs section. -pub const SHT_GNU_VERNEED: u32 = 0x6ffffffe; +pub const SHT_GNU_VERNEED: u32 = 0x6fff_fffe; /// Version symbol table. -pub const SHT_GNU_VERSYM: u32 = 0x6fffffff; +pub const SHT_GNU_VERSYM: u32 = 0x6fff_ffff; /// Sun-specific high bound. -pub const SHT_HISUNW: u32 = 0x6fffffff; +pub const SHT_HISUNW: u32 = 0x6fff_ffff; /// End OS-specific type. -pub const SHT_HIOS: u32 = 0x6fffffff; +pub const SHT_HIOS: u32 = 0x6fff_ffff; /// Start of processor-specific. -pub const SHT_LOPROC: u32 = 0x70000000; +pub const SHT_LOPROC: u32 = 0x7000_0000; /// End of processor-specific. -pub const SHT_HIPROC: u32 = 0x7fffffff; +pub const SHT_HIPROC: u32 = 0x7fff_ffff; /// Start of application-specific. -pub const SHT_LOUSER: u32 = 0x80000000; +pub const SHT_LOUSER: u32 = 0x8000_0000; /// End of application-specific. -pub const SHT_HIUSER: u32 = 0x8fffffff; +pub const SHT_HIUSER: u32 = 0x8fff_ffff; // Legal values for sh_flags (section flags) /// Writable. @@ -173,9 +174,9 @@ pub const SHF_TLS: u32 = 0x400; /// Section with compressed data. pub const SHF_COMPRESSED: u32 = 0x800; /// OS-specific.. -pub const SHF_MASKOS: u32 = 0x0ff00000; +pub const SHF_MASKOS: u32 = 0x0ff0_0000; /// Processor-specific. -pub const SHF_MASKPROC: u32 = 0xf0000000; +pub const SHF_MASKPROC: u32 = 0xf000_0000; /// Special ordering requirement (Solaris). 
pub const SHF_ORDERED: u32 = 1 << 30; /// Number of "regular" section header flags @@ -259,7 +260,7 @@ pub fn shf_to_str(shf: u32) -> &'static str { macro_rules! elf_section_header_std_impl { ($size:ty) => { #[cfg(test)] - mod test { + mod tests { use super::*; #[test] fn size_of() { @@ -268,13 +269,13 @@ macro_rules! elf_section_header_std_impl { ($size:ty) => { } if_alloc! { - use elf::section_header::SectionHeader as ElfSectionHeader; + use crate::elf::section_header::SectionHeader as ElfSectionHeader; use plain::Plain; - use alloc::vec::Vec; + use crate::alloc::vec::Vec; if_std! { - use error::Result; + use crate::error::Result; use std::fs::File; use std::io::{Read, Seek}; @@ -286,14 +287,14 @@ macro_rules! elf_section_header_std_impl { ($size:ty) => { ElfSectionHeader { sh_name: sh.sh_name as usize, sh_type: sh.sh_type, - sh_flags: sh.sh_flags as u64, - sh_addr: sh.sh_addr as u64, - sh_offset: sh.sh_offset as u64, - sh_size: sh.sh_size as u64, + sh_flags: u64::from(sh.sh_flags), + sh_addr: u64::from(sh.sh_addr), + sh_offset: u64::from(sh.sh_offset), + sh_size: u64::from(sh.sh_size), sh_link: sh.sh_link, sh_info: sh.sh_info, - sh_addralign: sh.sh_addralign as u64, - sh_entsize: sh.sh_entsize as u64, + sh_addralign: u64::from(sh.sh_addralign), + sh_entsize: u64::from(sh.sh_entsize), } } } @@ -315,6 +316,7 @@ macro_rules! elf_section_header_std_impl { ($size:ty) => { } impl SectionHeader { + // FIXME: > 65535 sections pub fn from_bytes(bytes: &[u8], shnum: usize) -> Vec { let mut shdrs = vec![SectionHeader::default(); shnum]; shdrs.copy_from_bytes(bytes).expect("buffer is too short for given number of entries"); @@ -322,11 +324,12 @@ macro_rules! 
elf_section_header_std_impl { ($size:ty) => { } #[cfg(feature = "std")] + // FIXME: > 65535 sections pub fn from_fd(fd: &mut File, offset: u64, shnum: usize) -> Result> { let mut shdrs = vec![SectionHeader::default(); shnum]; - try!(fd.seek(Start(offset))); + fd.seek(Start(offset))?; unsafe { - try!(fd.read(plain::as_mut_bytes(&mut *shdrs))); + fd.read_exact(plain::as_mut_bytes(&mut *shdrs))?; } Ok(shdrs) } @@ -336,7 +339,7 @@ macro_rules! elf_section_header_std_impl { ($size:ty) => { pub mod section_header32 { - pub use elf::section_header::*; + pub use crate::elf::section_header::*; elf_section_header!(u32); @@ -348,7 +351,7 @@ pub mod section_header32 { pub mod section_header64 { - pub use elf::section_header::*; + pub use crate::elf::section_header::*; elf_section_header!(u64); @@ -362,15 +365,15 @@ pub mod section_header64 { /////////////////////////////// if_alloc! { - use error; + use crate::error; use core::fmt; use core::result; use core::ops::Range; use scroll::ctx; - use container::{Container, Ctx}; + use crate::container::{Container, Ctx}; #[cfg(feature = "endian_fd")] - use alloc::vec::Vec; + use crate::alloc::vec::Vec; #[derive(Default, PartialEq, Clone)] /// A unified SectionHeader - convertable to and from 32-bit and 64-bit variants @@ -400,15 +403,15 @@ if_alloc! { impl SectionHeader { /// Return the size of the underlying program header, given a `container` #[inline] - pub fn size(ctx: &Ctx) -> usize { + pub fn size(ctx: Ctx) -> usize { use scroll::ctx::SizeWith; - Self::size_with(ctx) + Self::size_with(&ctx) } pub fn new() -> Self { SectionHeader { sh_name: 0, sh_type: SHT_PROGBITS, - sh_flags: SHF_ALLOC as u64, + sh_flags: u64::from(SHF_ALLOC), sh_addr: 0, sh_offset: 0, sh_size: 0, @@ -431,7 +434,13 @@ if_alloc! 
{ pub fn parse(bytes: &[u8], mut offset: usize, count: usize, ctx: Ctx) -> error::Result> { use scroll::Pread; let mut section_headers = Vec::with_capacity(count); - for _ in 0..count { + let mut nsection_headers = count; + let empty_sh = bytes.gread_with::(&mut offset, ctx)?; + if count == 0 as usize { + nsection_headers = empty_sh.sh_size as usize; + } + section_headers.push(empty_sh); + for _ in 1..nsection_headers { let shdr = bytes.gread_with(&mut offset, ctx)?; section_headers.push(shdr); } @@ -465,19 +474,18 @@ if_alloc! { impl fmt::Debug for SectionHeader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "sh_name: {} sh_type {} sh_flags: 0x{:x} sh_addr: 0x{:x} sh_offset: 0x{:x} \ - sh_size: 0x{:x} sh_link: 0x{:x} sh_info: 0x{:x} sh_addralign 0x{:x} sh_entsize 0x{:x}", - self.sh_name, - sht_to_str(self.sh_type as u32), - self.sh_flags, - self.sh_addr, - self.sh_offset, - self.sh_size, - self.sh_link, - self.sh_info, - self.sh_addralign, - self.sh_entsize) + f.debug_struct("SectionHeader") + .field("sh_name", &self.sh_name) + .field("sh_type", &sht_to_str(self.sh_type)) + .field("sh_flags", &format_args!("0x{:x}", self.sh_flags)) + .field("sh_addr", &format_args!("0x{:x}", self.sh_addr)) + .field("sh_offset", &format_args!("0x{:x}", self.sh_offset)) + .field("sh_size", &format_args!("0x{:x}", self.sh_size)) + .field("sh_link", &format_args!("0x{:x}", self.sh_link)) + .field("sh_info", &format_args!("0x{:x}", self.sh_info)) + .field("sh_addralign", &format_args!("0x{:x}", self.sh_addralign)) + .field("sh_entsize", &format_args!("0x{:x}", self.sh_entsize)) + .finish() } } @@ -496,7 +504,7 @@ if_alloc! { } impl<'a> ctx::TryFromCtx<'a, Ctx> for SectionHeader { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], Ctx {container, le}: Ctx) -> result::Result<(Self, Self::Size), Self::Error> { use scroll::Pread; @@ -513,7 +521,7 @@ if_alloc! 
{ } impl ctx::TryIntoCtx for SectionHeader { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result { use scroll::Pwrite; diff --git a/third_party/rust/goblin/src/elf/sym.rs b/third_party/rust/goblin/src/elf/sym.rs index ce620e3fd2cd..0a5949042942 100644 --- a/third_party/rust/goblin/src/elf/sym.rs +++ b/third_party/rust/goblin/src/elf/sym.rs @@ -46,9 +46,31 @@ pub const STT_LOPROC: u8 = 13; /// End of processor-specific. pub const STT_HIPROC: u8 = 15; +/// === Sym visibility === +/// Default: Visibility is specified by the symbol's binding type +pub const STV_DEFAULT: u8 = 0; +/// Internal: use of this attribute is currently reserved. +pub const STV_INTERNAL: u8 = 1; +/// Hidden: Not visible to other components, necessarily protected. Binding scope becomes local +/// when the object is included in an executable or shared object. +pub const STV_HIDDEN: u8 = 2; +/// Protected: Symbol defined in current component is visible in other components, but cannot be preempted. +/// Any reference from within the defining component must be resolved to the definition in that +/// component. +pub const STV_PROTECTED: u8 = 3; +/// Exported: ensures a symbol remains global, cannot be demoted or eliminated by any other symbol +/// visibility technique. +pub const STV_EXPORTED: u8 = 4; +/// Singleton: ensures a symbol remains global, and that a single instance of the definition is +/// bound to by all references within a process. Cannot be demoted or eliminated. +pub const STV_SINGLETON: u8 = 5; +/// Eliminate: extends the hidden attribute. Not written in any symbol table of a dynamic +/// executable or shared object. +pub const STV_ELIMINATE: u8 = 6; + /// Get the ST bind. /// -/// This is the first four bits of the byte. +/// This is the first four bits of the "info" byte. 
#[inline] pub fn st_bind(info: u8) -> u8 { info >> 4 @@ -56,12 +78,20 @@ pub fn st_bind(info: u8) -> u8 { /// Get the ST type. /// -/// This is the last four bits of the byte. +/// This is the last four bits of the "info" byte. #[inline] pub fn st_type(info: u8) -> u8 { info & 0xf } +/// Get the ST visibility. +/// +/// This is the last three bits of the "other" byte. +#[inline] +pub fn st_visibility(other: u8) -> u8 { + other & 0x7 +} + /// Is this information defining an import? #[inline] pub fn is_import(info: u8, value: u64) -> bool { @@ -105,11 +135,27 @@ pub fn type_to_str(typ: u8) -> &'static str { } } +/// Get the string for some visibility +#[inline] +pub fn visibility_to_str(typ: u8) -> &'static str { + match typ { + STV_DEFAULT => "DEFAULT", + STV_INTERNAL => "INTERNAL", + STV_HIDDEN => "HIDDEN", + STV_PROTECTED => "PROTECTED", + STV_EXPORTED => "EXPORTED", + STV_SINGLETON => "SINGLETON", + STV_ELIMINATE => "ELIMINATE", + _ => "UNKNOWN_STV", + } +} + + macro_rules! elf_sym_std_impl { ($size:ty) => { #[cfg(test)] - mod test { + mod tests { use super::*; #[test] fn size_of() { @@ -118,13 +164,13 @@ macro_rules! elf_sym_std_impl { } if_alloc! { - use elf::sym::Sym as ElfSym; + use crate::elf::sym::Sym as ElfSym; use core::fmt; use core::slice; if_std! { - use error::Result; + use crate::error::Result; use std::fs::File; use std::io::{Read, Seek}; @@ -153,8 +199,8 @@ macro_rules! elf_sym_std_impl { st_info: sym.st_info, st_other: sym.st_other, st_shndx: sym.st_shndx as usize, - st_value: sym.st_value as u64, - st_size: sym.st_size as u64, + st_value: u64::from(sym.st_value), + st_size: u64::from(sym.st_size), } } } @@ -177,15 +223,15 @@ macro_rules! 
elf_sym_std_impl { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let bind = st_bind(self.st_info); let typ = st_type(self.st_info); - write!(f, - "st_name: {} {} {} st_other: {} st_shndx: {} st_value: {:x} st_size: {}", - self.st_name, - bind_to_str(bind), - type_to_str(typ), - self.st_other, - self.st_shndx, - self.st_value, - self.st_size) + let vis = st_visibility(self.st_other); + f.debug_struct("Sym") + .field("st_name", &self.st_name) + .field("st_value", &format_args!("{:x}", self.st_value)) + .field("st_size", &self.st_size) + .field("st_info", &format_args!("{:x} {} {}", self.st_info, bind_to_str(bind), type_to_str(typ))) + .field("st_other", &format_args!("{} {}", self.st_other, visibility_to_str(vis))) + .field("st_shndx", &self.st_shndx) + .finish() } } @@ -198,9 +244,9 @@ macro_rules! elf_sym_std_impl { pub fn from_fd(fd: &mut File, offset: usize, count: usize) -> Result> { // TODO: AFAIK this shouldn't work, since i pass in a byte size... let mut syms = vec![Sym::default(); count]; - try!(fd.seek(Start(offset as u64))); + fd.seek(Start(offset as u64))?; unsafe { - try!(fd.read(plain::as_mut_bytes(&mut *syms))); + fd.read_exact(plain::as_mut_bytes(&mut *syms))?; } syms.dedup(); Ok(syms) @@ -209,8 +255,11 @@ macro_rules! elf_sym_std_impl { }; } +#[cfg(feature = "alloc")] +use scroll::{Pread, Pwrite, SizeWith}; + pub mod sym32 { - pub use elf::sym::*; + pub use crate::elf::sym::*; #[repr(C)] #[derive(Clone, Copy, PartialEq, Default)] @@ -241,7 +290,7 @@ pub mod sym32 { } pub mod sym64 { - pub use elf::sym::*; + pub use crate::elf::sym::*; #[repr(C)] #[derive(Clone, Copy, PartialEq, Default)] @@ -272,13 +321,13 @@ pub mod sym64 { } if_alloc! 
{ - use scroll::{ctx, Pread}; + use scroll::ctx; use scroll::ctx::SizeWith; use core::fmt::{self, Debug}; use core::result; - use container::{Ctx, Container}; - use error::Result; - use alloc::vec::Vec; + use crate::container::{Ctx, Container}; + use crate::error::Result; + use crate::alloc::vec::Vec; #[derive(Default, PartialEq, Clone)] /// A unified Sym definition - convertable to and from 32-bit and 64-bit variants @@ -310,17 +359,24 @@ if_alloc! { } /// Get the ST bind. /// - /// This is the first four bits of the byte. + /// This is the first four bits of the "info" byte. #[inline] pub fn st_bind(&self) -> u8 { self.st_info >> 4 } /// Get the ST type. /// - /// This is the last four bits of the byte. + /// This is the last four bits of the "info" byte. #[inline] pub fn st_type(&self) -> u8 { - self.st_info & 0xf + st_type(self.st_info) + } + /// Get the ST visibility. + /// + /// This is the last three bits of the "other" byte. + #[inline] + pub fn st_visibility(&self) -> u8 { + st_visibility(self.st_other) } #[cfg(feature = "endian_fd")] /// Parse `count` vector of ELF symbols from `offset` @@ -339,15 +395,15 @@ if_alloc! { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let bind = self.st_bind(); let typ = self.st_type(); - write!(f, - "st_name: {} {} {} st_other: {} st_shndx: {} st_value: {:x} st_size: {}", - self.st_name, - bind_to_str(bind), - type_to_str(typ), - self.st_other, - self.st_shndx, - self.st_value, - self.st_size) + let vis = self.st_visibility(); + f.debug_struct("Sym") + .field("st_name", &self.st_name) + .field("st_info", &format_args!("0x{:x} {} {}", self.st_info, bind_to_str(bind), type_to_str(typ))) + .field("st_other", &format_args!("{} {}", self.st_other, visibility_to_str(vis))) + .field("st_shndx", &self.st_shndx) + .field("st_value", &format_args!("0x{:x}", self.st_value)) + .field("st_size", &self.st_size) + .finish() } } @@ -367,7 +423,7 @@ if_alloc! 
{ } impl<'a> ctx::TryFromCtx<'a, Ctx> for Sym { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; #[inline] fn try_from_ctx(bytes: &'a [u8], Ctx { container, le}: Ctx) -> result::Result<(Self, Self::Size), Self::Error> { @@ -385,7 +441,7 @@ if_alloc! { } impl ctx::TryIntoCtx for Sym { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; #[inline] fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result { @@ -445,7 +501,11 @@ if_alloc! { impl<'a> Symtab<'a> { /// Parse a table of `count` ELF symbols from `offset`. pub fn parse(bytes: &'a [u8], offset: usize, count: usize, ctx: Ctx) -> Result> { - let size = count * Sym::size_with(&ctx); + let size = count + .checked_mul(Sym::size_with(&ctx)) + .ok_or_else(|| crate::error::Error::Malformed( + format!("Too many ELF symbols (offset {:#x}, count {})", offset, count) + ))?; // TODO: make this a better error message when too large let bytes = bytes.pread_with(offset, size)?; Ok(Symtab { bytes, count, ctx, start: offset, end: offset+size }) @@ -467,6 +527,12 @@ if_alloc! { self.count } + /// Returns true if table has no symbols. + #[inline] + pub fn is_empty(&self) -> bool { + self.count == 0 + } + /// Iterate over all symbols. 
#[inline] pub fn iter(&self) -> SymIterator<'a> { diff --git a/third_party/rust/goblin/src/error.rs b/third_party/rust/goblin/src/error.rs index 17b9e2046f2a..b7f4c195f578 100644 --- a/third_party/rust/goblin/src/error.rs +++ b/third_party/rust/goblin/src/error.rs @@ -3,8 +3,8 @@ use scroll; use core::result; -use core::fmt::{self, Display}; -use alloc::string::String; +use core::fmt; +use crate::alloc::string::String; #[cfg(feature = "std")] use std::{error, io}; @@ -22,29 +22,14 @@ pub enum Error { IO(io::Error), } -impl Error { - pub fn description(&self) -> &str { - match *self { - #[cfg(feature = "std")] - Error::IO(_) => { "IO error" } - Error::Scroll(_) => { "Scroll error" } - Error::BadMagic(_) => { "Invalid magic number" } - Error::Malformed(_) => { "Entity is malformed in some way" } - } - } -} - #[cfg(feature = "std")] impl error::Error for Error { - fn description(&self) -> &str { - Error::description(self) - } - fn cause(&self) -> Option<&error::Error> { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { match *self { - Error::IO(ref io) => { io.cause() } - Error::Scroll(ref scroll) => { scroll.cause() } - Error::BadMagic(_) => { None } - Error::Malformed(_) => { None } + Error::IO(ref io) => Some(io), + Error::Scroll(ref scroll) => Some(scroll), + Error::BadMagic(_) => None, + Error::Malformed(_) => None, } } } @@ -62,14 +47,14 @@ impl From for Error { } } -impl Display for Error { +impl fmt::Display for Error { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { match *self { #[cfg(feature = "std")] - Error::IO(ref err) => { write!(fmt, "{}", err) }, - Error::Scroll(ref err) => { write!(fmt, "{}", err) }, - Error::BadMagic(magic) => { write! (fmt, "Invalid magic number: 0x{:x}", magic) }, - Error::Malformed(ref msg) => { write! 
(fmt, "Malformed entity: {}", msg) }, + Error::IO(ref err) => write!(fmt, "{}", err), + Error::Scroll(ref err) => write!(fmt, "{}", err), + Error::BadMagic(magic) => write!(fmt, "Invalid magic number: 0x{:x}", magic), + Error::Malformed(ref msg) => write!(fmt, "Malformed entity: {}", msg), } } } diff --git a/third_party/rust/goblin/src/lib.rs b/third_party/rust/goblin/src/lib.rs index 3571124ab442..6db1127423a5 100644 --- a/third_party/rust/goblin/src/lib.rs +++ b/third_party/rust/goblin/src/lib.rs @@ -10,7 +10,7 @@ //! * A PE32/PE32+ (64-bit) parser, and raw C structs //! * A Unix archive parser and loader //! -//! Goblin _should_ require at least `rustc` 1.16, but is developed on stable. +//! Goblin requires at least `rustc` 1.31.1, uses the 2018 rust edition, and is developed on stable. //! //! Goblin primarily supports the following important use cases: //! @@ -81,14 +81,6 @@ #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(all(feature = "alloc", not(feature = "std")), feature(alloc))] -extern crate plain; -#[cfg_attr(feature = "alloc", macro_use)] -extern crate scroll; - -#[cfg(feature = "log")] -#[macro_use] -extern crate log; - #[cfg(feature = "std")] extern crate core; @@ -133,7 +125,7 @@ pub mod strtab; /// Binary container size information and byte-order context pub mod container { use scroll; - pub use scroll::Endian as Endian; + pub use scroll::Endian; #[derive(Debug, Copy, Clone, PartialEq)] /// The size of a binary container @@ -144,8 +136,8 @@ pub mod container { impl Container { /// Is this a 64-bit container or not? 
- pub fn is_big(&self) -> bool { - *self == Container::Big + pub fn is_big(self) -> bool { + self == Container::Big } } @@ -173,19 +165,19 @@ pub mod container { impl Ctx { /// Whether this binary container context is "big" or not - pub fn is_big(&self) -> bool { + pub fn is_big(self) -> bool { self.container.is_big() } /// Whether this binary container context is little endian or not - pub fn is_little_endian(&self) -> bool { + pub fn is_little_endian(self) -> bool { self.le.is_little() } /// Create a new binary container context pub fn new (container: Container, le: scroll::Endian) -> Self { - Ctx { container: container, le: le } + Ctx { container, le } } /// Return a dubious pointer/address byte size for the container - pub fn size(&self) -> usize { + pub fn size(self) -> usize { match self.container { // TODO: require pointer size initialization/setting or default to container size with these values, e.g., avr pointer width will be smaller iirc Container::Little => 4, @@ -196,13 +188,13 @@ pub mod container { impl From for Ctx { fn from(container: Container) -> Self { - Ctx { container: container, le: scroll::Endian::default() } + Ctx { container, le: scroll::Endian::default() } } } impl From for Ctx { fn from(le: scroll::Endian) -> Self { - Ctx { container: CONTAINER, le: le } + Ctx { container: CONTAINER, le } } } @@ -244,7 +236,7 @@ if_everything! { /// Peeks at `bytes`, and returns a `Hint` pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result { use scroll::{Pread, LE, BE}; - use mach::{fat, header}; + use crate::mach::{fat, header}; if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG { let class = bytes[elf::header::EI_CLASS]; let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB; @@ -272,7 +264,7 @@ if_everything! 
{ if let Some(ctx) = maybe_ctx { Ok(Hint::Mach(HintData { is_lsb: ctx.le.is_little(), is_64: Some(ctx.container.is_big()) })) } else { - Err(error::Error::Malformed(format!("Correct mach magic {:#x} does not have a matching parsing context!", magic).into())) + Err(error::Error::Malformed(format!("Correct mach magic {:#x} does not have a matching parsing context!", magic))) } }, // its something else @@ -293,6 +285,7 @@ if_everything! { } #[derive(Debug)] + #[allow(clippy::large_enum_variant)] /// A parseable object that goblin understands pub enum Object<'a> { /// An ELF32/ELF64! @@ -335,15 +328,16 @@ pub mod elf; #[cfg(feature = "elf32")] /// The ELF 32-bit struct definitions and associated values, re-exported for easy "type-punning" pub mod elf32 { - pub use elf::header::header32 as header; - pub use elf::program_header::program_header32 as program_header; - pub use elf::section_header::section_header32 as section_header; - pub use elf::dyn::dyn32 as dyn; - pub use elf::sym::sym32 as sym; - pub use elf::reloc::reloc32 as reloc; - pub use elf::note::Nhdr32 as Note; + pub use crate::elf::header::header32 as header; + pub use crate::elf::program_header::program_header32 as program_header; + pub use crate::elf::section_header::section_header32 as section_header; + pub use crate::elf::dynamic::dyn32 as dynamic; + pub use crate::elf::sym::sym32 as sym; + pub use crate::elf::reloc::reloc32 as reloc; + pub use crate::elf::note::Nhdr32 as Note; pub mod gnu_hash { + pub use crate::elf::gnu_hash::hash; elf_gnu_hash_impl!(u32); } } @@ -351,15 +345,16 @@ pub mod elf32 { #[cfg(feature = "elf64")] /// The ELF 64-bit struct definitions and associated values, re-exported for easy "type-punning" pub mod elf64 { - pub use elf::header::header64 as header; - pub use elf::program_header::program_header64 as program_header; - pub use elf::section_header::section_header64 as section_header; - pub use elf::dyn::dyn64 as dyn; - pub use elf::sym::sym64 as sym; - pub use elf::reloc::reloc64 
as reloc; - pub use elf::note::Nhdr64 as Note; + pub use crate::elf::header::header64 as header; + pub use crate::elf::program_header::program_header64 as program_header; + pub use crate::elf::section_header::section_header64 as section_header; + pub use crate::elf::dynamic::dyn64 as dynamic; + pub use crate::elf::sym::sym64 as sym; + pub use crate::elf::reloc::reloc64 as reloc; + pub use crate::elf::note::Nhdr64 as Note; pub mod gnu_hash { + pub use crate::elf::gnu_hash::hash; elf_gnu_hash_impl!(u64); } } diff --git a/third_party/rust/goblin/src/mach/constants.rs b/third_party/rust/goblin/src/mach/constants.rs index d51ba1db0cf4..b5afc427ec62 100644 --- a/third_party/rust/goblin/src/mach/constants.rs +++ b/third_party/rust/goblin/src/mach/constants.rs @@ -1,36 +1,36 @@ //! Miscellaneous constants used inside of and when constructing, Mach-o binaries // Convienence constants for return values from dyld_get_sdk_version() and friends. -pub const DYLD_MACOSX_VERSION_10_4: u32 = 0x000A0400; -pub const DYLD_MACOSX_VERSION_10_5: u32 = 0x000A0500; -pub const DYLD_MACOSX_VERSION_10_6: u32 = 0x000A0600; -pub const DYLD_MACOSX_VERSION_10_7: u32 = 0x000A0700; -pub const DYLD_MACOSX_VERSION_10_8: u32 = 0x000A0800; -pub const DYLD_MACOSX_VERSION_10_9: u32 = 0x000A0900; -pub const DYLD_MACOSX_VERSION_10_10: u32 = 0x000A0A00; -pub const DYLD_MACOSX_VERSION_10_11: u32 = 0x000A0B00; -pub const DYLD_MACOSX_VERSION_10_12: u32 = 0x000A0C00; -pub const DYLD_MACOSX_VERSION_10_13: u32 = 0x000A0D00; +pub const DYLD_MACOSX_VERSION_10_4: u32 = 0x000A_0400; +pub const DYLD_MACOSX_VERSION_10_5: u32 = 0x000A_0500; +pub const DYLD_MACOSX_VERSION_10_6: u32 = 0x000A_0600; +pub const DYLD_MACOSX_VERSION_10_7: u32 = 0x000A_0700; +pub const DYLD_MACOSX_VERSION_10_8: u32 = 0x000A_0800; +pub const DYLD_MACOSX_VERSION_10_9: u32 = 0x000A_0900; +pub const DYLD_MACOSX_VERSION_10_10: u32 = 0x000A_0A00; +pub const DYLD_MACOSX_VERSION_10_11: u32 = 0x000A_0B00; +pub const DYLD_MACOSX_VERSION_10_12: u32 = 
0x000A_0C00; +pub const DYLD_MACOSX_VERSION_10_13: u32 = 0x000A_0D00; -pub const DYLD_IOS_VERSION_2_0: u32 = 0x00020000; -pub const DYLD_IOS_VERSION_2_1: u32 = 0x00020100; -pub const DYLD_IOS_VERSION_2_2: u32 = 0x00020200; -pub const DYLD_IOS_VERSION_3_0: u32 = 0x00030000; -pub const DYLD_IOS_VERSION_3_1: u32 = 0x00030100; -pub const DYLD_IOS_VERSION_3_2: u32 = 0x00030200; -pub const DYLD_IOS_VERSION_4_0: u32 = 0x00040000; -pub const DYLD_IOS_VERSION_4_1: u32 = 0x00040100; -pub const DYLD_IOS_VERSION_4_2: u32 = 0x00040200; -pub const DYLD_IOS_VERSION_4_3: u32 = 0x00040300; -pub const DYLD_IOS_VERSION_5_0: u32 = 0x00050000; -pub const DYLD_IOS_VERSION_5_1: u32 = 0x00050100; -pub const DYLD_IOS_VERSION_6_0: u32 = 0x00060000; -pub const DYLD_IOS_VERSION_6_1: u32 = 0x00060100; -pub const DYLD_IOS_VERSION_7_0: u32 = 0x00070000; -pub const DYLD_IOS_VERSION_7_1: u32 = 0x00070100; -pub const DYLD_IOS_VERSION_8_0: u32 = 0x00080000; -pub const DYLD_IOS_VERSION_9_0: u32 = 0x00090000; -pub const DYLD_IOS_VERSION_10_0: u32 = 0x000A0000; -pub const DYLD_IOS_VERSION_11_0: u32 = 0x000B0000; +pub const DYLD_IOS_VERSION_2_0: u32 = 0x0002_0000; +pub const DYLD_IOS_VERSION_2_1: u32 = 0x0002_0100; +pub const DYLD_IOS_VERSION_2_2: u32 = 0x0002_0200; +pub const DYLD_IOS_VERSION_3_0: u32 = 0x0003_0000; +pub const DYLD_IOS_VERSION_3_1: u32 = 0x0003_0100; +pub const DYLD_IOS_VERSION_3_2: u32 = 0x0003_0200; +pub const DYLD_IOS_VERSION_4_0: u32 = 0x0004_0000; +pub const DYLD_IOS_VERSION_4_1: u32 = 0x0004_0100; +pub const DYLD_IOS_VERSION_4_2: u32 = 0x0004_0200; +pub const DYLD_IOS_VERSION_4_3: u32 = 0x0004_0300; +pub const DYLD_IOS_VERSION_5_0: u32 = 0x0005_0000; +pub const DYLD_IOS_VERSION_5_1: u32 = 0x0005_0100; +pub const DYLD_IOS_VERSION_6_0: u32 = 0x0006_0000; +pub const DYLD_IOS_VERSION_6_1: u32 = 0x0006_0100; +pub const DYLD_IOS_VERSION_7_0: u32 = 0x0007_0000; +pub const DYLD_IOS_VERSION_7_1: u32 = 0x0007_0100; +pub const DYLD_IOS_VERSION_8_0: u32 = 0x0008_0000; +pub const 
DYLD_IOS_VERSION_9_0: u32 = 0x0009_0000; +pub const DYLD_IOS_VERSION_10_0: u32 = 0x000A_0000; +pub const DYLD_IOS_VERSION_11_0: u32 = 0x000B_0000; // Segment and Section Constants @@ -39,9 +39,9 @@ pub const DYLD_IOS_VERSION_11_0: u32 = 0x000B0000; // can only have one type) but the section attributes are not (it may have more // than one attribute). /// 256 section types -pub const SECTION_TYPE: u32 = 0x000000ff; +pub const SECTION_TYPE: u32 = 0x0000_00ff; /// 24 section attributes -pub const SECTION_ATTRIBUTES: u32 = 0xffffff00; +pub const SECTION_ATTRIBUTES: u32 = 0xffff_ff00; // Constants for the type of a section /// regular section @@ -106,19 +106,19 @@ pub const S_THREAD_LOCAL_INIT_FUNCTION_POINTERS: u32 = 0x15; // Constants for the section attributes part of the flags field of a section // structure. /// User setable attributes -pub const SECTION_ATTRIBUTES_USR: u32 = 0xff000000; +pub const SECTION_ATTRIBUTES_USR: u32 = 0xff00_0000; /// section contains only true machine instructions -pub const S_ATTR_PURE_INSTRUCTIONS: u32 = 0x80000000; +pub const S_ATTR_PURE_INSTRUCTIONS: u32 = 0x8000_0000; /// section contains coalesced symbols that are not to be in a ranlib table of contents -pub const S_ATTR_NO_TOC: u32 = 0x40000000; +pub const S_ATTR_NO_TOC: u32 = 0x4000_0000; /// ok to strip static symbols in this section in files with the MH_DYLDLINK flag -pub const S_ATTR_STRIP_STATIC_SYMS: u32 = 0x20000000; +pub const S_ATTR_STRIP_STATIC_SYMS: u32 = 0x2000_0000; /// no dead stripping -pub const S_ATTR_NO_DEAD_STRIP: u32 = 0x10000000; +pub const S_ATTR_NO_DEAD_STRIP: u32 = 0x1000_0000; /// blocks are live if they reference live blocks -pub const S_ATTR_LIVE_SUPPORT: u32 = 0x08000000; +pub const S_ATTR_LIVE_SUPPORT: u32 = 0x0800_0000; /// Used with i386 code stubs written on by dyld -pub const S_ATTR_SELF_MODIFYING_CODE: u32 = 0x04000000; +pub const S_ATTR_SELF_MODIFYING_CODE: u32 = 0x0400_0000; // If a segment contains any sections marked with S_ATTR_DEBUG then all 
// sections in that segment must have this attribute. No section other than @@ -128,15 +128,15 @@ pub const S_ATTR_SELF_MODIFYING_CODE: u32 = 0x04000000; // from sections with this attribute into its output file. These sections // generally contain DWARF debugging info. /// debug section -pub const S_ATTR_DEBUG: u32 = 0x02000000; +pub const S_ATTR_DEBUG: u32 = 0x0200_0000; /// system setable attributes -pub const SECTION_ATTRIBUTES_SYS: u32 = 0x00ffff00; +pub const SECTION_ATTRIBUTES_SYS: u32 = 0x00ff_ff00; /// section contains some machine instructions -pub const S_ATTR_SOME_INSTRUCTIONS: u32 = 0x00000400; +pub const S_ATTR_SOME_INSTRUCTIONS: u32 = 0x0000_0400; /// section has external relocation entries -pub const S_ATTR_EXT_RELOC: u32 = 0x00000200; +pub const S_ATTR_EXT_RELOC: u32 = 0x0000_0200; /// section has local relocation entries -pub const S_ATTR_LOC_RELOC: u32 = 0x00000100; +pub const S_ATTR_LOC_RELOC: u32 = 0x0000_0100; // The names of segments and sections in them are mostly meaningless to the // link-editor. 
But there are few things to support traditional UNIX @@ -150,45 +150,52 @@ pub const S_ATTR_LOC_RELOC: u32 = 0x00000100; // The currently known segment names and the section names in those segments /// the pagezero segment which has no protections and catches NULL references for MH_EXECUTE files -pub const SEG_PAGEZERO: &'static str = "__PAGEZERO"; +pub const SEG_PAGEZERO: &str = "__PAGEZERO"; /// the tradition UNIX text segment -pub const SEG_TEXT: &'static str = "__TEXT"; +pub const SEG_TEXT: &str = "__TEXT"; /// the real text part of the text section no headers, and no padding -pub const SECT_TEXT: &'static str = "__text"; +pub const SECT_TEXT: &str = "__text"; /// the fvmlib initialization section -pub const SECT_FVMLIB_INIT0: &'static str = "__fvmlib_init0"; +pub const SECT_FVMLIB_INIT0: &str = "__fvmlib_init0"; /// the section following the fvmlib initialization section -pub const SECT_FVMLIB_INIT1: &'static str = "__fvmlib_init1"; +pub const SECT_FVMLIB_INIT1: &str = "__fvmlib_init1"; /// the tradition UNIX data segment -pub const SEG_DATA: &'static str = "__DATA"; +pub const SEG_DATA: &str = "__DATA"; /// the real initialized data section no padding, no bss overlap -pub const SECT_DATA: &'static str = "__data"; +pub const SECT_DATA: &str = "__data"; /// the real uninitialized data sectionno padding -pub const SECT_BSS: &'static str = "__bss"; +pub const SECT_BSS: &str = "__bss"; /// the section common symbols are allocated in by the link editor -pub const SECT_COMMON: &'static str = "__common"; +pub const SECT_COMMON: &str = "__common"; /// objective-C runtime segment -pub const SEG_OBJC: &'static str = "__OBJC"; +pub const SEG_OBJC: &str = "__OBJC"; /// symbol table -pub const SECT_OBJC_SYMBOLS: &'static str = "__symbol_table"; +pub const SECT_OBJC_SYMBOLS: &str = "__symbol_table"; /// module information -pub const SECT_OBJC_MODULES: &'static str = "__module_info"; +pub const SECT_OBJC_MODULES: &str = "__module_info"; /// string table -pub const 
SECT_OBJC_STRINGS: &'static str = "__selector_strs"; +pub const SECT_OBJC_STRINGS: &str = "__selector_strs"; /// string table -pub const SECT_OBJC_REFS: &'static str = "__selector_refs"; +pub const SECT_OBJC_REFS: &str = "__selector_refs"; /// the icon segment -pub const SEG_ICON: &'static str = "__ICON"; +pub const SEG_ICON: &str = "__ICON"; /// the icon headers -pub const SECT_ICON_HEADER: &'static str = "__header"; +pub const SECT_ICON_HEADER: &str = "__header"; /// the icons in tiff format -pub const SECT_ICON_TIFF: &'static str = "__tiff"; +pub const SECT_ICON_TIFF: &str = "__tiff"; /// the segment containing all structs created and maintained by the link editor. Created with -seglinkedit option to ld(1) for MH_EXECUTE and FVMLIB file types only -pub const SEG_LINKEDIT: &'static str = "__LINKEDIT"; +pub const SEG_LINKEDIT: &str = "__LINKEDIT"; /// the unix stack segment -pub const SEG_UNIXSTACK: &'static str = "__UNIXSTACK"; +pub const SEG_UNIXSTACK: &str = "__UNIXSTACK"; /// the segment for the self (dyld) modifing code stubs that has read, write and execute permissions -pub const SEG_IMPORT: &'static str = "__IMPORT"; +pub const SEG_IMPORT: &str = "__IMPORT"; + +/// Segment is readable. +pub const VM_PROT_READ: u32 = 0x1; +/// Segment is writable. +pub const VM_PROT_WRITE: u32 = 0x2; +/// Segment is executable. 
+pub const VM_PROT_EXECUTE: u32 = 0x4; pub mod cputype { @@ -198,11 +205,13 @@ pub mod cputype { pub type CpuSubType = u32; /// the mask for CPU feature flags - pub const CPU_SUBTYPE_MASK: u32 = 0xff000000; + pub const CPU_SUBTYPE_MASK: u32 = 0xff00_0000; /// mask for architecture bits - pub const CPU_ARCH_MASK: CpuType = 0xff000000; + pub const CPU_ARCH_MASK: CpuType = 0xff00_0000; /// the mask for 64 bit ABI - pub const CPU_ARCH_ABI64: CpuType = 0x01000000; + pub const CPU_ARCH_ABI64: CpuType = 0x0100_0000; + /// the mask for ILP32 ABI on 64 bit hardware + pub const CPU_ARCH_ABI64_32: CpuType = 0x0200_0000; // CPU Types pub const CPU_TYPE_ANY: CpuType = !0; @@ -216,6 +225,7 @@ pub mod cputype { pub const CPU_TYPE_HPPA: CpuType = 11; pub const CPU_TYPE_ARM: CpuType = 12; pub const CPU_TYPE_ARM64: CpuType = (CPU_TYPE_ARM | CPU_ARCH_ABI64); + pub const CPU_TYPE_ARM64_32: CpuType = (CPU_TYPE_ARM | CPU_ARCH_ABI64_32); pub const CPU_TYPE_MC88000: CpuType = 13; pub const CPU_TYPE_SPARC: CpuType = 14; pub const CPU_TYPE_I860: CpuType = 15; @@ -325,6 +335,9 @@ pub mod cputype { pub const CPU_SUBTYPE_ARM_V8: CpuSubType = 13; pub const CPU_SUBTYPE_ARM64_ALL: CpuSubType = 0; pub const CPU_SUBTYPE_ARM64_V8: CpuSubType = 1; + pub const CPU_SUBTYPE_ARM64_E: CpuSubType = 2; + pub const CPU_SUBTYPE_ARM64_32_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_ARM64_32_V8: CpuSubType = 1; macro_rules! 
cpu_flag_mapping { ( @@ -374,6 +387,7 @@ pub mod cputype { ("x86_64", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL), ("x86_64h", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H), ("arm64", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL), + ("arm64_32", CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_ALL), ("ppc970-64", CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_970), ("ppc", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL), ("i386", CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL), @@ -415,6 +429,8 @@ pub mod cputype { ("armv7m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7M), ("armv7em", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7EM), ("arm64v8", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_V8), + ("arm64e", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_E), + ("arm64_32_v8", CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_V8), } } diff --git a/third_party/rust/goblin/src/mach/exports.rs b/third_party/rust/goblin/src/mach/exports.rs index 131d4a0fe84c..56b0fd2995a3 100644 --- a/third_party/rust/goblin/src/mach/exports.rs +++ b/third_party/rust/goblin/src/mach/exports.rs @@ -4,15 +4,15 @@ // TODO: // (1) Weak of regular_symbol_info type probably needs to be added ? -// (3) /usr/lib/libstdc++.6.0.9.dylib has flag 0xc at many offsets... they're weak +// (3) /usr/lib/libstdc++.6.0.9.dylib has flag 0xc at many offsets... 
they're weak use core::ops::Range; -use scroll::{self, Pread, Uleb128}; -use error; +use scroll::{Pread, Uleb128}; +use crate::error; use core::fmt::{self, Debug}; -use mach::load_command; -use alloc::vec::Vec; -use alloc::string::String; +use crate::mach::load_command; +use crate::alloc::vec::Vec; +use crate::alloc::string::String; type Flag = u64; @@ -43,8 +43,8 @@ impl SymbolKind { _ => SymbolKind::UnknownSymbolKind(kind), } } - pub fn to_str(kind: SymbolKind) -> &'static str { - match kind { + pub fn to_str(&self) -> &'static str { + match self { SymbolKind::Regular => "Regular", SymbolKind::Absolute => "Absolute", SymbolKind::ThreadLocal => "Thread_LOCAL", @@ -84,7 +84,7 @@ impl<'a> ExportInfo<'a> { let address = bytes.pread::(offset)?; Ok(Regular { address: address.into(), - flags: flags + flags }) }; let reexport = |mut offset| -> error::Result> { @@ -97,9 +97,9 @@ impl<'a> ExportInfo<'a> { let lib = libs[lib_ordinal as usize]; let lib_symbol_name = if lib_symbol_name == "" { None } else { Some (lib_symbol_name)}; Ok(Reexport { - lib: lib, - lib_symbol_name: lib_symbol_name, - flags: flags + lib, + lib_symbol_name, + flags }) }; match SymbolKind::new(flags) { @@ -111,9 +111,9 @@ impl<'a> ExportInfo<'a> { offset += stub_offset.size(); let resolver_offset = bytes.pread::(offset)?; Ok(Stub { - stub_offset: stub_offset, - resolver_offset: resolver_offset, - flags: flags + stub_offset, + resolver_offset, + flags }) // else if (flags = kEXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) then (*0x40 unused*) } else { @@ -156,7 +156,7 @@ impl<'a> Export<'a> { ExportInfo::Regular { address, .. 
} => address, _ => 0x0, }; - Export { name: name, info: info, size: 0, offset: offset } + Export { name, info, size: 0, offset } } } @@ -199,23 +199,23 @@ impl<'a> ExportTrie<'a> { fn walk_trie(&self, libs: &[&'a str], current_symbol: String, start: usize, exports: &mut Vec>) -> error::Result<()> { if start < self.location.end { - let offset = &mut start.clone(); - let terminal_size = Uleb128::read(&self.data, offset)?; + let mut offset = start; + let terminal_size = Uleb128::read(&self.data, &mut offset)?; // let mut input = String::new(); // ::std::io::stdin().read_line(&mut input).unwrap(); // println!("@ {:#x} node: {:#x} current_symbol: {}", start, terminal_size, current_symbol); if terminal_size == 0 { - let nbranches = Uleb128::read(&self.data, offset)? as usize; + let nbranches = Uleb128::read(&self.data, &mut offset)? as usize; //println!("\t@ {:#x} BRAN {}", *offset, nbranches); - let branches = self.walk_branches(nbranches, current_symbol, *offset)?; + let branches = self.walk_branches(nbranches, current_symbol, offset)?; self.walk_nodes(libs, branches, exports) } else { // terminal node, but the tricky part is that they can have children... - let pos = *offset; + let pos = offset; let children_start = &mut (pos + terminal_size as usize); let nchildren = Uleb128::read(&self.data, children_start)? 
as usize; - let flags = Uleb128::read(&self.data, offset)?; - //println!("\t@ {:#x} TERM {} flags: {:#x}", *offset, nchildren, flags); - let info = ExportInfo::parse(&self.data, libs, flags, *offset)?; + let flags = Uleb128::read(&self.data, &mut offset)?; + //println!("\t@ {:#x} TERM {} flags: {:#x}", offset, nchildren, flags); + let info = ExportInfo::parse(&self.data, libs, flags, offset)?; let export = Export::new(current_symbol.clone(), info); //println!("\t{:?}", &export); exports.push(export); @@ -233,7 +233,7 @@ impl<'a> ExportTrie<'a> { /// Walk the export trie for symbols exported by this binary, using the provided `libs` to resolve re-exports pub fn exports(&self, libs: &[&'a str]) -> error::Result>> { - let offset = self.location.start.clone(); + let offset = self.location.start; let current_symbol = String::new(); let mut exports = Vec::new(); self.walk_trie(libs, current_symbol, offset, &mut exports)?; @@ -245,7 +245,7 @@ impl<'a> ExportTrie<'a> { let start = command.export_off as usize; let end = (command.export_size + command.export_off) as usize; ExportTrie { - data: bytes.as_ref(), + data: bytes, location: start..end, } } @@ -253,9 +253,10 @@ impl<'a> ExportTrie<'a> { impl<'a> Debug for ExportTrie<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - writeln!(fmt, "ExportTrie {{")?; - writeln!(fmt, " Location: {:#x}..{:#x}", self.location.start, self.location.end)?; - writeln!(fmt, "}}") + fmt.debug_struct("ExportTrie") + .field("data", &"<... redacted ...>") + .field("location", &format_args!("{:#x}..{:#x}", self.location.start, self.location.end)) + .finish() } } diff --git a/third_party/rust/goblin/src/mach/fat.rs b/third_party/rust/goblin/src/mach/fat.rs index 3178477e6c7d..28d795936eb4 100644 --- a/third_party/rust/goblin/src/mach/fat.rs +++ b/third_party/rust/goblin/src/mach/fat.rs @@ -7,12 +7,12 @@ if_std! 
{ use std::io::{self, Read}; } -use scroll::{self, Pread}; -use mach::constants::cputype::{CpuType, CpuSubType, CPU_SUBTYPE_MASK, CPU_ARCH_ABI64}; -use error; +use scroll::{Pread, Pwrite, SizeWith}; +use crate::mach::constants::cputype::{CpuType, CpuSubType, CPU_SUBTYPE_MASK, CPU_ARCH_ABI64}; +use crate::error; -pub const FAT_MAGIC: u32 = 0xcafebabe; -pub const FAT_CIGAM: u32 = 0xbebafeca; +pub const FAT_MAGIC: u32 = 0xcafe_babe; +pub const FAT_CIGAM: u32 = 0xbeba_feca; #[repr(C)] #[derive(Clone, Copy, Default, Pread, Pwrite, SizeWith)] @@ -28,19 +28,22 @@ pub const SIZEOF_FAT_HEADER: usize = 8; impl fmt::Debug for FatHeader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "0x{:x} nfat_arch: {}\n", self.magic, self.nfat_arch) + f.debug_struct("FatHeader") + .field("magic", &format_args!("0x{:x}", self.magic)) + .field("nfat_arch", &self.nfat_arch) + .finish() } } impl FatHeader { /// Reinterpret a `FatHeader` from `bytes` - pub fn from_bytes(bytes: &[u8; SIZEOF_FAT_HEADER]) -> FatHeader { + pub fn from_bytes(bytes: [u8; SIZEOF_FAT_HEADER]) -> FatHeader { let mut offset = 0; let magic = bytes.gread_with(&mut offset, scroll::BE).unwrap(); let nfat_arch = bytes.gread_with(&mut offset, scroll::BE).unwrap(); FatHeader { - magic: magic, - nfat_arch: nfat_arch, + magic, + nfat_arch, } } @@ -48,8 +51,8 @@ impl FatHeader { #[cfg(feature = "std")] pub fn from_fd(fd: &mut File) -> io::Result { let mut header = [0; SIZEOF_FAT_HEADER]; - try!(fd.read(&mut header)); - Ok(FatHeader::from_bytes(&header)) + fd.read_exact(&mut header)?; + Ok(FatHeader::from_bytes(header)) } /// Parse a mach-o fat header from the `bytes` diff --git a/third_party/rust/goblin/src/mach/header.rs b/third_party/rust/goblin/src/mach/header.rs index b4167701031b..de7e03f3fee5 100644 --- a/third_party/rust/goblin/src/mach/header.rs +++ b/third_party/rust/goblin/src/mach/header.rs @@ -1,13 +1,14 @@ //! 
A header contains minimal architecture information, the binary kind, the number of load commands, as well as an endianness hint use core::fmt; -use scroll::{ctx, Pwrite, Pread}; +use scroll::ctx; +use scroll::{Pread, Pwrite, SizeWith}; use scroll::ctx::SizeWith; -use plain::{self, Plain}; +use plain::Plain; -use mach::constants::cputype::{CpuType, CpuSubType, CPU_SUBTYPE_MASK}; -use error; -use container::{self, Container}; +use crate::mach::constants::cputype::{CpuType, CpuSubType, CPU_SUBTYPE_MASK}; +use crate::error; +use crate::container::{self, Container}; // Constants for the flags field of the mach_header /// the object file has no undefined references @@ -57,23 +58,23 @@ pub const MH_ROOT_SAFE: u32 = 0x40000; pub const MH_SETUID_SAFE: u32 = 0x80000; /// When this bit is set on a dylib, the static linker does not need to examine dependent dylibs to /// see if any are re-exported -pub const MH_NO_REEXPORTED_DYLIBS: u32 = 0x100000; +pub const MH_NO_REEXPORTED_DYLIBS: u32 = 0x0010_0000; /// When this bit is set, the OS will load the main executable at a random address. /// Only used in MH_EXECUTE filetypes. -pub const MH_PIE: u32 = 0x200000; +pub const MH_PIE: u32 = 0x0020_0000; /// Only for use on dylibs. When linking against a dylib that has this bit set, the static linker /// will automatically not create a LC_LOAD_DYLIB load command to the dylib if no symbols are being /// referenced from the dylib. -pub const MH_DEAD_STRIPPABLE_DYLIB: u32 = 0x400000; +pub const MH_DEAD_STRIPPABLE_DYLIB: u32 = 0x0040_0000; /// Contains a section of type S_THREAD_LOCAL_VARIABLES -pub const MH_HAS_TLV_DESCRIPTORS: u32 = 0x800000; +pub const MH_HAS_TLV_DESCRIPTORS: u32 = 0x0080_0000; /// When this bit is set, the OS will run the main executable with a non-executable heap even on /// platforms (e.g. i386) that don't require it. Only used in MH_EXECUTE filetypes. 
-pub const MH_NO_HEAP_EXECUTION: u32 = 0x1000000; +pub const MH_NO_HEAP_EXECUTION: u32 = 0x0100_0000; // TODO: verify this number is correct, it was previously 0x02000000 which could indicate a typo/data entry error /// The code was linked for use in an application extension. -pub const MH_APP_EXTENSION_SAFE: u32 = 0x2000000; +pub const MH_APP_EXTENSION_SAFE: u32 = 0x0200_0000; #[inline(always)] pub fn flag_to_str(flag: u32) -> &'static str { @@ -109,11 +110,11 @@ pub fn flag_to_str(flag: u32) -> &'static str { } /// Mach Header magic constant -pub const MH_MAGIC: u32 = 0xfeedface; -pub const MH_CIGAM: u32 = 0xcefaedfe; +pub const MH_MAGIC: u32 = 0xfeed_face; +pub const MH_CIGAM: u32 = 0xcefa_edfe; /// Mach Header magic constant for 64-bit -pub const MH_MAGIC_64: u32 = 0xfeedfacf; -pub const MH_CIGAM_64: u32 = 0xcffaedfe; +pub const MH_MAGIC_64: u32 = 0xfeed_facf; +pub const MH_CIGAM_64: u32 = 0xcffa_edfe; // Constants for the filetype field of the mach_header /// relocatable object file @@ -247,16 +248,16 @@ pub struct Header { impl fmt::Debug for Header { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, - "0x{:x} {} 0x{:x} {} {} {} 0x{:x} 0x{:x}", - self.magic, - self.cputype(), - self.cpusubtype(), - filetype_to_str(self.filetype), - self.ncmds, - self.sizeofcmds, - self.flags, - self.reserved) + f.debug_struct("Header") + .field("magic", &format_args!("0x{:x}", self.magic)) + .field("cputype", &self.cputype()) + .field("cpusubtype", &format_args!("0x{:x}", self.cpusubtype())) + .field("filetype", &filetype_to_str(self.filetype)) + .field("ncmds", &self.ncmds) + .field("sizeofcmds", &self.sizeofcmds) + .field("flags", &format_args!("0x{:x}", self.flags)) + .field("reserved", &format_args!("0x{:x}", self.reserved)) + .finish() } } @@ -320,7 +321,7 @@ impl From
for Header64 { } impl Header { - pub fn new(ctx: &container::Ctx) -> Self { + pub fn new(ctx: container::Ctx) -> Self { let mut header = Header::default(); header.magic = if ctx.is_big () { MH_MAGIC_64 } else { MH_MAGIC }; header @@ -357,10 +358,10 @@ impl ctx::SizeWith for Header { type Units = usize; fn size_with(container: &Container) -> usize { match container { - &Container::Little => { + Container::Little => { SIZEOF_HEADER_32 }, - &Container::Big => { + Container::Big => { SIZEOF_HEADER_64 }, } @@ -368,12 +369,12 @@ impl ctx::SizeWith for Header { } impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Header { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], container::Ctx { le, container }: container::Ctx) -> error::Result<(Self, Self::Size)> { let size = bytes.len(); if size < SIZEOF_HEADER_32 || size < SIZEOF_HEADER_64 { - let error = error::Error::Malformed(format!("bytes size is smaller than a Mach-o header")); + let error = error::Error::Malformed("bytes size is smaller than a Mach-o header".into()); Err(error) } else { match container { @@ -391,7 +392,7 @@ impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Header { } impl ctx::TryIntoCtx for Header { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> error::Result { match ctx.container { @@ -419,10 +420,10 @@ mod tests { #[test] fn test_parse_armv7_header() { - use mach::constants::cputype::CPU_TYPE_ARM; + use crate::mach::constants::cputype::CPU_TYPE_ARM; const CPU_SUBTYPE_ARM_V7: u32 = 9; use super::Header; - use container::{Ctx, Container, Endian}; + use crate::container::{Ctx, Container, Endian}; use scroll::{Pread}; let bytes = b"\xce\xfa\xed\xfe\x0c\x00\x00\x00\t\x00\x00\x00\n\x00\x00\x00\x06\x00\x00\x00\x8c\r\x00\x00\x00\x00\x00\x00\x1b\x00\x00\x00\x18\x00\x00\x00\xe0\xf7B\xbb\x1c\xf50w\xa6\xf7u\xa3\xba("; let header: Header = 
bytes.pread_with(0, Ctx::new(Container::Little, Endian::Little)).unwrap(); diff --git a/third_party/rust/goblin/src/mach/imports.rs b/third_party/rust/goblin/src/mach/imports.rs index e0cb8006e072..65bbd7584270 100644 --- a/third_party/rust/goblin/src/mach/imports.rs +++ b/third_party/rust/goblin/src/mach/imports.rs @@ -2,18 +2,18 @@ // table of tuples: // -// symbol flags are undocumented +// symbol flags are undocumented use core::ops::Range; use core::fmt::{self, Debug}; use scroll::{Sleb128, Uleb128, Pread}; -use alloc::vec::Vec; +use crate::alloc::vec::Vec; -use container; -use error; -use mach::load_command; -use mach::bind_opcodes; -use mach::segment; +use crate::container; +use crate::error; +use crate::mach::load_command; +use crate::mach::bind_opcodes; +use crate::mach::segment; #[derive(Debug)] /// Import binding information generated by running the Finite State Automaton programmed via `bind_opcodes` @@ -97,9 +97,9 @@ impl<'a> Import<'a> { name: bi.symbol_name, dylib: libs[bi.symbol_library_ordinal as usize], is_lazy: bi.is_lazy, - offset: offset, - size: size, - address: address, + offset, + size, + address, addend: bi.addend, is_weak: bi.is_weak(), start_of_sequence_offset: start_of_sequence_offset as u64 @@ -118,10 +118,11 @@ pub struct BindInterpreter<'a> { impl<'a> Debug for BindInterpreter<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - writeln!(fmt, "BindInterpreter {{")?; - writeln!(fmt, " Location: {:#x}..{:#x}", self.location.start, self.location.end)?; - writeln!(fmt, " Lazy Location: {:#x}..{:#x}", self.lazy_location.start, self.lazy_location.end)?; - writeln!(fmt, "}}") + fmt.debug_struct("BindInterpreter") + .field("data", &"<... 
redacted ...>") + .field("location", &format_args!("{:#x}..{:#x}", self.location.start, self.location.end)) + .field("lazy_location", &format_args!("{:#x}..{:#x}", self.lazy_location.start, self.lazy_location.end)) + .finish() } } @@ -135,45 +136,45 @@ impl<'a> BindInterpreter<'a> { let location = get_pos(command.bind_off, command.bind_size); let lazy_location = get_pos(command.lazy_bind_off, command.lazy_bind_size); BindInterpreter { - data: bytes.as_ref(), - location: location, - lazy_location: lazy_location, + data: bytes, + location, + lazy_location, } } /// Return the imports in this binary - pub fn imports(&self, libs: &[&'a str], segments: &[segment::Segment], ctx: &container::Ctx) -> error::Result>>{ + pub fn imports(&self, libs: &[&'a str], segments: &[segment::Segment], ctx: container::Ctx) -> error::Result>>{ let mut imports = Vec::new(); self.run(false, libs, segments, ctx, &mut imports)?; self.run( true, libs, segments, ctx, &mut imports)?; Ok(imports) } - fn run(&self, is_lazy: bool, libs: &[&'a str], segments: &[segment::Segment], ctx: &container::Ctx, imports: &mut Vec>) -> error::Result<()>{ - use mach::bind_opcodes::*; + fn run(&self, is_lazy: bool, libs: &[&'a str], segments: &[segment::Segment], ctx: container::Ctx, imports: &mut Vec>) -> error::Result<()>{ + use crate::mach::bind_opcodes::*; let location = if is_lazy { &self.lazy_location } else { &self.location }; let mut bind_info = BindInformation::new(is_lazy); - let offset = &mut location.start.clone(); + let mut offset = location.start; let mut start_of_sequence: usize = 0; - while *offset < location.end { - let opcode = self.data.gread::(offset)? as bind_opcodes::Opcode; + while offset < location.end { + let opcode = self.data.gread::(&mut offset)? 
as bind_opcodes::Opcode; // let mut input = String::new(); // ::std::io::stdin().read_line(&mut input).unwrap(); - // println!("opcode: {} ({:#x}) offset: {:#x}\n {:?}", opcode_to_str(opcode & BIND_OPCODE_MASK), opcode, *offset - location.start - 1, &bind_info); + // println!("opcode: {} ({:#x}) offset: {:#x}\n {:?}", opcode_to_str(opcode & BIND_OPCODE_MASK), opcode, offset - location.start - 1, &bind_info); match opcode & BIND_OPCODE_MASK { // we do nothing, don't update our records, and add a new, fresh record BIND_OPCODE_DONE => { bind_info = BindInformation::new(is_lazy); - start_of_sequence = *offset - location.start; + start_of_sequence = offset - location.start; }, BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { let symbol_library_ordinal = opcode & BIND_IMMEDIATE_MASK; bind_info.symbol_library_ordinal = symbol_library_ordinal; }, BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - let symbol_library_ordinal = Uleb128::read(&self.data, offset)?; + let symbol_library_ordinal = Uleb128::read(&self.data, &mut offset)?; bind_info.symbol_library_ordinal = symbol_library_ordinal as u8; }, BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => { @@ -184,8 +185,8 @@ impl<'a> BindInterpreter<'a> { }, BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { let symbol_flags = opcode & BIND_IMMEDIATE_MASK; - let symbol_name = self.data.pread::<&str>(*offset)?; - *offset = *offset + symbol_name.len() + 1; // second time this \0 caused debug woes + let symbol_name = self.data.pread::<&str>(offset)?; + offset += symbol_name.len() + 1; // second time this \0 caused debug woes bind_info.symbol_name = symbol_name; bind_info.symbol_flags = symbol_flags; }, @@ -194,26 +195,26 @@ impl<'a> BindInterpreter<'a> { bind_info.bind_type = bind_type; }, BIND_OPCODE_SET_ADDEND_SLEB => { - let addend = Sleb128::read(&self.data, offset)?; + let addend = Sleb128::read(&self.data, &mut offset)?; bind_info.addend = addend; }, BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { let seg_index = opcode & BIND_IMMEDIATE_MASK; // dyld sets the 
address to the segActualLoadAddress(segIndex) + uleb128 // address = segActualLoadAddress(segmentIndex) + read_uleb128(p, end); - let seg_offset = Uleb128::read(&self.data, offset)?; + let seg_offset = Uleb128::read(&self.data, &mut offset)?; bind_info.seg_index = seg_index; bind_info.seg_offset = seg_offset; }, BIND_OPCODE_ADD_ADDR_ULEB => { - let addr = Uleb128::read(&self.data, offset)?; + let addr = Uleb128::read(&self.data, &mut offset)?; let seg_offset = bind_info.seg_offset.wrapping_add(addr); bind_info.seg_offset = seg_offset; }, // record the record by placing its value into our list BIND_OPCODE_DO_BIND => { // from dyld: - // if ( address >= segmentEndAddress ) + // if ( address >= segmentEndAddress ) // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); // address += sizeof(intptr_t); @@ -223,19 +224,19 @@ impl<'a> BindInterpreter<'a> { }, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => { // dyld: - // if ( address >= segmentEndAddress ) + // if ( address >= segmentEndAddress ) // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); // address += read_uleb128(p, end) + sizeof(intptr_t); // we bind the old record, then increment bind info address for the next guy, plus the ptr offset *) imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); - let addr = Uleb128::read(&self.data, offset)?; + let addr = Uleb128::read(&self.data, &mut offset)?; let seg_offset = bind_info.seg_offset.wrapping_add(addr).wrapping_add(ctx.size() as u64); bind_info.seg_offset = seg_offset; }, BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => { - // dyld: - // if ( address >= segmentEndAddress ) + // dyld: + // if ( address >= segmentEndAddress ) // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, 
end, p); // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); // address += immediate*sizeof(intptr_t) + sizeof(intptr_t); @@ -244,7 +245,7 @@ impl<'a> BindInterpreter<'a> { imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); let scale = opcode & BIND_IMMEDIATE_MASK; let size = ctx.size() as u64; - let seg_offset = bind_info.seg_offset.wrapping_add(scale as u64 * size).wrapping_add(size); + let seg_offset = bind_info.seg_offset.wrapping_add(u64::from(scale) * size).wrapping_add(size); bind_info.seg_offset = seg_offset; }, BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => { @@ -252,14 +253,14 @@ impl<'a> BindInterpreter<'a> { // count = read_uleb128(p, end); // skip = read_uleb128(p, end); // for (uint32_t i=0; i < count; ++i) { - // if ( address >= segmentEndAddress ) + // if ( address >= segmentEndAddress ) // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); // address += skip + sizeof(intptr_t); // } // break; - let count = Uleb128::read(&self.data, offset)?; - let skip = Uleb128::read(&self.data, offset)?; + let count = Uleb128::read(&self.data, &mut offset)?; + let skip = Uleb128::read(&self.data, &mut offset)?; let skip_plus_size = skip + ctx.size() as u64; for _i in 0..count { imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); @@ -270,7 +271,7 @@ impl<'a> BindInterpreter<'a> { _ => { } } - } + } Ok(()) } } diff --git a/third_party/rust/goblin/src/mach/load_command.rs b/third_party/rust/goblin/src/mach/load_command.rs index d92e372f0611..3868b031c75b 100644 --- a/third_party/rust/goblin/src/mach/load_command.rs +++ b/third_party/rust/goblin/src/mach/load_command.rs @@ -1,8 +1,9 @@ //! 
Load commands tell the kernel and dynamic linker anything from how to load this binary into memory, what the entry point is, apple specific information, to which libraries it requires for dynamic linking -use error; +use crate::error; use core::fmt::{self, Display}; -use scroll::{self, ctx, Endian, Pread}; +use scroll::{ctx, Endian}; +use scroll::{Pread, Pwrite, IOread, IOwrite, SizeWith}; /////////////////////////////////////// // Load Commands from mach-o/loader.h @@ -141,7 +142,7 @@ impl SegmentCommand64 { /// target pathname (the name of the library as found for execution), and the /// minor version number. The address of where the headers are loaded is in /// header_addr. (THIS IS OBSOLETE and no longer supported). -#[repr(packed)] +#[repr(C)] #[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] pub struct Fvmlib { /// library's target pathname @@ -191,7 +192,7 @@ pub const SIZEOF_FVMLIB_COMMAND: usize = 20; /// An object that uses a dynamically linked shared library also contains a /// dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or /// LC_REEXPORT_DYLIB) for each library it uses. 
-#[repr(packed)] +#[repr(C)] #[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] pub struct Dylib { /// library's path name @@ -418,7 +419,7 @@ impl ThreadCommand { // uint32_t gs; // } let eip: u32 = self.thread_state[10]; - Ok(eip as u64) + Ok(u64::from(eip)) }, super::cputype::CPU_TYPE_X86_64 => { // struct x86_thread_state64_t { @@ -445,8 +446,8 @@ impl ThreadCommand { // uint64_t gs; // } let rip: u64 = - (self.thread_state[32] as u64) - | ((self.thread_state[33] as u64) << 32); + (u64::from(self.thread_state[32])) + | ((u64::from(self.thread_state[33])) << 32); Ok(rip) } super::cputype::CPU_TYPE_ARM => { @@ -458,9 +459,9 @@ impl ThreadCommand { // uint32_t cpsr; // } let pc: u32 = self.thread_state[15]; - Ok(pc as u64) + Ok(u64::from(pc)) } - super::cputype::CPU_TYPE_ARM64 => { + super::cputype::CPU_TYPE_ARM64 | super::cputype::CPU_TYPE_ARM64_32 => { // struct arm_thread_state64_t { // uint64_t x[29]; // uint64_t fp; @@ -471,14 +472,14 @@ impl ThreadCommand { // uint32_t pad; // } let pc: u64 = - (self.thread_state[64] as u64) - | ((self.thread_state[65] as u64) << 32); + (u64::from(self.thread_state[64])) + | ((u64::from(self.thread_state[65])) << 32); Ok(pc) } // https://github.com/m4b/goblin/issues/64 // Probably a G4 super::cputype::CPU_TYPE_POWERPC => { - Ok(self.thread_state[0] as u64) + Ok(u64::from(self.thread_state[0])) }, // I think the G5 was the last motorola powerpc processor used by apple before switching to intel cpus. 
// unfortunately I don't have any binaries on hand to see what its thread state looks like :/ @@ -495,10 +496,9 @@ impl ThreadCommand { } impl<'a> ctx::TryFromCtx<'a, Endian> for ThreadCommand { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], le: Endian) -> error::Result<(Self, Self::Size)> { - use scroll::{Pread}; let lc = bytes.pread_with::(0, le)?; // read the thread state flavor and length of the thread state @@ -519,16 +519,16 @@ impl<'a> ctx::TryFromCtx<'a, Endian> for ThreadCommand { // read the thread state let mut thread_state: [u32; 70] = [ 0; 70 ]; - for i in 0..count as usize { - thread_state[i] = thread_state_bytes.pread_with(i*4, le)?; + for (i, state) in thread_state.iter_mut().enumerate().take(count as usize) { + *state = thread_state_bytes.pread_with(i*4, le)?; } Ok((ThreadCommand{ cmd: lc.cmd, cmdsize: lc.cmdsize, - flavor: flavor, - count: count, - thread_state: thread_state, + flavor, + count, + thread_state, }, lc.cmdsize as _)) } } @@ -607,8 +607,8 @@ pub struct SymtabCommand { pub strsize: u32, } -impl SymtabCommand { - pub fn new() -> Self { +impl Default for SymtabCommand { + fn default() -> Self { SymtabCommand { cmd: LC_SYMTAB, cmdsize: SIZEOF_SYMTAB_COMMAND as u32, @@ -620,6 +620,12 @@ impl SymtabCommand { } } +impl SymtabCommand { + pub fn new() -> Self { + Default::default() + } +} + pub const SIZEOF_SYMTAB_COMMAND: usize = 24; /// This is the second set of the symbolic information which is used to support @@ -703,8 +709,8 @@ pub struct DysymtabCommand { pub nlocrel: u32, } -impl DysymtabCommand { - pub fn new() -> Self { +impl Default for DysymtabCommand { + fn default() -> Self { DysymtabCommand { cmd: LC_DYSYMTAB, cmdsize: SIZEOF_DYSYMTAB_COMMAND as u32, @@ -730,6 +736,12 @@ impl DysymtabCommand { } } +impl DysymtabCommand { + pub fn new() -> Self { + Default::default() + } +} + pub const SIZEOF_DYSYMTAB_COMMAND: usize = 80; // TODO: unimplemented @@ -1150,7 
+1162,7 @@ pub struct DataInCodeEntry { // Constants, et. al /////////////////////////////////////// -pub const LC_REQ_DYLD: u32 = 0x80000000; +pub const LC_REQ_DYLD: u32 = 0x8000_0000; pub const LC_LOAD_WEAK_DYLIB: u32 = 0x18 | LC_REQ_DYLD; pub const LC_RPATH: u32 = 0x1c | LC_REQ_DYLD; pub const LC_REEXPORT_DYLIB: u32 = 0x1f | LC_REQ_DYLD; @@ -1257,6 +1269,7 @@ pub fn cmd_to_str(cmd: u32) -> &'static str { /////////////////////////////////////////// #[derive(Debug)] +#[allow(clippy::large_enum_variant)] /// The various load commands as a cast-free variant/enum pub enum CommandVariant { Segment32 (SegmentCommand32), @@ -1310,10 +1323,9 @@ pub enum CommandVariant { } impl<'a> ctx::TryFromCtx<'a, Endian> for CommandVariant { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], le: Endian) -> error::Result<(Self, Self::Size)> { - use scroll::{Pread}; use self::CommandVariant::*; let lc = bytes.pread_with::(0, le)?; let size = lc.cmdsize as usize; @@ -1367,7 +1379,7 @@ impl<'a> ctx::TryFromCtx<'a, Endian> for CommandVariant { LC_DYLIB_CODE_SIGN_DRS => { let comm = bytes.pread_with:: (0, le)?; Ok((DylibCodeSignDrs (comm), size))}, LC_LINKER_OPTION => { let comm = bytes.pread_with:: (0, le)?; Ok((LinkerOption (comm), size))}, LC_LINKER_OPTIMIZATION_HINT => {let comm = bytes.pread_with:: (0, le)?; Ok((LinkerOptimizationHint (comm), size))}, - _ => Ok((Unimplemented (lc.clone()), size)), + _ => Ok((Unimplemented (lc), size)), } } } diff --git a/third_party/rust/goblin/src/mach/mod.rs b/third_party/rust/goblin/src/mach/mod.rs index 4bc1a9385c18..26a84afc5400 100644 --- a/third_party/rust/goblin/src/mach/mod.rs +++ b/third_party/rust/goblin/src/mach/mod.rs @@ -1,12 +1,14 @@ //! 
The Mach-o, mostly zero-copy, binary format parser and raw struct definitions use core::fmt; -use alloc::vec::Vec; +use crate::alloc::vec::Vec; -use scroll::{self, Pread, BE}; +use log::debug; + +use scroll::{Pread, BE}; use scroll::ctx::SizeWith; -use error; -use container; +use crate::error; +use crate::container; pub mod header; pub mod constants; @@ -28,8 +30,8 @@ pub fn peek(bytes: &[u8], offset: usize) -> error::Result { /// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number. pub fn parse_magic_and_ctx(bytes: &[u8], offset: usize) -> error::Result<(u32, Option)> { - use mach::header::*; - use container::Container; + use crate::mach::header::*; + use crate::container::Container; let magic = bytes.pread_with::(offset, BE)?; let ctx = match magic { MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => { @@ -97,7 +99,7 @@ impl<'a> MachO<'a> { } /// Return an iterator over all the symbols in this binary pub fn symbols(&self) -> symbols::SymbolIterator<'a> { - if let &Some(ref symbols) = &self.symbols { + if let Some(ref symbols) = self.symbols { symbols.into_iter() } else { symbols::SymbolIterator::default() @@ -128,7 +130,7 @@ impl<'a> MachO<'a> { /// Return the imported symbols in this binary that dyld knows about (if any) pub fn imports(&self) -> error::Result> { if let Some(ref interpreter) = self.bind_interpreter { - interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), &self.ctx) + interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx) } else { Ok(vec![]) } @@ -136,14 +138,14 @@ impl<'a> MachO<'a> { /// Parses the Mach-o binary from `bytes` at `offset` pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result> { let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; - let ctx = if let Some(ctx) = maybe_ctx { ctx } else { return Err(error::Error::BadMagic(magic as u64)) }; + let ctx = if let Some(ctx) = maybe_ctx { ctx } else { return 
Err(error::Error::BadMagic(u64::from(magic))) }; debug!("Ctx: {:?}", ctx); let offset = &mut offset; let header: header::Header = bytes.pread_with(*offset, ctx)?; debug!("Mach-o header: {:?}", header); let little_endian = ctx.le.is_little(); let is_64 = ctx.container.is_big(); - *offset = *offset + header::Header::size_with(&ctx.container); + *offset += header::Header::size_with(&ctx.container); let ncmds = header.ncmds; let mut cmds: Vec = Vec::with_capacity(ncmds); let mut symbols = None; @@ -223,19 +225,19 @@ impl<'a> MachO<'a> { }; Ok(MachO { - header: header, + header, load_commands: cmds, - segments: segments, - symbols: symbols, - libs: libs, - export_trie: export_trie, - bind_interpreter: bind_interpreter, - entry: entry, - old_style_entry: old_style_entry, - name: name, - ctx: ctx, - is_64: is_64, - little_endian: little_endian, + segments, + symbols, + libs, + export_trie, + bind_interpreter, + entry, + old_style_entry, + name, + ctx, + is_64, + little_endian, data: bytes, }) } @@ -263,7 +265,7 @@ impl<'a> Iterator for FatArchIterator<'a> { None } else { let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start; - let arch = self.data.pread_with::(offset, scroll::BE).map_err(|e| e.into()); + let arch = self.data.pread_with::(offset, scroll::BE).map_err(core::convert::Into::into); self.index += 1; Some(arch) } @@ -342,7 +344,7 @@ impl<'a> MultiArch<'a> { /// Try to get the Mach-o binary at `index` pub fn get(&self, index: usize) -> error::Result> { if index >= self.narches { - return Err(error::Error::Malformed(format!("Requested the {}-th binary, but there are only {} architectures in this container", index, self.narches).into())) + return Err(error::Error::Malformed(format!("Requested the {}-th binary, but there are only {} architectures in this container", index, self.narches))) } let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; let arch = self.data.pread_with::(offset, scroll::BE)?; @@ -378,6 +380,7 @@ impl<'a> fmt::Debug for MultiArch<'a> 
{ } #[derive(Debug)] +#[allow(clippy::large_enum_variant)] /// Either a collection of multiple architectures, or a single mach-o binary pub enum Mach<'a> { /// A "fat" multi-architecture binary container @@ -391,8 +394,7 @@ impl<'a> Mach<'a> { pub fn parse(bytes: &'a [u8]) -> error::Result { let size = bytes.len(); if size < 4 { - let error = error::Error::Malformed( - format!("size is smaller than a magical number")); + let error = error::Error::Malformed("size is smaller than a magical number".into()); return Err(error); } let magic = peek(&bytes, 0)?; diff --git a/third_party/rust/goblin/src/mach/relocation.rs b/third_party/rust/goblin/src/mach/relocation.rs index 4a55041e9676..940190d524e3 100644 --- a/third_party/rust/goblin/src/mach/relocation.rs +++ b/third_party/rust/goblin/src/mach/relocation.rs @@ -23,7 +23,8 @@ // by the link-editor. The value R_ABS is used for relocation entries for // absolute symbols which need no further relocation. use core::fmt; -use mach; +use crate::mach; +use scroll::{Pread, Pwrite, IOwrite, SizeWith, IOread}; // TODO: armv7 relocations are scattered, must and r_address with 0x8000_0000 to check if its scattered or not #[derive(Copy, Clone, Pread, Pwrite, IOwrite, SizeWith, IOread)] @@ -41,41 +42,41 @@ pub const SIZEOF_RELOCATION_INFO: usize = 8; impl RelocationInfo { /// Symbol index if `r_extern` == 1 or section ordinal if `r_extern` == 0. 
In bits :24 #[inline] - pub fn r_symbolnum(&self) -> usize { + pub fn r_symbolnum(self) -> usize { (self.r_info & 0x00ff_ffffu32) as usize } /// Was relocated pc relative already, 1 bit #[inline] - pub fn r_pcrel(&self) -> u8 { + pub fn r_pcrel(self) -> u8 { ((self.r_info & 0x0100_0000u32) >> 24) as u8 } /// The length of the relocation, 0=byte, 1=word, 2=long, 3=quad, 2 bits #[inline] - pub fn r_length(&self) -> u8 { + pub fn r_length(self) -> u8 { ((self.r_info & 0x0600_0000u32) >> 25) as u8 } /// Does not include value of sym referenced, 1 bit #[inline] - pub fn r_extern(&self) -> u8 { + pub fn r_extern(self) -> u8 { ((self.r_info & 0x0800_0000) >> 27) as u8 } /// Ff not 0, machine specific relocation type, in bits :4 #[inline] - pub fn r_type(&self) -> u8 { + pub fn r_type(self) -> u8 { ((self.r_info & 0xf000_0000) >> 28) as u8 } /// If true, this relocation is for a symbol; if false, or a section ordinal otherwise #[inline] - pub fn is_extern(&self) -> bool { + pub fn is_extern(self) -> bool { self.r_extern() == 1 } /// If true, this is a PIC relocation #[inline] - pub fn is_pic(&self) -> bool { + pub fn is_pic(self) -> bool { self.r_pcrel() > 0 } /// Returns a string representation of this relocation, given the machine `cputype` - pub fn to_str(&self, cputype: mach::cputype::CpuType) -> &'static str { + pub fn to_str(self, cputype: mach::cputype::CpuType) -> &'static str { reloc_to_str(self.r_type(), cputype) } } @@ -164,9 +165,9 @@ pub const ARM64_RELOC_TLVP_LOAD_PAGEOFF12: RelocType = 9; pub const ARM64_RELOC_ADDEND: RelocType = 10; pub fn reloc_to_str(reloc: RelocType, cputype: mach::cputype::CpuType) -> &'static str { - use mach::constants::cputype::*; + use crate::mach::constants::cputype::*; match cputype { - CPU_TYPE_ARM64 => { + CPU_TYPE_ARM64 | CPU_TYPE_ARM64_32 => { match reloc { ARM64_RELOC_UNSIGNED => "ARM64_RELOC_UNSIGNED", ARM64_RELOC_SUBTRACTOR => "ARM64_RELOC_SUBTRACTOR", diff --git a/third_party/rust/goblin/src/mach/segment.rs 
b/third_party/rust/goblin/src/mach/segment.rs index 4ced79b3c2c5..9ee7a2bfc11a 100644 --- a/third_party/rust/goblin/src/mach/segment.rs +++ b/third_party/rust/goblin/src/mach/segment.rs @@ -1,16 +1,18 @@ -use scroll::{self, Pread, Pwrite}; +use scroll::{Pread, Pwrite}; use scroll::ctx::{self, SizeWith}; +use log::{debug, warn}; + use core::fmt; use core::ops::{Deref, DerefMut}; -use alloc::boxed::Box; -use alloc::vec::Vec; +use crate::alloc::boxed::Box; +use crate::alloc::vec::Vec; -use container; -use error; +use crate::container; +use crate::error; -use mach::relocation::RelocationInfo; -use mach::load_command::{Section32, Section64, SegmentCommand32, SegmentCommand64, SIZEOF_SECTION_32, SIZEOF_SECTION_64, SIZEOF_SEGMENT_COMMAND_32, SIZEOF_SEGMENT_COMMAND_64, LC_SEGMENT, LC_SEGMENT_64}; +use crate::mach::relocation::RelocationInfo; +use crate::mach::load_command::{Section32, Section64, SegmentCommand32, SegmentCommand64, SIZEOF_SECTION_32, SIZEOF_SECTION_64, SIZEOF_SEGMENT_COMMAND_32, SIZEOF_SEGMENT_COMMAND_64, LC_SEGMENT, LC_SEGMENT_64}; pub struct RelocationIterator<'a> { data: &'a [u8], @@ -72,10 +74,10 @@ impl Section { let offset = self.reloff as usize; debug!("Relocations for {} starting at offset: {:#x}", self.name().unwrap_or("BAD_SECTION_NAME"), offset); RelocationIterator { - offset: offset, + offset, nrelocs: self.nreloc as usize, count: 0, - data: data, + data, ctx: ctx.le, } } @@ -139,8 +141,8 @@ impl From for Section { Section { sectname: section.sectname, segname: section.segname, - addr: section.addr as u64, - size: section.size as u64, + addr: u64::from(section.addr), + size: u64::from(section.size), offset: section.offset, align: section.align, reloff: section.reloff, @@ -167,7 +169,7 @@ impl From for Section { } impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Section { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], ctx: container::Ctx) -> Result<(Self, Self::Size), 
Self::Error> { match ctx.container { @@ -194,7 +196,7 @@ impl ctx::SizeWith for Section { } impl ctx::TryIntoCtx for Section { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> Result { if ctx.is_big () { @@ -255,7 +257,7 @@ impl<'a> Iterator for SectionIterator<'a> { }); Some(Ok((section, data))) }, - Err(e) => Some(Err(e.into())) + Err(e) => Some(Err(e)) } } } @@ -363,7 +365,7 @@ impl<'a> ctx::SizeWith for Segment<'a> { } impl<'a> ctx::TryIntoCtx for Segment<'a> { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> Result { let segment_size = Self::size_with(&ctx); @@ -386,6 +388,16 @@ impl<'a> ctx::IntoCtx for Segment<'a> { } } +/// Read data that belongs to a segment if the offset is within the boundaries of bytes. +fn segment_data(bytes: &[u8], fileoff :u64, filesize :u64) -> Result<&[u8], error::Error> { + let data :&[u8] = if filesize != 0 { + bytes.pread_with(fileoff as usize, filesize as usize)? + } else { + &[] + }; + Ok(data) +} + impl<'a> Segment<'a> { /// Create a new, blank segment, with cmd either `LC_SEGMENT_64`, or `LC_SEGMENT`, depending on `ctx`. /// **NB** You are responsible for providing a correctly marshalled byte array as the sections. You should not use this for anything other than writing. 
@@ -405,7 +417,7 @@ impl<'a> Segment<'a> { data: sections, offset: 0, raw_data: &[], - ctx: ctx, + ctx, } } /// Get the name of this segment @@ -422,28 +434,26 @@ impl<'a> Segment<'a> { } /// Convert the raw C 32-bit segment command to a generalized version pub fn from_32(bytes: &'a[u8], segment: &SegmentCommand32, offset: usize, ctx: container::Ctx) -> Result { - let data = bytes.pread_with(segment.fileoff as usize, segment.filesize as usize)?; Ok(Segment { cmd: segment.cmd, cmdsize: segment.cmdsize, segname: segment.segname, - vmaddr: segment.vmaddr as u64, - vmsize: segment.vmsize as u64, - fileoff: segment.fileoff as u64, - filesize: segment.filesize as u64, + vmaddr: u64::from(segment.vmaddr), + vmsize: u64::from(segment.vmsize), + fileoff: u64::from(segment.fileoff), + filesize: u64::from(segment.filesize), maxprot: segment.maxprot, initprot: segment.initprot, nsects: segment.nsects, flags: segment.flags, - data: data, - offset: offset, + data: segment_data(bytes, segment.fileoff as u64, segment.filesize as u64)?, + offset, raw_data: bytes, - ctx: ctx, + ctx, }) } /// Convert the raw C 64-bit segment command to a generalized version pub fn from_64(bytes: &'a [u8], segment: &SegmentCommand64, offset: usize, ctx: container::Ctx) -> Result { - let data = bytes.pread_with(segment.fileoff as usize, segment.filesize as usize)?; Ok(Segment { cmd: segment.cmd, cmdsize: segment.cmdsize, @@ -456,10 +466,10 @@ impl<'a> Segment<'a> { initprot: segment.initprot, nsects: segment.nsects, flags: segment.flags, - offset: offset, - data: data, + data: segment_data(bytes, segment.fileoff, segment.filesize)?, + offset, raw_data: bytes, - ctx: ctx, + ctx, }) } } @@ -497,7 +507,7 @@ impl<'a> Segments<'a> { pub fn new(ctx: container::Ctx) -> Self { Segments { segments: Vec::new(), - ctx: ctx, + ctx, } } /// Get every section from every segment diff --git a/third_party/rust/goblin/src/mach/symbols.rs b/third_party/rust/goblin/src/mach/symbols.rs index a96536120ed0..87e170864056 
100644 --- a/third_party/rust/goblin/src/mach/symbols.rs +++ b/third_party/rust/goblin/src/mach/symbols.rs @@ -2,11 +2,12 @@ //! //! Symbols are essentially a type, offset, and the symbol name -use scroll::{ctx, Pread, Pwrite}; +use scroll::ctx; use scroll::ctx::SizeWith; -use error; -use container::{self, Container}; -use mach::load_command; +use scroll::{Pread, Pwrite, SizeWith, IOread, IOwrite}; +use crate::error; +use crate::container::{self, Container}; +use crate::mach::load_command; use core::fmt::{self, Debug}; // The n_type field really contains four fields which are used via the following masks. @@ -82,6 +83,48 @@ pub const NLIST_TYPE_MASK: u8 = 0xe; pub const NLIST_TYPE_GLOBAL: u8 = 0x1; pub const NLIST_TYPE_LOCAL: u8 = 0x0; +/// Mask for reference flags of `n_desc` field. +pub const REFERENCE_TYPE: u16 = 0xf; +/// This symbol is a reference to an external non-lazy (data) symbol. +pub const REFERENCE_FLAG_UNDEFINED_NON_LAZY: u16 = 0x0; +/// This symbol is a reference to an external lazy symbol—that is, to a function call. +pub const REFERENCE_FLAG_UNDEFINED_LAZY: u16 = 0x1; +/// This symbol is defined in this module. +pub const REFERENCE_FLAG_DEFINED: u16 = 0x2; +/// This symbol is defined in this module and is visible only to modules within this +/// shared library. +pub const REFERENCE_FLAG_PRIVATE_DEFINED: u16 = 0x3; +/// This symbol is defined in another module in this file, is a non-lazy (data) symbol, +/// and is visible only to modules within this shared library. +pub const REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY: u16 = 0x4; +/// This symbol is defined in another module in this file, is a lazy (function) symbol, +/// and is visible only to modules within this shared library. +pub const REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY: u16 = 0x5; + +// Additional flags of n_desc field. 
+ +/// Must be set for any defined symbol that is referenced by dynamic-loader APIs +/// (such as dlsym and NSLookupSymbolInImage) and not ordinary undefined symbol +/// references. The `strip` tool uses this bit to avoid removing symbols that must +/// exist: If the symbol has this bit set, `strip` does not strip it. +pub const REFERENCED_DYNAMICALLY: u16 = 0x10; +/// Sometimes used by the dynamic linker at runtime in a fully linked image. Do not +/// set this bit in a fully linked image. +pub const N_DESC_DISCARDED: u16 = 0x20; +/// When set in a relocatable object file (file type MH_OBJECT) on a defined symbol, +/// indicates to the static linker to never dead-strip the symbol. +// (Note that the same bit (0x20) is used for two nonoverlapping purposes.) +pub const N_NO_DEAD_STRIP: u16 = 0x20; +/// Indicates that this undefined symbol is a weak reference. If the dynamic linker +/// cannot find a definition for this symbol, it sets the address of this symbol to 0. +/// The static linker sets this symbol given the appropriate weak-linking flags. +pub const N_WEAK_REF: u16 = 0x40; +/// Indicates that this symbol is a weak definition. If the static linker or the +/// dynamic linker finds another (non-weak) definition for this symbol, the weak +/// definition is ignored. Only symbols in a coalesced section can be marked as a +/// weak definition. 
+pub const N_WEAK_DEF: u16 = 0x80; + pub fn n_type_to_str(n_type: u8) -> &'static str { match n_type { N_UNDF => "N_UNDF", @@ -112,13 +155,13 @@ pub const SIZEOF_NLIST_32: usize = 12; impl Debug for Nlist32 { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "strx: {:04} type: {:#02x} sect: {:#x} desc: {:#03x} value: {:#x}", - self.n_strx, - self.n_type, - self.n_sect, - self.n_desc, - self.n_value, - ) + fmt.debug_struct("Nlist32") + .field("n_strx", &format_args!("{:04}", self.n_strx)) + .field("n_type", &format_args!("{:#02x}", self.n_type)) + .field("n_sect", &format_args!("{:#x}", self.n_sect)) + .field("n_desc", &format_args!("{:#03x}", self.n_desc)) + .field("n_value", &format_args!("{:#x}", self.n_value)) + .finish() } } @@ -141,13 +184,13 @@ pub const SIZEOF_NLIST_64: usize = 16; impl Debug for Nlist64 { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "strx: {:04} type: {:#02x} sect: {:#x} desc: {:#03x} value: {:#x}", - self.n_strx, - self.n_type, - self.n_sect, - self.n_desc, - self.n_value, - ) + fmt.debug_struct("Nlist64") + .field("n_strx", &format_args!("{:04}", self.n_strx)) + .field("n_type", &format_args!("{:#02x}", self.n_type)) + .field("n_sect", &format_args!("{:#x}", self.n_sect)) + .field("n_desc", &format_args!("{:#03x}", self.n_desc)) + .field("n_value", &format_args!("{:#x}", self.n_value)) + .finish() } } @@ -178,6 +221,10 @@ impl Nlist { pub fn is_global(&self) -> bool { self.n_type & N_EXT != 0 } + /// Whether this symbol is weak or not + pub fn is_weak(&self) -> bool { + self.n_desc & (N_WEAK_REF | N_WEAK_DEF) != 0 + } /// Whether this symbol is undefined or not pub fn is_undefined(&self) -> bool { self.n_sect == 0 && self.n_type & N_TYPE == N_UNDF @@ -191,7 +238,6 @@ impl Nlist { impl ctx::SizeWith for Nlist { type Units = usize; fn size_with(ctx: &container::Ctx) -> usize { - use container::Container; match ctx.container { Container::Little => { SIZEOF_NLIST_32 @@ -210,7 +256,7 @@ impl From 
for Nlist { n_type: nlist.n_type, n_sect: nlist.n_sect as usize, n_desc: nlist.n_desc, - n_value: nlist.n_value as u64, + n_value: u64::from(nlist.n_value), } } } @@ -252,9 +298,9 @@ impl From for Nlist64 { } impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Nlist { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; - fn try_from_ctx(bytes: &'a [u8], container::Ctx { container, le }: container::Ctx) -> ::error::Result<(Self, Self::Size)> { + fn try_from_ctx(bytes: &'a [u8], container::Ctx { container, le }: container::Ctx) -> crate::error::Result<(Self, Self::Size)> { let nlist = match container { Container::Little => { (bytes.pread_with::(0, le)?.into(), SIZEOF_NLIST_32) @@ -268,7 +314,7 @@ impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Nlist { } impl ctx::TryIntoCtx for Nlist { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_into_ctx(self, bytes: &mut [u8], container::Ctx { container, le }: container::Ctx) -> Result { @@ -298,18 +344,18 @@ pub struct SymbolsCtx { } impl<'a, T: ?Sized> ctx::TryFromCtx<'a, SymbolsCtx, T> for Symbols<'a> where T: AsRef<[u8]> { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a T, SymbolsCtx { nsyms, strtab, ctx - }: SymbolsCtx) -> ::error::Result<(Self, Self::Size)> { + }: SymbolsCtx) -> crate::error::Result<(Self, Self::Size)> { let data = bytes.as_ref(); Ok ((Symbols { - data: data, + data, start: 0, - nsyms: nsyms, - strtab: strtab, - ctx: ctx, + nsyms, + strtab, + ctx, }, data.len())) } } @@ -337,10 +383,10 @@ impl<'a> Iterator for SymbolIterator<'a> { Ok(name) => { Some(Ok((name, symbol))) }, - Err(e) => return Some(Err(e.into())) + Err(e) => Some(Err(e.into())) } }, - Err(e) => return Some(Err(e.into())) + Err(e) => Some(Err(e)) } } } @@ -372,16 +418,16 @@ impl<'a> Symbols<'a> { let nsyms = count; Ok (Symbols { data: bytes, - start: start, - nsyms: nsyms, - strtab: strtab, + start, + 
nsyms, + strtab, ctx: container::Ctx::default(), }) } pub fn parse(bytes: &'a [u8], symtab: &load_command::SymtabCommand, ctx: container::Ctx) -> error::Result> { // we need to normalize the strtab offset before we receive the truncated bytes in pread_with let strtab = symtab.stroff - symtab.symoff; - Ok(bytes.pread_with(symtab.symoff as usize, SymbolsCtx { nsyms: symtab.nsyms as usize, strtab: strtab as usize, ctx: ctx })?) + Ok(bytes.pread_with(symtab.symoff as usize, SymbolsCtx { nsyms: symtab.nsyms as usize, strtab: strtab as usize, ctx })?) } pub fn iter(&self) -> SymbolIterator<'a> { @@ -396,7 +442,7 @@ impl<'a> Symbols<'a> { } /// Parses a single Nlist symbol from the binary, with its accompanying name - pub fn get(&self, index: usize) -> ::error::Result<(&'a str, Nlist)> { + pub fn get(&self, index: usize) -> crate::error::Result<(&'a str, Nlist)> { let sym: Nlist = self.data.pread_with(self.start + (index * Nlist::size_with(&self.ctx)), self.ctx)?; let name = self.data.pread(self.strtab + sym.n_strx)?; Ok((name, sym)) @@ -405,16 +451,22 @@ impl<'a> Symbols<'a> { impl<'a> Debug for Symbols<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - writeln!(fmt, "Data: {} start: {:#?}, nsyms: {} strtab: {:#x}", self.data.len(), self.start, self.nsyms, self.strtab)?; - writeln!(fmt, "Symbols: {{")?; + fmt.debug_struct("Symbols") + .field("data", &self.data.len()) + .field("start", &format_args!("{:#?}", self.start)) + .field("nsyms", &self.nsyms) + .field("strtab", &format_args!("{:#x}", self.strtab)) + .finish()?; + + writeln!(fmt, "Symbol List {{")?; for (i, res) in self.iter().enumerate() { match res { - Ok((name, nlist)) => { - writeln!(fmt, "{: >10x} {} sect: {:#x} type: {:#02x} desc: {:#03x}", nlist.n_value, name, nlist.n_sect, nlist.n_type, nlist.n_desc)?; - }, - Err(error) => { - writeln!(fmt, " Bad symbol, index: {}, sym: {:?}", i, error)?; - } + Ok((name, nlist)) => writeln!( + fmt, + "{: >10x} {} sect: {:#x} type: {:#02x} desc: {:#03x}", + 
nlist.n_value, name, nlist.n_sect, nlist.n_type, nlist.n_desc + )?, + Err(error) => writeln!(fmt, " Bad symbol, index: {}, sym: {:?}", i, error)?, } } writeln!(fmt, "}}") diff --git a/third_party/rust/goblin/src/pe/data_directories.rs b/third_party/rust/goblin/src/pe/data_directories.rs index 4665ff671deb..d5522ff5a1f5 100644 --- a/third_party/rust/goblin/src/pe/data_directories.rs +++ b/third_party/rust/goblin/src/pe/data_directories.rs @@ -1,5 +1,5 @@ -use error; -use scroll::{self, Pread}; +use crate::error; +use scroll::{Pread, Pwrite, SizeWith}; #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default)] @@ -28,12 +28,12 @@ impl DataDirectories { pub fn parse(bytes: &[u8], count: usize, offset: &mut usize) -> error::Result { let mut data_directories = [None; NUM_DATA_DIRECTORIES]; if count > NUM_DATA_DIRECTORIES { return Err (error::Error::Malformed(format!("data directory count ({}) is greater than maximum number of data directories ({})", count, NUM_DATA_DIRECTORIES))) } - for i in 0..count { + for dir in data_directories.iter_mut().take(count) { let dd = DataDirectory::parse(bytes, offset)?; let dd = if dd.virtual_address == 0 && dd.size == 0 { None } else { Some (dd) }; - data_directories[i] = dd; + *dir = dd; } - Ok (DataDirectories { data_directories: data_directories }) + Ok (DataDirectories { data_directories }) } pub fn get_export_table(&self) -> &Option { let idx = 0; diff --git a/third_party/rust/goblin/src/pe/debug.rs b/third_party/rust/goblin/src/pe/debug.rs index 3455f57e8ecf..6f01c6133b7f 100644 --- a/third_party/rust/goblin/src/pe/debug.rs +++ b/third_party/rust/goblin/src/pe/debug.rs @@ -1,9 +1,9 @@ -use scroll::{self, Pread}; -use error; +use scroll::{Pread, Pwrite, SizeWith}; +use crate::error; -use pe::section_table; -use pe::utils; -use pe::data_directories; +use crate::pe::section_table; +use crate::pe::utils; +use crate::pe::data_directories; #[derive(Debug, PartialEq, Copy, Clone, Default)] pub struct DebugData<'a> { @@ -12,16 +12,16 
@@ pub struct DebugData<'a> { } impl<'a> DebugData<'a> { - pub fn parse(bytes: &'a [u8], dd: &data_directories::DataDirectory, sections: &[section_table::SectionTable]) -> error::Result { - let image_debug_directory = ImageDebugDirectory::parse(bytes, dd, sections)?; + pub fn parse(bytes: &'a [u8], dd: data_directories::DataDirectory, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result { + let image_debug_directory = ImageDebugDirectory::parse(bytes, dd, sections, file_alignment)?; let codeview_pdb70_debug_info = CodeviewPDB70DebugInfo::parse(bytes, &image_debug_directory)?; Ok(DebugData{ - image_debug_directory: image_debug_directory, - codeview_pdb70_debug_info: codeview_pdb70_debug_info + image_debug_directory, + codeview_pdb70_debug_info }) } - + /// Return this executable's debugging GUID, suitable for matching against a PDB file. pub fn guid(&self) -> Option<[u8; 16]> { self.codeview_pdb70_debug_info @@ -54,18 +54,18 @@ pub const IMAGE_DEBUG_TYPE_FIXUP: u32 = 6; pub const IMAGE_DEBUG_TYPE_BORLAND: u32 = 9; impl ImageDebugDirectory { - fn parse(bytes: &[u8], dd: &data_directories::DataDirectory, sections: &[section_table::SectionTable]) -> error::Result { + fn parse(bytes: &[u8], dd: data_directories::DataDirectory, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result { let rva = dd.virtual_address as usize; - let offset = utils::find_offset(rva, sections).ok_or(error::Error::Malformed(format!("Cannot map ImageDebugDirectory rva {:#x} into offset", rva)))?;; + let offset = utils::find_offset(rva, sections, file_alignment).ok_or_else(|| error::Error::Malformed(format!("Cannot map ImageDebugDirectory rva {:#x} into offset", rva)))?;; let idd: Self = bytes.pread_with(offset, scroll::LE)?; Ok (idd) } } -pub const CODEVIEW_PDB70_MAGIC: u32 = 0x53445352; -pub const CODEVIEW_PDB20_MAGIC: u32 = 0x3031424e; -pub const CODEVIEW_CV50_MAGIC: u32 = 0x3131424e; -pub const CODEVIEW_CV41_MAGIC: u32 = 0x3930424e; +pub 
const CODEVIEW_PDB70_MAGIC: u32 = 0x5344_5352; +pub const CODEVIEW_PDB20_MAGIC: u32 = 0x3031_424e; +pub const CODEVIEW_CV50_MAGIC: u32 = 0x3131_424e; +pub const CODEVIEW_CV41_MAGIC: u32 = 0x3930_424e; // http://llvm.org/doxygen/CVDebugRecord_8h_source.html #[repr(C)] @@ -104,17 +104,15 @@ impl<'a> CodeviewPDB70DebugInfo<'a> { // read the rest let mut signature: [u8; 16] = [0; 16]; - for i in 0..16 { - signature[i] = bytes.gread_with(&mut offset, scroll::LE)?; - } + signature.copy_from_slice(bytes.gread_with(&mut offset, 16)?); let age: u32 = bytes.gread_with(&mut offset, scroll::LE)?; let filename = &bytes[offset..offset + filename_length]; Ok(Some(CodeviewPDB70DebugInfo{ - codeview_signature: codeview_signature, - signature: signature, - age: age, - filename: filename, + codeview_signature, + signature, + age, + filename, })) } } diff --git a/third_party/rust/goblin/src/pe/exception.rs b/third_party/rust/goblin/src/pe/exception.rs new file mode 100644 index 000000000000..d52608ef5d48 --- /dev/null +++ b/third_party/rust/goblin/src/pe/exception.rs @@ -0,0 +1,826 @@ +//! Exception handling and stack unwinding for x64. +//! +//! Exception information is exposed via the [`ExceptionData`] structure. If present in a PE file, +//! it contains a list of [`RuntimeFunction`] entries that can be used to get [`UnwindInfo`] for a +//! particular code location. +//! +//! Unwind information contains a list of unwind codes which specify the operations that are +//! necessary to restore registers (including the stack pointer RSP) when unwinding out of a +//! function. +//! +//! Depending on where the instruction pointer lies, there are three strategies to unwind: +//! +//! 1. If the RIP is within an epilog, then control is leaving the function, there can be no +//! exception handler associated with this exception for this function, and the effects of the +//! epilog must be continued to compute the context of the caller function. To determine if the +//! 
RIP is within an epilog, the code stream from RIP on is examined. If that code stream can be +//! matched to the trailing portion of a legitimate epilog, then it's in an epilog, and the +//! remaining portion of the epilog is simulated, with the context record updated as each +//! instruction is processed. After this, step 1 is repeated. +//! +//! 2. Case b) If the RIP lies within the prologue, then control has not entered the function, +//! there can be no exception handler associated with this exception for this function, and the +//! effects of the prolog must be undone to compute the context of the caller function. The RIP +//! is within the prolog if the distance from the function start to the RIP is less than or +//! equal to the prolog size encoded in the unwind info. The effects of the prolog are unwound +//! by scanning forward through the unwind codes array for the first entry with an offset less +//! than or equal to the offset of the RIP from the function start, then undoing the effect of +//! all remaining items in the unwind code array. Step 1 is then repeated. +//! +//! 3. If the RIP is not within a prolog or epilog and the function has an exception handler, then +//! the language-specific handler is called. The handler scans its data and calls filter +//! functions as appropriate. The language-specific handler can return that the exception was +//! handled or that the search is to be continued. It can also initiate an unwind directly. +//! +//! For more information, see [x64 exception handling]. +//! +//! [`ExceptionData`]: struct.ExceptionData.html +//! [`RuntimeFunction`]: struct.RuntimeFunction.html +//! [`UnwindInfo`]: struct.UnwindInfo.html +//! 
[x64 exception handling]: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2017 + +use core::cmp::Ordering; +use core::fmt; +use core::iter::FusedIterator; + +use scroll::ctx::TryFromCtx; +use scroll::{self, Pread, Pwrite}; + +use crate::error; + +use crate::pe::data_directories; +use crate::pe::section_table; +use crate::pe::utils; + +/// The function has an exception handler that should be called when looking for functions that need +/// to examine exceptions. +const UNW_FLAG_EHANDLER: u8 = 0x01; +/// The function has a termination handler that should be called when unwinding an exception. +const UNW_FLAG_UHANDLER: u8 = 0x02; +/// This unwind info structure is not the primary one for the procedure. Instead, the chained unwind +/// info entry is the contents of a previous `RUNTIME_FUNCTION` entry. If this flag is set, then the +/// `UNW_FLAG_EHANDLER` and `UNW_FLAG_UHANDLER` flags must be cleared. Also, the frame register and +/// fixed-stack allocation fields must have the same values as in the primary unwind info. +const UNW_FLAG_CHAININFO: u8 = 0x04; + +/// info == register number +const UWOP_PUSH_NONVOL: u8 = 0; +/// no info, alloc size in next 2 slots +const UWOP_ALLOC_LARGE: u8 = 1; +/// info == size of allocation / 8 - 1 +const UWOP_ALLOC_SMALL: u8 = 2; +/// no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 +const UWOP_SET_FPREG: u8 = 3; +/// info == register number, offset in next slot +const UWOP_SAVE_NONVOL: u8 = 4; +/// info == register number, offset in next 2 slots +const UWOP_SAVE_NONVOL_FAR: u8 = 5; +/// changes the structure of unwind codes to `struct Epilogue`. 
+/// (was UWOP_SAVE_XMM in version 1, but deprecated and removed) +const UWOP_EPILOG: u8 = 6; +/// reserved +/// (was UWOP_SAVE_XMM_FAR in version 1, but deprecated and removed) +const UWOP_SPARE_CODE: u8 = 7; +/// info == XMM reg number, offset in next slot +const UWOP_SAVE_XMM128: u8 = 8; +/// info == XMM reg number, offset in next 2 slots +const UWOP_SAVE_XMM128_FAR: u8 = 9; +/// info == 0: no error-code, 1: error-code +const UWOP_PUSH_MACHFRAME: u8 = 10; + +/// Size of `RuntimeFunction` entries. +const RUNTIME_FUNCTION_SIZE: usize = 12; +/// Size of unwind code slots. Codes take 1 - 3 slots. +const UNWIND_CODE_SIZE: usize = 2; + +/// An unwind entry for a range of a function. +/// +/// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`]. +/// +/// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Default, Pread, Pwrite)] +pub struct RuntimeFunction { + /// Function start address. + pub begin_address: u32, + /// Function end address. + pub end_address: u32, + /// Unwind info address. + pub unwind_info_address: u32, +} + +impl fmt::Debug for RuntimeFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("RuntimeFunction") + .field("begin_address", &format_args!("{:#x}", self.begin_address)) + .field("end_address", &format_args!("{:#x}", self.end_address)) + .field( + "unwind_info_address", + &format_args!("{:#x}", self.unwind_info_address), + ) + .finish() + } +} + +/// Iterator over runtime function entries in [`ExceptionData`](struct.ExceptionData.html). 
+#[derive(Debug)] +pub struct RuntimeFunctionIterator<'a> { + data: &'a [u8], +} + +impl Iterator for RuntimeFunctionIterator<'_> { + type Item = error::Result; + + fn next(&mut self) -> Option { + if self.data.is_empty() { + return None; + } + + Some(match self.data.pread_with(0, scroll::LE) { + Ok(func) => { + self.data = &self.data[RUNTIME_FUNCTION_SIZE..]; + Ok(func) + } + Err(error) => { + self.data = &[]; + Err(error.into()) + } + }) + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.data.len() / RUNTIME_FUNCTION_SIZE; + (len, Some(len)) + } +} + +impl FusedIterator for RuntimeFunctionIterator<'_> {} +impl ExactSizeIterator for RuntimeFunctionIterator<'_> {} + +/// An x64 register used during unwinding. +/// +/// - `0` - `15`: General purpose registers +/// - `17` - `32`: XMM registers +#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub struct Register(pub u8); + +impl Register { + fn xmm(number: u8) -> Self { + Register(number + 17) + } + + /// Returns the x64 register name. + pub fn name(self) -> &'static str { + match self.0 { + 0 => "$rax", + 1 => "$rcx", + 2 => "$rdx", + 3 => "$rbx", + 4 => "$rsp", + 5 => "$rbp", + 6 => "$rsi", + 7 => "$rdi", + 8 => "$r8", + 9 => "$r9", + 10 => "$r10", + 11 => "$r11", + 12 => "$r12", + 13 => "$r13", + 14 => "$r14", + 15 => "$r15", + 16 => "$rip", + 17 => "$xmm0", + 18 => "$xmm1", + 19 => "$xmm2", + 20 => "$xmm3", + 21 => "$xmm4", + 22 => "$xmm5", + 23 => "$xmm6", + 24 => "$xmm7", + 25 => "$xmm8", + 26 => "$xmm9", + 27 => "$xmm10", + 28 => "$xmm11", + 29 => "$xmm12", + 30 => "$xmm13", + 31 => "$xmm14", + 32 => "$xmm15", + _ => "", + } + } +} + +/// An unsigned offset to a value in the local stack frame. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum StackFrameOffset { + /// Offset from the current RSP, that is, the lowest address of the fixed stack allocation. + /// + /// To restore this register, read the value at the given offset from the RSP. 
+ RSP(u32), + + /// Offset from the value of the frame pointer register. + /// + /// To restore this register, read the value at the given offset from the FP register, reduced + /// by the `frame_register_offset` value specified in the `UnwindInfo` structure. By definition, + /// the frame pointer register is any register other than RAX (`0`). + FP(u32), +} + +impl StackFrameOffset { + fn with_ctx(offset: u32, ctx: UnwindOpContext) -> Self { + match ctx.frame_register { + Register(0) => StackFrameOffset::RSP(offset), + Register(_) => StackFrameOffset::FP(offset), + } + } +} + +impl fmt::Display for Register { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.name()) + } +} + +/// An unwind operation corresponding to code in the function prolog. +/// +/// Unwind operations can be used to reverse the effects of the function prolog and restore register +/// values of parent stack frames that have been saved to the stack. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum UnwindOperation { + /// Push a nonvolatile integer register, decrementing `RSP` by 8. + PushNonVolatile(Register), + + /// Allocate a fixed-size area on the stack. + Alloc(u32), + + /// Establish the frame pointer register by setting the register to some offset of the current + /// RSP. The use of an offset permits establishing a frame pointer that points to the middle of + /// the fixed stack allocation, helping code density by allowing more accesses to use short + /// instruction forms. + SetFPRegister, + + /// Save a nonvolatile integer register on the stack using a MOV instead of a PUSH. This code is + /// primarily used for shrink-wrapping, where a nonvolatile register is saved to the stack in a + /// position that was previously allocated. + SaveNonVolatile(Register, StackFrameOffset), + + /// Save the lower 64 bits of a nonvolatile XMM register on the stack. + SaveXMM(Register, StackFrameOffset), + + /// Describes the function epilog. 
+ /// + /// This operation has been introduced with unwind info version 2 and is not implemented yet. + Epilog, + + /// Save all 128 bits of a nonvolatile XMM register on the stack. + SaveXMM128(Register, StackFrameOffset), + + /// Push a machine frame. This is used to record the effect of a hardware interrupt or + /// exception. Depending on the error flag, this frame has two different layouts. + /// + /// This unwind code always appears in a dummy prolog, which is never actually executed but + /// instead appears before the real entry point of an interrupt routine, and exists only to + /// provide a place to simulate the push of a machine frame. This operation records that + /// simulation, which indicates the machine has conceptually done this: + /// + /// 1. Pop RIP return address from top of stack into `temp` + /// 2. `$ss`, Push old `$rsp`, `$rflags`, `$cs`, `temp` + /// 3. If error flag is `true`, push the error code + /// + /// Without an error code, RSP was incremented by `40` and the following was frame pushed: + /// + /// Offset | Value + /// ---------|-------- + /// RSP + 32 | `$ss` + /// RSP + 24 | old `$rsp` + /// RSP + 16 | `$rflags` + /// RSP + 8 | `$cs` + /// RSP + 0 | `$rip` + /// + /// With an error code, RSP was incremented by `48` and the following was frame pushed: + /// + /// Offset | Value + /// ---------|-------- + /// RSP + 40 | `$ss` + /// RSP + 32 | old `$rsp` + /// RSP + 24 | `$rflags` + /// RSP + 16 | `$cs` + /// RSP + 8 | `$rip` + /// RSP + 0 | error code + PushMachineFrame(bool), + + /// A reserved operation without effect. + Noop, +} + +/// Context used to parse unwind operation. +#[derive(Clone, Copy, Debug, PartialEq)] +struct UnwindOpContext { + /// Version of the unwind info. + version: u8, + + /// The nonvolatile register used as the frame pointer of this function. + /// + /// If this register is non-zero, all stack frame offsets used in unwind operations are of type + /// `StackFrameOffset::FP`. 
When loading these offsets, they have to be based off the value of + /// this frame register instead of the conventional RSP. This allows the RSP to be modified. + frame_register: Register, +} + +/// An unwind operation that is executed at a particular place in the function prolog. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct UnwindCode { + /// Offset of the corresponding instruction in the function prolog. + /// + /// To be precise, this is the offset from the beginning of the prolog of the end of the + /// instruction that performs this operation, plus 1 (that is, the offset of the start of the + /// next instruction). + /// + /// Unwind codes are ordered by this offset in reverse order, suitable for unwinding. + pub code_offset: u8, + + /// The operation that was performed by the code in the prolog. + pub operation: UnwindOperation, +} + +impl<'a> TryFromCtx<'a, UnwindOpContext> for UnwindCode { + type Error = error::Error; + type Size = usize; + + #[inline] + fn try_from_ctx( + bytes: &'a [u8], + ctx: UnwindOpContext, + ) -> Result<(Self, Self::Size), Self::Error> { + let mut read = 0; + let code_offset = bytes.gread_with::(&mut read, scroll::LE)?; + let operation = bytes.gread_with::(&mut read, scroll::LE)?; + + let operation_code = operation & 0xf; + let operation_info = operation >> 4; + + let operation = match operation_code { + self::UWOP_PUSH_NONVOL => { + let register = Register(operation_info); + UnwindOperation::PushNonVolatile(register) + } + self::UWOP_ALLOC_LARGE => { + let offset = match operation_info { + 0 => u32::from(bytes.gread_with::(&mut read, scroll::LE)?) 
* 8, + 1 => bytes.gread_with::(&mut read, scroll::LE)?, + i => { + let msg = format!("invalid op info ({}) for UWOP_ALLOC_LARGE", i); + return Err(error::Error::Malformed(msg)); + } + }; + UnwindOperation::Alloc(offset) + } + self::UWOP_ALLOC_SMALL => { + let offset = u32::from(operation_info) * 8 + 8; + UnwindOperation::Alloc(offset) + } + self::UWOP_SET_FPREG => UnwindOperation::SetFPRegister, + self::UWOP_SAVE_NONVOL => { + let register = Register(operation_info); + let offset = u32::from(bytes.gread_with::(&mut read, scroll::LE)?) * 8; + UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_SAVE_NONVOL_FAR => { + let register = Register(operation_info); + let offset = bytes.gread_with::(&mut read, scroll::LE)?; + UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_EPILOG => { + let data = u32::from(bytes.gread_with::(&mut read, scroll::LE)?) * 16; + if ctx.version == 1 { + let register = Register::xmm(operation_info); + UnwindOperation::SaveXMM(register, StackFrameOffset::with_ctx(data, ctx)) + } else { + // TODO: See https://weekly-geekly.github.io/articles/322956/index.html + UnwindOperation::Epilog + } + } + self::UWOP_SPARE_CODE => { + let data = bytes.gread_with::(&mut read, scroll::LE)?; + if ctx.version == 1 { + let register = Register::xmm(operation_info); + UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(data, ctx)) + } else { + UnwindOperation::Noop + } + } + self::UWOP_SAVE_XMM128 => { + let register = Register::xmm(operation_info); + let offset = u32::from(bytes.gread_with::(&mut read, scroll::LE)?) 
* 16; + UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_SAVE_XMM128_FAR => { + let register = Register::xmm(operation_info); + let offset = bytes.gread_with::(&mut read, scroll::LE)?; + UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_PUSH_MACHFRAME => { + let is_error = match operation_info { + 0 => false, + 1 => true, + i => { + let msg = format!("invalid op info ({}) for UWOP_PUSH_MACHFRAME", i); + return Err(error::Error::Malformed(msg)); + } + }; + UnwindOperation::PushMachineFrame(is_error) + } + op => { + let msg = format!("unknown unwind op code ({})", op); + return Err(error::Error::Malformed(msg)); + } + }; + + let code = UnwindCode { + code_offset, + operation, + }; + + Ok((code, read)) + } +} + +/// An iterator over unwind codes for a function or part of a function, returned from +/// [`UnwindInfo`]. +/// +/// [`UnwindInfo`]: struct.UnwindInfo.html +#[derive(Clone, Debug)] +pub struct UnwindCodeIterator<'a> { + bytes: &'a [u8], + offset: usize, + context: UnwindOpContext, +} + +impl Iterator for UnwindCodeIterator<'_> { + type Item = error::Result; + + fn next(&mut self) -> Option { + if self.offset >= self.bytes.len() { + return None; + } + + Some(self.bytes.gread_with(&mut self.offset, self.context)) + } + + fn size_hint(&self) -> (usize, Option) { + let upper = (self.bytes.len() - self.offset) / UNWIND_CODE_SIZE; + // the largest codes take up three slots + let lower = (upper + 3 - (upper % 3)) / 3; + (lower, Some(upper)) + } +} + +impl FusedIterator for UnwindCodeIterator<'_> {} + +/// A language-specific handler that is called as part of the search for an exception handler or as +/// part of an unwind. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum UnwindHandler<'a> { + /// The image-relative address of an exception handler and its implementation-defined data. 
+ ExceptionHandler(u32, &'a [u8]), + /// The image-relative address of a termination handler and its implementation-defined data. + TerminationHandler(u32, &'a [u8]), +} + +/// Unwind information for a function or portion of a function. +/// +/// The unwind info structure is used to record the effects a function has on the stack pointer and +/// where the nonvolatile registers are saved on the stack. The unwind codes can be enumerated with +/// [`unwind_codes`]. +/// +/// This unwind info might only be secondary information, and link to a [chained unwind handler]. +/// For unwinding, this link shall be followed until the root unwind info record has been resolved. +/// +/// [`unwind_codes`]: struct.UnwindInfo.html#method.unwind_codes +/// [chained unwind handler]: struct.UnwindInfo.html#structfield.chained_info +#[derive(Clone)] +pub struct UnwindInfo<'a> { + /// Version of this unwind info. + pub version: u8, + + /// Length of the function prolog in bytes. + pub size_of_prolog: u8, + + /// The nonvolatile register used as the frame pointer of this function. + /// + /// If this register is non-zero, all stack frame offsets used in unwind operations are of type + /// `StackFrameOffset::FP`. When loading these offsets, they have to be based off the value of + /// this frame register instead of the conventional RSP. This allows the RSP to be modified. + pub frame_register: Register, + + /// Offset from RSP that is applied to the FP register when it is established. + /// + /// When loading offsets of type `StackFrameOffset::FP` from the stack, this offset has to be + /// subtracted before loading the value since the actual RSP was lower by that amount in the + /// prolog. + pub frame_register_offset: u32, + + /// A record pointing to chained unwind information. + /// + /// If chained unwind info is present, then this unwind info is a secondary one and the linked + /// unwind info contains primary information. Chained info is useful in two situations. 
First, + /// it is used for noncontiguous code segments. Second, this mechanism is sometimes used to + /// group volatile register saves. + /// + /// The referenced unwind info can itself specify chained unwind information, until it arrives + /// at the root unwind info. Generally, the entire chain should be considered when unwinding. + pub chained_info: Option, + + /// An exception or termination handler called as part of the unwind. + pub handler: Option>, + + /// A list of unwind codes, sorted descending by code offset. + code_bytes: &'a [u8], +} + +impl<'a> UnwindInfo<'a> { + /// Parses unwind information from the image at the given offset. + pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result { + // Read the version and flags fields, which are combined into a single byte. + let version_flags: u8 = bytes.gread_with(&mut offset, scroll::LE)?; + let version = version_flags & 0b111; + let flags = version_flags >> 3; + + if version < 1 || version > 2 { + let msg = format!("unsupported unwind code version ({})", version); + return Err(error::Error::Malformed(msg)); + } + + let size_of_prolog = bytes.gread_with::(&mut offset, scroll::LE)?; + let count_of_codes = bytes.gread_with::(&mut offset, scroll::LE)?; + + // Parse the frame register and frame register offset values, that are combined into a + // single byte. + let frame_info = bytes.gread_with::(&mut offset, scroll::LE)?; + // If nonzero, then the function uses a frame pointer (FP), and this field is the number + // of the nonvolatile register used as the frame pointer. The zero register value does + // not need special casing since it will not be referenced by the unwind operations. + let frame_register = Register(frame_info & 0xf); + // The the scaled offset from RSP that is applied to the FP register when it's + // established. The actual FP register is set to RSP + 16 * this number, allowing + // offsets from 0 to 240. 
+ let frame_register_offset = u32::from((frame_info >> 4) * 16); + + // An array of items that explains the effect of the prolog on the nonvolatile registers and + // RSP. Some unwind codes require more than one slot in the array. + let codes_size = count_of_codes as usize * UNWIND_CODE_SIZE; + let code_bytes = bytes.gread_with(&mut offset, codes_size)?; + + // For alignment purposes, the codes array always has an even number of entries, and the + // final entry is potentially unused. In that case, the array is one longer than indicated + // by the count of unwind codes field. + if count_of_codes % 2 != 0 { + offset += 2; + } + debug_assert!(offset % 4 == 0); + + let mut chained_info = None; + let mut handler = None; + + // If flag UNW_FLAG_CHAININFO is set then the UNWIND_INFO structure ends with three UWORDs. + // These UWORDs represent the RUNTIME_FUNCTION information for the function of the chained + // unwind. + if flags & UNW_FLAG_CHAININFO != 0 { + chained_info = Some(bytes.gread_with(&mut offset, scroll::LE)?); + + // The relative address of the language-specific handler is present in the UNWIND_INFO + // whenever flags UNW_FLAG_EHANDLER or UNW_FLAG_UHANDLER are set. The language-specific + // handler is called as part of the search for an exception handler or as part of an unwind. + } else if flags & (UNW_FLAG_EHANDLER | UNW_FLAG_UHANDLER) != 0 { + let offset = bytes.gread_with::(&mut offset, scroll::LE)? as usize; + let data = &bytes[offset..]; + + handler = Some(if flags & UNW_FLAG_EHANDLER != 0 { + UnwindHandler::ExceptionHandler(offset as u32, data) + } else { + UnwindHandler::TerminationHandler(offset as u32, data) + }); + } + + Ok(UnwindInfo { + version, + size_of_prolog, + frame_register, + frame_register_offset, + chained_info, + handler, + code_bytes, + }) + } + + /// Returns an iterator over unwind codes in this unwind info. + /// + /// Unwind codes are iterated in descending `code_offset` order suitable for unwinding. 
If the + /// optional [`chained_info`] is present, codes of that unwind info should be interpreted + /// immediately afterwards. + pub fn unwind_codes(&self) -> UnwindCodeIterator<'a> { + UnwindCodeIterator { + bytes: self.code_bytes, + offset: 0, + context: UnwindOpContext { + version: self.version, + frame_register: self.frame_register, + }, + } + } +} + +impl fmt::Debug for UnwindInfo<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let count_of_codes = self.code_bytes.len() / UNWIND_CODE_SIZE; + + f.debug_struct("UnwindInfo") + .field("version", &self.version) + .field("size_of_prolog", &self.size_of_prolog) + .field("frame_register", &self.frame_register) + .field("frame_register_offset", &self.frame_register_offset) + .field("count_of_codes", &count_of_codes) + .field("chained_info", &self.chained_info) + .field("handler", &self.handler) + .finish() + } +} + +impl<'a> IntoIterator for &'_ UnwindInfo<'a> { + type Item = error::Result; + type IntoIter = UnwindCodeIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.unwind_codes() + } +} + +/// Exception handling and stack unwind information for functions in the image. +pub struct ExceptionData<'a> { + bytes: &'a [u8], + offset: usize, + size: usize, + file_alignment: u32, +} + +impl<'a> ExceptionData<'a> { + /// Parses exception data from the image at the given offset. 
+ pub fn parse( + bytes: &'a [u8], + directory: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result { + let size = directory.size as usize; + + if size % RUNTIME_FUNCTION_SIZE != 0 { + Err(scroll::Error::BadInput { + size, + msg: "invalid exception directory table size", + })?; + } + + let rva = directory.virtual_address as usize; + let offset = utils::find_offset(rva, sections, file_alignment).ok_or_else(|| { + error::Error::Malformed(format!("cannot map exception_rva ({:#x}) into offset", rva)) + })?; + + if offset % 4 != 0 { + Err(scroll::Error::BadOffset(offset))?; + } + + Ok(ExceptionData { + bytes, + offset, + size, + file_alignment, + }) + } + + /// The number of function entries described by this exception data. + pub fn len(&self) -> usize { + self.size / RUNTIME_FUNCTION_SIZE + } + + /// Indicating whether there are functions in this entry. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Iterates all function entries in order of their code offset. + /// + /// To search for a function by relative instruction address, use [`find_function`]. To resolve + /// unwind information, use [`get_unwind_info`]. + /// + /// [`find_function`]: struct.ExceptionData.html#method.find_function + /// [`get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info + pub fn functions(&self) -> RuntimeFunctionIterator<'a> { + RuntimeFunctionIterator { + data: &self.bytes[self.offset..self.offset + self.size], + } + } + + /// Returns the function at the given index. + pub fn get_function(&self, index: usize) -> error::Result { + self.get_function_by_offset(index * RUNTIME_FUNCTION_SIZE) + } + + /// Performs a binary search to find a function entry covering the given RVA relative to the + /// image. + pub fn find_function(&self, rva: u32) -> error::Result> { + // NB: Binary search implementation copied from std::slice::binary_search_by and adapted. 
+ // Theoretically, there should be nothing that causes parsing runtime functions to fail and + // all access to the bytes buffer is guaranteed to be in range. However, since all other + // functions also return Results, this is much more ergonomic here. + + let mut size = self.len(); + if size == 0 { + return Ok(None); + } + + let mut base = 0; + while size > 1 { + let half = size / 2; + let mid = base + half; + let offset = self.offset + mid * RUNTIME_FUNCTION_SIZE; + let addr = self.bytes.pread_with::(offset, scroll::LE)?; + base = if addr > rva { base } else { mid }; + size -= half; + } + + let offset = self.offset + base * RUNTIME_FUNCTION_SIZE; + let addr = self.bytes.pread_with::(offset, scroll::LE)?; + let function = match addr.cmp(&rva) { + Ordering::Less | Ordering::Equal => self.get_function(base)?, + Ordering::Greater if base == 0 => return Ok(None), + Ordering::Greater => self.get_function(base - 1)?, + }; + + if function.end_address > rva { + Ok(Some(function)) + } else { + Ok(None) + } + } + + /// Resolves unwind information for the given function entry. 
+ pub fn get_unwind_info( + &self, + mut function: RuntimeFunction, + sections: &[section_table::SectionTable], + ) -> error::Result> { + while function.unwind_info_address % 2 != 0 { + let rva = (function.unwind_info_address & !1) as usize; + function = self.get_function_by_rva(rva, sections)?; + } + + let rva = function.unwind_info_address as usize; + let offset = utils::find_offset(rva, sections, self.file_alignment).ok_or_else(|| { + error::Error::Malformed(format!("cannot map unwind rva ({:#x}) into offset", rva)) + })?; + + UnwindInfo::parse(self.bytes, offset) + } + + fn get_function_by_rva( + &self, + rva: usize, + sections: &[section_table::SectionTable], + ) -> error::Result { + let offset = utils::find_offset(rva, sections, self.file_alignment).ok_or_else(|| { + error::Error::Malformed(format!("cannot map exception rva ({:#x}) into offset", rva)) + })?; + + self.get_function_by_offset(offset) + } + + #[inline] + fn get_function_by_offset(&self, offset: usize) -> error::Result { + debug_assert!(offset % RUNTIME_FUNCTION_SIZE == 0); + debug_assert!(offset < self.size); + + Ok(self.bytes.pread_with(self.offset + offset, scroll::LE)?) 
+ } +} + +impl fmt::Debug for ExceptionData<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("ExceptionData") + .field("file_alignment", &self.file_alignment) + .field("offset", &format_args!("{:#x}", self.offset)) + .field("size", &format_args!("{:#x}", self.size)) + .field("len", &self.len()) + .finish() + } +} + +impl<'a> IntoIterator for &'_ ExceptionData<'a> { + type Item = error::Result; + type IntoIter = RuntimeFunctionIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.functions() + } +} diff --git a/third_party/rust/goblin/src/pe/export.rs b/third_party/rust/goblin/src/pe/export.rs index d62dcf18c3f3..f1df5d7395bd 100644 --- a/third_party/rust/goblin/src/pe/export.rs +++ b/third_party/rust/goblin/src/pe/export.rs @@ -1,11 +1,13 @@ -use scroll::{self, Pread}; -use alloc::vec::Vec; +use scroll::{Pread, Pwrite}; +use crate::alloc::vec::Vec; -use error; +use log::debug; -use pe::utils; -use pe::section_table; -use pe::data_directories; +use crate::error; + +use crate::pe::utils; +use crate::pe::section_table; +use crate::pe::data_directories; #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default)] @@ -64,17 +66,17 @@ pub struct ExportData<'a> { } impl<'a> ExportData<'a> { - pub fn parse(bytes: &'a [u8], dd: &data_directories::DataDirectory, sections: &[section_table::SectionTable]) -> error::Result> { + pub fn parse(bytes: &'a [u8], dd: data_directories::DataDirectory, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result> { let export_rva = dd.virtual_address as usize; let size = dd.size as usize; debug!("export_rva {:#x} size {:#}", export_rva, size); - let export_offset = utils::find_offset_or(export_rva, sections, &format!("cannot map export_rva ({:#x}) into offset", export_rva))?; + let export_offset = utils::find_offset_or(export_rva, sections, file_alignment, &format!("cannot map export_rva ({:#x}) into offset", export_rva))?; let export_directory_table = 
ExportDirectoryTable::parse(bytes, export_offset) .map_err(|_| error::Error::Malformed(format!("cannot parse export_directory_table (offset {:#x})", export_offset)))?; let number_of_name_pointers = export_directory_table.number_of_name_pointers as usize; let address_table_entries = export_directory_table.address_table_entries as usize; - let export_name_pointer_table = utils::find_offset(export_directory_table.name_pointer_rva as usize, sections).map_or(vec![], |table_offset| { + let export_name_pointer_table = utils::find_offset(export_directory_table.name_pointer_rva as usize, sections, file_alignment).map_or(vec![], |table_offset| { let mut offset = table_offset; let mut table: ExportNamePointerTable = Vec::with_capacity(number_of_name_pointers); @@ -89,7 +91,7 @@ impl<'a> ExportData<'a> { table }); - let export_ordinal_table = utils::find_offset(export_directory_table.ordinal_table_rva as usize, sections).map_or(vec![], |table_offset| { + let export_ordinal_table = utils::find_offset(export_directory_table.ordinal_table_rva as usize, sections, file_alignment).map_or(vec![], |table_offset| { let mut offset = table_offset; let mut table: ExportOrdinalTable = Vec::with_capacity(number_of_name_pointers); @@ -104,7 +106,7 @@ impl<'a> ExportData<'a> { table }); - let export_address_table = utils::find_offset(export_directory_table.export_address_table_rva as usize, sections).map_or(vec![], |table_offset| { + let export_address_table = utils::find_offset(export_directory_table.export_address_table_rva as usize, sections, file_alignment).map_or(vec![], |table_offset| { let mut offset = table_offset; let mut table: ExportAddressTable = Vec::with_capacity(address_table_entries); let export_end = export_rva + size; @@ -124,14 +126,14 @@ impl<'a> ExportData<'a> { table }); - let name = utils::find_offset(export_directory_table.name_rva as usize, sections).and_then(|offset| bytes.pread(offset).ok()); + let name = utils::find_offset(export_directory_table.name_rva as usize, 
sections, file_alignment).and_then(|offset| bytes.pread(offset).ok()); Ok(ExportData { - name: name, - export_directory_table: export_directory_table, - export_name_pointer_table: export_name_pointer_table, - export_ordinal_table: export_ordinal_table, - export_address_table: export_address_table, + name, + export_directory_table, + export_name_pointer_table, + export_ordinal_table, + export_address_table, }) } } @@ -144,44 +146,35 @@ pub enum Reexport<'a> { } impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for Reexport<'a> { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; #[inline] fn try_from_ctx(bytes: &'a [u8], _ctx: scroll::Endian) -> Result<(Self, Self::Size), Self::Error> { - use scroll::{Pread}; let reexport = bytes.pread::<&str>(0)?; let reexport_len = reexport.len(); debug!("reexport: {}", &reexport); for o in 0..reexport_len { let c: u8 = bytes.pread(o)?; debug!("reexport offset: {:#x} char: {:#x}", o, c); - match c { - // '.' - 0x2e => { - let i = o - 1; - let dll: &'a str = bytes.pread_with(0, ::scroll::ctx::StrCtx::Length(i))?; - debug!("dll: {:?}", &dll); - let len = reexport_len - i - 1; - let rest: &'a [u8] = bytes.pread_with(o, len)?; - debug!("rest: {:?}", &rest); - let len = rest.len() - 1; - match rest[0] { - // '#' - 0x23 => { - // UNTESTED - let ordinal = rest.pread_with::<&str>(1, ::scroll::ctx::StrCtx::Length(len))?; - let ordinal = ordinal.parse::().map_err(|_e| error::Error::Malformed(format!("Cannot parse reexport ordinal from {} bytes", bytes.len())))?; - // FIXME: return size - return Ok((Reexport::DLLOrdinal { export: dll, ordinal: ordinal as usize }, 0)) - }, - _ => { - let export = rest.pread_with::<&str>(1, ::scroll::ctx::StrCtx::Length(len))?; - // FIXME: return size - return Ok((Reexport::DLLName { export: export, lib: dll }, 0)) - } - } - }, - _ => {} + if c == b'.' 
{ + let i = o - 1; + let dll: &'a str = bytes.pread_with(0, scroll::ctx::StrCtx::Length(i))?; + debug!("dll: {:?}", &dll); + let len = reexport_len - i - 1; + let rest: &'a [u8] = bytes.pread_with(o, len)?; + debug!("rest: {:?}", &rest); + let len = rest.len() - 1; + if rest[0] == b'#' { + // UNTESTED + let ordinal = rest.pread_with::<&str>(1, scroll::ctx::StrCtx::Length(len))?; + let ordinal = ordinal.parse::().map_err(|_e| error::Error::Malformed(format!("Cannot parse reexport ordinal from {} bytes", bytes.len())))?; + // FIXME: return size + return Ok((Reexport::DLLOrdinal { export: dll, ordinal: ordinal as usize }, 0)) + } else { + let export = rest.pread_with::<&str>(1, scroll::ctx::StrCtx::Length(len))?; + // FIXME: return size + return Ok((Reexport::DLLName { export, lib: dll }, 0)) + } } } Err(error::Error::Malformed(format!("Reexport {:#} is malformed", reexport))) @@ -189,7 +182,7 @@ impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for Reexport<'a> { } impl<'a> Reexport<'a> { - pub fn parse(bytes: &'a [u8], offset: usize) -> ::error::Result> { + pub fn parse(bytes: &'a [u8], offset: usize) -> crate::error::Result> { bytes.pread(offset) } } @@ -209,6 +202,7 @@ struct ExportCtx<'a> { pub ptr: u32, pub idx: usize, pub sections: &'a [section_table::SectionTable], + pub file_alignment: u32, pub addresses: &'a ExportAddressTable, pub ordinals: &'a ExportOrdinalTable, } @@ -217,23 +211,23 @@ impl<'a, 'b> scroll::ctx::TryFromCtx<'a, ExportCtx<'b>> for Export<'a> { type Error = error::Error; type Size = usize; #[inline] - fn try_from_ctx(bytes: &'a [u8], ExportCtx { ptr, idx, sections, addresses, ordinals }: ExportCtx<'b>) -> Result<(Self, Self::Size), Self::Error> { + fn try_from_ctx(bytes: &'a [u8], ExportCtx { ptr, idx, sections, file_alignment, addresses, ordinals }: ExportCtx<'b>) -> Result<(Self, Self::Size), Self::Error> { use self::ExportAddressTableEntry::*; - let name = utils::find_offset(ptr as usize, sections).map_or(None, |offset| 
bytes.pread::<&str>(offset).ok()); + let name = utils::find_offset(ptr as usize, sections, file_alignment).and_then(|offset| bytes.pread::<&str>(offset).ok()); if let Some(ordinal) = ordinals.get(idx) { if let Some(rva) = addresses.get(*ordinal as usize) { match *rva { ExportRVA(rva) => { let rva = rva as usize; - let offset = utils::find_offset_or(rva, sections, &format!("cannot map RVA ({:#x}) of export ordinal {} into offset", rva, ordinal))?; + let offset = utils::find_offset_or(rva, sections, file_alignment, &format!("cannot map RVA ({:#x}) of export ordinal {} into offset", rva, ordinal))?; Ok((Export { name, offset, rva, reexport: None, size: 0 }, 0)) }, ForwarderRVA(rva) => { let rva = rva as usize; - let offset = utils::find_offset_or(rva, sections, &format!("cannot map RVA ({:#x}) of export ordinal {} into offset", rva, ordinal))?; + let offset = utils::find_offset_or(rva, sections, file_alignment, &format!("cannot map RVA ({:#x}) of export ordinal {} into offset", rva, ordinal))?; let reexport = Reexport::parse(bytes, offset)?; Ok((Export { name, offset, rva, reexport: Some(reexport), size: 0 }, 0)) } @@ -248,16 +242,16 @@ impl<'a, 'b> scroll::ctx::TryFromCtx<'a, ExportCtx<'b>> for Export<'a> { } impl<'a> Export<'a> { - pub fn parse(bytes: &'a [u8], export_data: &ExportData, sections: &[section_table::SectionTable]) -> error::Result>> { + pub fn parse(bytes: &'a [u8], export_data: &ExportData, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result>> { let pointers = &export_data.export_name_pointer_table; let addresses = &export_data.export_address_table; let ordinals = &export_data.export_ordinal_table; let mut exports = Vec::with_capacity(pointers.len()); for (idx, &ptr) in pointers.iter().enumerate() { - if let Ok(export) = bytes.pread_with(0, ExportCtx { ptr, idx, sections, addresses, ordinals }) { + if let Ok(export) = bytes.pread_with(0, ExportCtx { ptr, idx, sections, file_alignment, addresses, ordinals }) { 
exports.push(export); - } + } } // TODO: sort + compute size diff --git a/third_party/rust/goblin/src/pe/header.rs b/third_party/rust/goblin/src/pe/header.rs index 2ca13f7d80fa..6318af738707 100644 --- a/third_party/rust/goblin/src/pe/header.rs +++ b/third_party/rust/goblin/src/pe/header.rs @@ -1,7 +1,9 @@ -use error; - -use pe::optional_header; -use scroll::{self, Pread}; +use crate::alloc::vec::Vec; +use crate::error; +use crate::pe::{optional_header, section_table, symbol}; +use crate::strtab; +use log::debug; +use scroll::{Pread, Pwrite, IOread, IOwrite, SizeWith}; /// DOS header present in all PE binaries #[repr(C)] @@ -22,16 +24,14 @@ impl DosHeader { .map_err(|_| error::Error::Malformed(format!("cannot parse DOS signature (offset {:#x})", 0)))?; let pe_pointer = bytes.pread_with(PE_POINTER_OFFSET as usize, scroll::LE) .map_err(|_| error::Error::Malformed(format!("cannot parse PE header pointer (offset {:#x})", PE_POINTER_OFFSET)))?; - Ok (DosHeader { signature: signature, pe_pointer: pe_pointer }) + Ok (DosHeader { signature, pe_pointer }) } } /// COFF Header #[repr(C)] -#[derive(Debug, PartialEq, Copy, Clone, Default)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] pub struct CoffHeader { - /// COFF Magic: PE\0\0, little endian - pub signature: u32, /// The machine type pub machine: u16, pub number_of_sections: u16, @@ -42,38 +42,67 @@ pub struct CoffHeader { pub characteristics: u16, } -pub const SIZEOF_COFF_HEADER: usize = 24; +pub const SIZEOF_COFF_HEADER: usize = 20; /// PE\0\0, little endian -pub const COFF_MAGIC: u32 = 0x00004550; +pub const PE_MAGIC: u32 = 0x0000_4550; +pub const SIZEOF_PE_MAGIC: usize = 4; pub const COFF_MACHINE_X86: u16 = 0x14c; pub const COFF_MACHINE_X86_64: u16 = 0x8664; impl CoffHeader { pub fn parse(bytes: &[u8], offset: &mut usize) -> error::Result { - let mut coff = CoffHeader::default(); - coff.signature = bytes.gread_with(offset, scroll::LE) - .map_err(|_| 
error::Error::Malformed(format!("cannot parse COFF signature (offset {:#x})", offset)))?; - coff.machine = bytes.gread_with(offset, scroll::LE) - .map_err(|_| error::Error::Malformed(format!("cannot parse COFF machine (offset {:#x})", offset)))?; - coff.number_of_sections = bytes.gread_with(offset, scroll::LE) - .map_err(|_| error::Error::Malformed(format!("cannot parse COFF number of sections (offset {:#x})", offset)))?; - coff.time_date_stamp = bytes.gread_with(offset, scroll::LE) - .map_err(|_| error::Error::Malformed(format!("cannot parse COFF time date stamp (offset {:#x})", offset)))?; - coff.pointer_to_symbol_table = bytes.gread_with(offset, scroll::LE) - .map_err(|_| error::Error::Malformed(format!("cannot parse COFF pointer to symbol table (offset {:#x})", offset)))?; - coff.number_of_symbol_table = bytes.gread_with(offset, scroll::LE) - .map_err(|_| error::Error::Malformed(format!("cannot parse COFF number of symbol (offset {:#x})", offset)))?; - coff.size_of_optional_header = bytes.gread_with(offset, scroll::LE) - .map_err(|_| error::Error::Malformed(format!("cannot parse COFF size of optional header (offset {:#x})", offset)))?; - coff.characteristics = bytes.gread_with(offset, scroll::LE) - .map_err(|_| error::Error::Malformed(format!("cannot parse COFF characteristics (offset {:#x})", offset)))?; - Ok(coff) + Ok(bytes.gread_with(offset, scroll::LE)?) + } + + /// Parse the COFF section headers. + /// + /// For COFF, these immediately follow the COFF header. For PE, these immediately follow the + /// optional header. + pub fn sections( + &self, + bytes: &[u8], + offset: &mut usize, + ) -> error::Result> { + let nsections = self.number_of_sections as usize; + let mut sections = Vec::with_capacity(nsections); + // Note that if we are handling a BigCoff, the size of the symbol will be different! 
+ let string_table_offset = self.pointer_to_symbol_table as usize + + symbol::SymbolTable::size(self.number_of_symbol_table as usize); + for i in 0..nsections { + let section = section_table::SectionTable::parse(bytes, offset, string_table_offset as usize)?; + debug!("({}) {:#?}", i, section); + sections.push(section); + } + Ok(sections) + } + + /// Return the COFF symbol table. + pub fn symbols<'a>( + &self, + bytes: &'a [u8], + ) -> error::Result> { + let offset = self.pointer_to_symbol_table as usize; + let number = self.number_of_symbol_table as usize; + symbol::SymbolTable::parse(bytes, offset, number) + } + + /// Return the COFF string table. + pub fn strings<'a>( + &self, + bytes: &'a [u8], + ) -> error::Result> { + let offset = self.pointer_to_symbol_table as usize + + symbol::SymbolTable::size(self.number_of_symbol_table as usize); + let length = bytes.pread_with::(offset, scroll::LE)? as usize; + Ok(strtab::Strtab::parse(bytes, offset, length, 0).unwrap()) } } #[derive(Debug, PartialEq, Copy, Clone, Default)] pub struct Header { pub dos_header: DosHeader, + /// PE Magic: PE\0\0, little endian + pub signature: u32, pub coff_header: CoffHeader, pub optional_header: Option, } @@ -82,19 +111,21 @@ impl Header { pub fn parse(bytes: &[u8]) -> error::Result { let dos_header = DosHeader::parse(&bytes)?; let mut offset = dos_header.pe_pointer as usize; + let signature = bytes.gread_with(&mut offset, scroll::LE) + .map_err(|_| error::Error::Malformed(format!("cannot parse PE signature (offset {:#x})", offset)))?; let coff_header = CoffHeader::parse(&bytes, &mut offset)?; let optional_header = if coff_header.size_of_optional_header > 0 { Some (bytes.pread::(offset)?) 
} else { None }; - Ok( Header { dos_header: dos_header, coff_header: coff_header, optional_header: optional_header }) + Ok( Header { dos_header, signature, coff_header, optional_header }) } } #[cfg(test)] mod tests { - use super::{DOS_MAGIC, COFF_MAGIC, COFF_MACHINE_X86, Header}; + use super::{DOS_MAGIC, PE_MAGIC, COFF_MACHINE_X86, Header}; const CRSS_HEADER: [u8; 688] = [0x4d, 0x5a, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, @@ -145,7 +176,7 @@ mod tests { fn crss_header () { let header = Header::parse(&&CRSS_HEADER[..]).unwrap(); assert!(header.dos_header.signature == DOS_MAGIC); - assert!(header.coff_header.signature == COFF_MAGIC); + assert!(header.signature == PE_MAGIC); assert!(header.coff_header.machine == COFF_MACHINE_X86); println!("header: {:?}", &header); } diff --git a/third_party/rust/goblin/src/pe/import.rs b/third_party/rust/goblin/src/pe/import.rs index 82cdef8e2f40..97e2196c52a0 100644 --- a/third_party/rust/goblin/src/pe/import.rs +++ b/third_party/rust/goblin/src/pe/import.rs @@ -1,20 +1,21 @@ -use alloc::borrow::Cow; -use alloc::vec::Vec; +use crate::alloc::borrow::Cow; +use crate::alloc::vec::Vec; use core::fmt::{LowerHex, Debug}; -use scroll::{self, Pread}; +use scroll::{Pread, Pwrite, SizeWith}; use scroll::ctx::TryFromCtx; -use error; +use crate::error; -use pe::section_table; -use pe::utils; -use pe::data_directories; +use crate::pe::section_table; +use crate::pe::utils; +use crate::pe::data_directories; +use log::{debug, warn}; pub const IMPORT_BY_ORDINAL_32: u32 = 0x8000_0000; pub const IMPORT_BY_ORDINAL_64: u64 = 0x8000_0000_0000_0000; -pub const IMPORT_RVA_MASK_32: u32 = 0x8fff_ffff; -pub const IMPORT_RVA_MASK_64: u64 = 0x0000_0000_8fff_ffff; +pub const IMPORT_RVA_MASK_32: u32 = 0x7fff_ffff; +pub const IMPORT_RVA_MASK_64: u64 = 0x0000_0000_7fff_ffff; pub trait Bitfield<'a>: Into + PartialEq + Eq + LowerHex + Debug + TryFromCtx<'a, scroll::Endian, Error=scroll::Error, Size=usize> { fn 
is_ordinal(&self) -> bool; @@ -51,7 +52,7 @@ impl<'a> HintNameTableEntry<'a> { let offset = &mut offset; let hint = bytes.gread_with(offset, scroll::LE)?; let name = bytes.pread::<&'a str>(*offset)?; - Ok(HintNameTableEntry { hint: hint, name: name }) + Ok(HintNameTableEntry { hint, name }) } } @@ -64,8 +65,7 @@ pub enum SyntheticImportLookupTableEntry<'a> { pub type ImportLookupTable<'a> = Vec>; impl<'a> SyntheticImportLookupTableEntry<'a> { - pub fn parse>(bytes: &'a [u8], mut offset: usize, sections: &[section_table::SectionTable]) - -> error::Result> { + pub fn parse>(bytes: &'a [u8], mut offset: usize, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result> { let le = scroll::LE; let offset = &mut offset; let mut table = Vec::new(); @@ -86,7 +86,7 @@ impl<'a> SyntheticImportLookupTableEntry<'a> { let rva = bitfield.to_rva(); let hentry = { debug!("searching for RVA {:#x}", rva); - if let Some(offset) = utils::find_offset(rva as usize, sections) { + if let Some(offset) = utils::find_offset(rva as usize, sections, file_alignment) { debug!("offset {:#x}", offset); HintNameTableEntry::parse(bytes, offset)? 
} else { @@ -142,33 +142,39 @@ pub struct SyntheticImportDirectoryEntry<'a> { } impl<'a> SyntheticImportDirectoryEntry<'a> { - pub fn parse>(bytes: &'a [u8], import_directory_entry: ImportDirectoryEntry, sections: &[section_table::SectionTable]) -> error::Result> { + pub fn parse>(bytes: &'a [u8], import_directory_entry: ImportDirectoryEntry, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result> { const LE: scroll::Endian = scroll::LE; let name_rva = import_directory_entry.name_rva; - let name = utils::try_name(bytes, name_rva as usize, sections)?; + let name = utils::try_name(bytes, name_rva as usize, sections, file_alignment)?; let import_lookup_table = { let import_lookup_table_rva = import_directory_entry.import_lookup_table_rva; - debug!("Synthesizing lookup table imports for {} lib, with import lookup table rva: {:#x}", name, import_lookup_table_rva); - if let Some(import_lookup_table_offset) = utils::find_offset(import_lookup_table_rva as usize, sections) { - let import_lookup_table = SyntheticImportLookupTableEntry::parse::(bytes, import_lookup_table_offset, sections)?; - debug!("Successfully synthesized import lookup table entry: {:#?}", import_lookup_table); + let import_address_table_rva = import_directory_entry.import_address_table_rva; + if let Some(import_lookup_table_offset) = utils::find_offset(import_lookup_table_rva as usize, sections, file_alignment) { + debug!("Synthesizing lookup table imports for {} lib, with import lookup table rva: {:#x}", name, import_lookup_table_rva); + let import_lookup_table = SyntheticImportLookupTableEntry::parse::(bytes, import_lookup_table_offset, sections, file_alignment)?; + debug!("Successfully synthesized import lookup table entry from lookup table: {:#?}", import_lookup_table); Some(import_lookup_table) + } else if let Some(import_address_table_offset) = utils::find_offset(import_address_table_rva as usize, sections, file_alignment) { + debug!("Synthesizing lookup table imports for {} 
lib, with import address table rva: {:#x}", name, import_lookup_table_rva); + let import_address_table = SyntheticImportLookupTableEntry::parse::(bytes, import_address_table_offset, sections, file_alignment)?; + debug!("Successfully synthesized import lookup table entry from IAT: {:#?}", import_address_table); + Some(import_address_table) } else { None } }; - let import_address_table_offset = &mut utils::find_offset(import_directory_entry.import_address_table_rva as usize, sections).ok_or(error::Error::Malformed(format!("Cannot map import_address_table_rva {:#x} into offset for {}", import_directory_entry.import_address_table_rva, name)))?; + let import_address_table_offset = &mut utils::find_offset(import_directory_entry.import_address_table_rva as usize, sections, file_alignment).ok_or_else(|| error::Error::Malformed(format!("Cannot map import_address_table_rva {:#x} into offset for {}", import_directory_entry.import_address_table_rva, name)))?; let mut import_address_table = Vec::new(); loop { let import_address = bytes.gread_with::(import_address_table_offset, LE)?.into(); if import_address == 0 { break } else { import_address_table.push(import_address); } } Ok(SyntheticImportDirectoryEntry { - import_directory_entry: import_directory_entry, - name: name, - import_lookup_table: import_lookup_table, - import_address_table: import_address_table + import_directory_entry, + name, + import_lookup_table, + import_address_table }) } } @@ -180,10 +186,10 @@ pub struct ImportData<'a> { } impl<'a> ImportData<'a> { - pub fn parse>(bytes: &'a[u8], dd: &data_directories::DataDirectory, sections: &[section_table::SectionTable]) -> error::Result> { + pub fn parse>(bytes: &'a[u8], dd: data_directories::DataDirectory, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result> { let import_directory_table_rva = dd.virtual_address as usize; debug!("import_directory_table_rva {:#x}", import_directory_table_rva); - let offset = &mut 
utils::find_offset(import_directory_table_rva, sections).ok_or(error::Error::Malformed(format!("Cannot create ImportData; cannot map import_directory_table_rva {:#x} into offset", import_directory_table_rva)))?;; + let offset = &mut utils::find_offset(import_directory_table_rva, sections, file_alignment).ok_or_else(|| error::Error::Malformed(format!("Cannot create ImportData; cannot map import_directory_table_rva {:#x} into offset", import_directory_table_rva)))?;; debug!("import data offset {:#x}", offset); let mut import_data = Vec::new(); loop { @@ -192,13 +198,13 @@ impl<'a> ImportData<'a> { if import_directory_entry.is_null() { break; } else { - let entry = SyntheticImportDirectoryEntry::parse::(bytes, import_directory_entry, sections)?; + let entry = SyntheticImportDirectoryEntry::parse::(bytes, import_directory_entry, sections, file_alignment)?; debug!("entry {:#?}", entry); import_data.push(entry); } } debug!("finished ImportData"); - Ok(ImportData { import_data: import_data}) + Ok(ImportData { import_data}) } } @@ -225,23 +231,23 @@ impl<'a> Import<'a> { let offset = import_base + (i * T::size_of()); use self::SyntheticImportLookupTableEntry::*; let (rva, name, ordinal) = - match entry { - &HintNameTableRVA ((rva, ref hint_entry)) => { + match *entry { + HintNameTableRVA ((rva, ref hint_entry)) => { // if hint_entry.name = "" && hint_entry.hint = 0 { // println!(" warning hint/name table rva from {} without hint {:#x}", dll, rva); // } - (rva, Cow::Borrowed(hint_entry.name), hint_entry.hint.clone()) + (rva, Cow::Borrowed(hint_entry.name), hint_entry.hint) }, - &OrdinalNumber(ordinal) => { + OrdinalNumber(ordinal) => { let name = format!("ORDINAL {}", ordinal); (0x0, Cow::Owned(name), ordinal) }, }; let import = Import { - name: name, - ordinal: ordinal, dll: dll, - size: T::size_of(), offset: offset, rva: rva as usize + name, + ordinal, dll, + size: T::size_of(), offset, rva: rva as usize }; imports.push(import); } diff --git 
a/third_party/rust/goblin/src/pe/mod.rs b/third_party/rust/goblin/src/pe/mod.rs index b218f45fc92c..986a4e208cbc 100644 --- a/third_party/rust/goblin/src/pe/mod.rs +++ b/third_party/rust/goblin/src/pe/mod.rs @@ -3,7 +3,7 @@ // TODO: panics with unwrap on None for apisetschema.dll, fhuxgraphics.dll and some others -use alloc::vec::Vec; +use crate::alloc::vec::Vec; pub mod header; pub mod optional_header; @@ -13,10 +13,16 @@ pub mod data_directories; pub mod export; pub mod import; pub mod debug; -mod utils; +pub mod exception; +pub mod symbol; +pub mod relocation; +pub mod utils; -use error; -use container; +use crate::error; +use crate::container; +use crate::strtab; + +use log::debug; #[derive(Debug)] /// An analyzed PE32/PE32+ binary @@ -48,7 +54,9 @@ pub struct PE<'a> { /// The list of libraries which this binary imports symbols from pub libraries: Vec<&'a str>, /// Debug information, if any, contained in the PE header - pub debug_data: Option> + pub debug_data: Option>, + /// Exception handling and stack unwind information, if any, contained in the PE header + pub exception_data: Option>, } impl<'a> PE<'a> { @@ -56,14 +64,8 @@ impl<'a> PE<'a> { pub fn parse(bytes: &'a [u8]) -> error::Result { let header = header::Header::parse(bytes)?; debug!("{:#?}", header); - let offset = &mut (header.dos_header.pe_pointer as usize + header::SIZEOF_COFF_HEADER + header.coff_header.size_of_optional_header as usize); - let nsections = header.coff_header.number_of_sections as usize; - let mut sections = Vec::with_capacity(nsections); - for i in 0..nsections { - let section = section_table::SectionTable::parse(bytes, offset)?; - debug!("({}) {:#?}", i, section); - sections.push(section); - } + let offset = &mut (header.dos_header.pe_pointer as usize + header::SIZEOF_PE_MAGIC + header::SIZEOF_COFF_HEADER + header.coff_header.size_of_optional_header as usize); + let sections = header.coff_header.sections(bytes, offset)?; let is_lib = 
characteristic::is_dll(header.coff_header.characteristics); let mut entry = 0; let mut image_base = 0; @@ -74,27 +76,29 @@ impl<'a> PE<'a> { let mut import_data = None; let mut libraries = vec![]; let mut debug_data = None; + let mut exception_data = None; let mut is_64 = false; if let Some(optional_header) = header.optional_header { entry = optional_header.standard_fields.address_of_entry_point as usize; image_base = optional_header.windows_fields.image_base as usize; is_64 = optional_header.container()? == container::Container::Big; debug!("entry {:#x} image_base {:#x} is_64: {}", entry, image_base, is_64); - if let &Some(export_table) = optional_header.data_directories.get_export_table() { - if let Ok(ed) = export::ExportData::parse(bytes, &export_table, &sections) { + let file_alignment = optional_header.windows_fields.file_alignment; + if let Some(export_table) = *optional_header.data_directories.get_export_table() { + if let Ok(ed) = export::ExportData::parse(bytes, export_table, &sections, file_alignment) { debug!("export data {:#?}", ed); - exports = export::Export::parse(bytes, &ed, &sections)?; + exports = export::Export::parse(bytes, &ed, &sections, file_alignment)?; name = ed.name; debug!("name: {:#?}", name); export_data = Some(ed); } } debug!("exports: {:#?}", exports); - if let &Some(import_table) = optional_header.data_directories.get_import_table() { + if let Some(import_table) = *optional_header.data_directories.get_import_table() { let id = if is_64 { - import::ImportData::parse::(bytes, &import_table, &sections)? + import::ImportData::parse::(bytes, import_table, &sections, file_alignment)? } else { - import::ImportData::parse::(bytes, &import_table, &sections)? + import::ImportData::parse::(bytes, import_table, &sections, file_alignment)?
}; debug!("import data {:#?}", id); if is_64 { @@ -108,25 +112,59 @@ impl<'a> PE<'a> { import_data = Some(id); } debug!("imports: {:#?}", imports); - if let &Some(debug_table) = optional_header.data_directories.get_debug_table() { - debug_data = Some(debug::DebugData::parse(bytes, &debug_table, &sections)?); + if let Some(debug_table) = *optional_header.data_directories.get_debug_table() { + debug_data = Some(debug::DebugData::parse(bytes, debug_table, &sections, file_alignment)?); + } + + debug!("exception data: {:#?}", exception_data); + if let Some(exception_table) = *optional_header.data_directories.get_exception_table() { + exception_data = Some(exception::ExceptionData::parse(bytes, exception_table, &sections, file_alignment)?); } } Ok( PE { - header: header, - sections: sections, + header, + sections, size: 0, - name: name, - is_lib: is_lib, - is_64: is_64, - entry: entry, - image_base: image_base, - export_data: export_data, - import_data: import_data, - exports: exports, - imports: imports, - libraries: libraries, - debug_data: debug_data, + name, + is_lib, + is_64, + entry, + image_base, + export_data, + import_data, + exports, + imports, + libraries, + debug_data, + exception_data, }) } } + +/// An analyzed COFF object +#[derive(Debug)] +pub struct Coff<'a> { + /// The COFF header + pub header: header::CoffHeader, + /// A list of the sections in this COFF binary + pub sections: Vec, + /// The COFF symbol table. + pub symbols: symbol::SymbolTable<'a>, + /// The string table. + pub strings: strtab::Strtab<'a>, +} + +impl<'a> Coff<'a> { + /// Reads a COFF object from the underlying `bytes` + pub fn parse(bytes: &'a [u8]) -> error::Result { + let offset = &mut 0; + let header = header::CoffHeader::parse(bytes, offset)?; + debug!("{:#?}", header); + // TODO: maybe parse optional header, but it isn't present for Windows.
+ *offset += header.size_of_optional_header as usize; + let sections = header.sections(bytes, offset)?; + let symbols = header.symbols(bytes)?; + let strings = header.strings(bytes)?; + Ok(Coff { header, sections, symbols, strings }) + } +} diff --git a/third_party/rust/goblin/src/pe/optional_header.rs b/third_party/rust/goblin/src/pe/optional_header.rs index 8d063e0c021e..5da4d986c352 100644 --- a/third_party/rust/goblin/src/pe/optional_header.rs +++ b/third_party/rust/goblin/src/pe/optional_header.rs @@ -1,9 +1,10 @@ -use container; -use error; +use crate::container; +use crate::error; -use pe::data_directories; +use crate::pe::data_directories; -use scroll::{ctx, Endian, LE, Pread}; +use scroll::{ctx, Endian, LE}; +use scroll::{Pread, Pwrite, SizeWith}; /// standard COFF fields #[repr(C)] @@ -62,11 +63,11 @@ impl From for StandardFields { magic: fields.magic, major_linker_version: fields.major_linker_version, minor_linker_version: fields.minor_linker_version, - size_of_code: fields.size_of_code as u64, - size_of_initialized_data: fields.size_of_initialized_data as u64, - size_of_uninitialized_data: fields.size_of_uninitialized_data as u64, - address_of_entry_point: fields.address_of_entry_point as u64, - base_of_code: fields.base_of_code as u64, + size_of_code: u64::from(fields.size_of_code), + size_of_initialized_data: u64::from(fields.size_of_initialized_data), + size_of_uninitialized_data: u64::from(fields.size_of_uninitialized_data), + address_of_entry_point: u64::from(fields.address_of_entry_point), + base_of_code: u64::from(fields.base_of_code), base_of_data: fields.base_of_data, } } @@ -78,11 +79,11 @@ impl From for StandardFields { magic: fields.magic, major_linker_version: fields.major_linker_version, minor_linker_version: fields.minor_linker_version, - size_of_code: fields.size_of_code as u64, - size_of_initialized_data: fields.size_of_initialized_data as u64, - size_of_uninitialized_data: fields.size_of_uninitialized_data as u64, - 
address_of_entry_point: fields.address_of_entry_point as u64, - base_of_code: fields.base_of_code as u64, + size_of_code: u64::from(fields.size_of_code), + size_of_initialized_data: u64::from(fields.size_of_initialized_data), + size_of_uninitialized_data: u64::from(fields.size_of_uninitialized_data), + address_of_entry_point: u64::from(fields.address_of_entry_point), + base_of_code: u64::from(fields.base_of_code), base_of_data: 0, } } @@ -182,7 +183,7 @@ pub const SIZEOF_WINDOWS_FIELDS_64: usize = 88; impl From for WindowsFields { fn from(windows: WindowsFields32) -> Self { WindowsFields { - image_base: windows.image_base as u64, + image_base: u64::from(windows.image_base), section_alignment: windows.section_alignment, file_alignment: windows.file_alignment, major_operating_system_version: windows.major_operating_system_version, @@ -197,10 +198,10 @@ impl From for WindowsFields { check_sum: windows.check_sum, subsystem: windows.subsystem, dll_characteristics: windows.dll_characteristics, - size_of_stack_reserve: windows.size_of_stack_reserve as u64, - size_of_stack_commit: windows.size_of_stack_commit as u64, - size_of_heap_reserve: windows.size_of_heap_reserve as u64, - size_of_heap_commit: windows.size_of_heap_commit as u64, + size_of_stack_reserve: u64::from(windows.size_of_stack_reserve), + size_of_stack_commit: u64::from(windows.size_of_stack_commit), + size_of_heap_reserve: u64::from(windows.size_of_heap_reserve), + size_of_heap_commit: u64::from(windows.size_of_heap_commit), loader_flags: windows.loader_flags, number_of_rva_and_sizes: windows.number_of_rva_and_sizes, } @@ -254,14 +255,14 @@ impl OptionalHeader { Ok(container::Container::Big) }, magic => { - Err(error::Error::BadMagic(magic as u64)) + Err(error::Error::BadMagic(u64::from(magic))) } } } } impl<'a> ctx::TryFromCtx<'a, Endian> for OptionalHeader { - type Error = ::error::Error; + type Error = crate::error::Error; type Size = usize; fn try_from_ctx(bytes: &'a [u8], _: Endian) -> 
error::Result<(Self, Self::Size)> { let magic = bytes.pread_with::(0, LE)?; @@ -277,13 +278,13 @@ impl<'a> ctx::TryFromCtx<'a, Endian> for OptionalHeader { let windows_fields = bytes.gread_with::(offset, LE)?; (standard_fields, windows_fields) }, - _ => return Err(error::Error::BadMagic(magic as u64)) + _ => return Err(error::Error::BadMagic(u64::from(magic))) }; let data_directories = data_directories::DataDirectories::parse(&bytes, windows_fields.number_of_rva_and_sizes as usize, offset)?; Ok ((OptionalHeader { - standard_fields: standard_fields, - windows_fields: windows_fields, - data_directories: data_directories, + standard_fields, + windows_fields, + data_directories, }, 0)) // TODO: FIXME } } diff --git a/third_party/rust/goblin/src/pe/relocation.rs b/third_party/rust/goblin/src/pe/relocation.rs new file mode 100644 index 000000000000..ab8398ce73b4 --- /dev/null +++ b/third_party/rust/goblin/src/pe/relocation.rs @@ -0,0 +1,133 @@ +use crate::error; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// Size of a single COFF relocation. +pub const COFF_RELOCATION_SIZE: usize = 10; + +// x86 relocations. + +/// The relocation is ignored. +pub const IMAGE_REL_I386_ABSOLUTE: u16 = 0x0000; +/// Not supported. +pub const IMAGE_REL_I386_DIR16: u16 = 0x0001; +/// Not supported. +pub const IMAGE_REL_I386_REL16: u16 = 0x0002; +/// The target's 32-bit VA. +pub const IMAGE_REL_I386_DIR32: u16 = 0x0006; +/// The target's 32-bit RVA. +pub const IMAGE_REL_I386_DIR32NB: u16 = 0x0007; +/// Not supported. +pub const IMAGE_REL_I386_SEG12: u16 = 0x0009; +/// The 16-bit section index of the section that contains the target. +/// +/// This is used to support debugging information. +pub const IMAGE_REL_I386_SECTION: u16 = 0x000A; +/// The 32-bit offset of the target from the beginning of its section. +/// +/// This is used to support debugging information and static thread local storage. +pub const IMAGE_REL_I386_SECREL: u16 = 0x000B; +/// The CLR token. 
+pub const IMAGE_REL_I386_TOKEN: u16 = 0x000C; +/// A 7-bit offset from the base of the section that contains the target. +pub const IMAGE_REL_I386_SECREL7: u16 = 0x000D; +/// The 32-bit relative displacement to the target. +/// +/// This supports the x86 relative branch and call instructions. +pub const IMAGE_REL_I386_REL32: u16 = 0x0014; + +// x86-64 relocations. + +/// The relocation is ignored. +pub const IMAGE_REL_AMD64_ABSOLUTE: u16 = 0x0000; +/// The 64-bit VA of the relocation target. +pub const IMAGE_REL_AMD64_ADDR64: u16 = 0x0001; +/// The 32-bit VA of the relocation target. +pub const IMAGE_REL_AMD64_ADDR32: u16 = 0x0002; +/// The 32-bit address without an image base (RVA). +pub const IMAGE_REL_AMD64_ADDR32NB: u16 = 0x0003; +/// The 32-bit relative address from the byte following the relocation. +pub const IMAGE_REL_AMD64_REL32: u16 = 0x0004; +/// The 32-bit address relative to byte distance 1 from the relocation. +pub const IMAGE_REL_AMD64_REL32_1: u16 = 0x0005; +/// The 32-bit address relative to byte distance 2 from the relocation. +pub const IMAGE_REL_AMD64_REL32_2: u16 = 0x0006; +/// The 32-bit address relative to byte distance 3 from the relocation. +pub const IMAGE_REL_AMD64_REL32_3: u16 = 0x0007; +/// The 32-bit address relative to byte distance 4 from the relocation. +pub const IMAGE_REL_AMD64_REL32_4: u16 = 0x0008; +/// The 32-bit address relative to byte distance 5 from the relocation. +pub const IMAGE_REL_AMD64_REL32_5: u16 = 0x0009; +/// The 16-bit section index of the section that contains the target. +/// +/// This is used to support debugging information. +pub const IMAGE_REL_AMD64_SECTION: u16 = 0x000A; +/// The 32-bit offset of the target from the beginning of its section. +/// +/// This is used to support debugging information and static thread local storage. +pub const IMAGE_REL_AMD64_SECREL: u16 = 0x000B; +/// A 7-bit unsigned offset from the base of the section that contains the target. 
+pub const IMAGE_REL_AMD64_SECREL7: u16 = 0x000C; +/// CLR tokens. +pub const IMAGE_REL_AMD64_TOKEN: u16 = 0x000D; +/// A 32-bit signed span-dependent value emitted into the object. +pub const IMAGE_REL_AMD64_SREL32: u16 = 0x000E; +/// A pair that must immediately follow every span-dependent value. +pub const IMAGE_REL_AMD64_PAIR: u16 = 0x000F; +/// A 32-bit signed span-dependent value that is applied at link time. +pub const IMAGE_REL_AMD64_SSPAN32: u16 = 0x0010; + +/// A COFF relocation. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Relocation { + /// The address of the item to which relocation is applied. + /// + /// This is the offset from the beginning of the section, plus the + /// value of the section's `virtual_address` field. + pub virtual_address: u32, + /// A zero-based index into the symbol table. + /// + /// This symbol gives the address that is to be used for the relocation. If the specified + /// symbol has section storage class, then the symbol's address is the address with the + /// first section of the same name. + pub symbol_table_index: u32, + /// A value that indicates the kind of relocation that should be performed. + /// + /// Valid relocation types depend on machine type. + pub typ: u16, +} + +/// An iterator for COFF relocations. +#[derive(Default)] +pub struct Relocations<'a> { + offset: usize, + relocations: &'a [u8], +} + +impl<'a> Relocations<'a> { + /// Parse a COFF relocation table at the given offset. + /// + /// The offset and number of relocations should be from the COFF section header. 
+ pub fn parse(bytes: &'a [u8], offset: usize, number: usize) -> error::Result<Relocations<'a>> { + let relocations = bytes.pread_with(offset, number * COFF_RELOCATION_SIZE)?; + Ok(Relocations { + offset: 0, + relocations, + }) + } +} + +impl<'a> Iterator for Relocations<'a> { + type Item = Relocation; + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.relocations.len() { + None + } else { + Some( + self.relocations + .gread_with(&mut self.offset, scroll::LE) + .unwrap(), + ) + } + } +} diff --git a/third_party/rust/goblin/src/pe/section_table.rs b/third_party/rust/goblin/src/pe/section_table.rs index b1fe3e441e98..1a42c0ebc73f 100644 --- a/third_party/rust/goblin/src/pe/section_table.rs +++ b/third_party/rust/goblin/src/pe/section_table.rs @@ -1,10 +1,13 @@ -use scroll::{self, Pread}; -use error; +use crate::alloc::string::{String, ToString}; +use scroll::{ctx, Pread, Pwrite}; +use crate::error::{self, Error}; +use crate::pe::relocation; #[repr(C)] -#[derive(Debug, PartialEq, Copy, Clone, Default)] +#[derive(Debug, PartialEq, Clone, Default)] pub struct SectionTable { pub name: [u8; 8], + pub real_name: Option<String>, pub virtual_size: u32, pub virtual_address: u32, pub size_of_raw_data: u32, @@ -18,13 +21,38 @@ pub struct SectionTable { pub const SIZEOF_SECTION_TABLE: usize = 8 * 5; +// Based on https://github.com/llvm-mirror/llvm/blob/af7b1832a03ab6486c42a40d21695b2c03b2d8a3/lib/Object/COFFObjectFile.cpp#L70 +// Decodes a string table entry in base 64 (//AAAAAA). Expects string without +// prefixed slashes.
+fn base64_decode_string_entry(s: &str) -> Result<usize, ()> { + assert!(s.len() <= 6, "String too long, possible overflow."); + + let mut val = 0; + for c in s.bytes() { + let v = if b'A' <= c && c <= b'Z' { // 00..=25 + c - b'A' + } else if b'a' <= c && c <= b'z' { // 26..=51 + c - b'a' + 26 + } else if b'0' <= c && c <= b'9' { // 52..=61 + c - b'0' + 52 + } else if c == b'+' { // 62 + 62 + } else if c == b'/' { // 63 + 63 + } else { + return Err(()) + }; + val = val * 64 + v as usize; + } + Ok(val) +} + impl SectionTable { - pub fn parse(bytes: &[u8], offset: &mut usize) -> error::Result<Self> { + pub fn parse(bytes: &[u8], offset: &mut usize, string_table_offset: usize) -> error::Result<Self> { let mut table = SectionTable::default(); let mut name = [0u8; 8]; - for i in 0..8 { - name[i] = bytes.gread_with(offset, scroll::LE)?; - } + name.copy_from_slice(bytes.gread_with(offset, 8)?); + table.name = name; table.virtual_size = bytes.gread_with(offset, scroll::LE)?; table.virtual_address = bytes.gread_with(offset, scroll::LE)?; @@ -35,66 +63,197 @@ impl SectionTable { table.number_of_relocations = bytes.gread_with(offset, scroll::LE)?; table.number_of_linenumbers = bytes.gread_with(offset, scroll::LE)?; table.characteristics = bytes.gread_with(offset, scroll::LE)?; + + if let Some(idx) = table.name_offset()? { + table.real_name = Some(bytes.pread::<&str>(string_table_offset + idx)?.to_string()); + } Ok(table) } + + pub fn name_offset(&self) -> error::Result<Option<usize>> { + // Based on https://github.com/llvm-mirror/llvm/blob/af7b1832a03ab6486c42a40d21695b2c03b2d8a3/lib/Object/COFFObjectFile.cpp#L1054 + if self.name[0] == b'/' { + let idx: usize = if self.name[1] == b'/' { + let b64idx = self.name.pread::<&str>(2)?; + base64_decode_string_entry(b64idx).map_err(|_| + Error::Malformed(format!("Invalid indirect section name //{}: base64 decoding failed", b64idx)))?
+ } else { + let name = self.name.pread::<&str>(1)?; + name.parse().map_err(|err| + Error::Malformed(format!("Invalid indirect section name /{}: {}", name, err)))? + }; + Ok(Some(idx)) + } else { + Ok(None) + } + } + + pub fn set_name_offset(&mut self, mut idx: usize) -> error::Result<()> { + if idx <= 9_999_999 { // 10^7 - 1 + // write!(&mut self.name[1..], "{}", idx) without using io::Write. + // We write into a temporary since we calculate digits starting at the right. + let mut name = [0; 7]; + let mut len = 0; + if idx == 0 { + name[6] = b'0'; + len = 1; + } else { + while idx != 0 { + let rem = (idx % 10) as u8; + idx /= 10; + name[6 - len] = b'0' + rem; + len += 1; + } + } + self.name = [0; 8]; + self.name[0] = b'/'; + self.name[1..][..len].copy_from_slice(&name[7 - len..]); + Ok(()) + } else if idx as u64 <= 0xfff_fff_fff { // 64^6 - 1 + self.name[0] = b'/'; + self.name[1] = b'/'; + for i in 0..6 { + let rem = (idx % 64) as u8; + idx /= 64; + let c = match rem { + 0..=25 => b'A' + rem, + 26..=51 => b'a' + rem - 26, + 52..=61 => b'0' + rem - 52, + 62 => b'+', + 63 => b'/', + _ => unreachable!(), + }; + self.name[7 - i] = c; + } + Ok(()) + } else { + Err(Error::Malformed(format!("Invalid section name offset: {}", idx))) + } + } + pub fn name(&self) -> error::Result<&str> { - Ok(self.name.pread(0)?) + match self.real_name.as_ref() { + Some(s) => Ok(s), + None => Ok(self.name.pread(0)?) 
+ } + } + + pub fn relocations<'a>(&self, bytes: &'a[u8]) -> error::Result<relocation::Relocations<'a>> { + let offset = self.pointer_to_relocations as usize; + let number = self.number_of_relocations as usize; + relocation::Relocations::parse(bytes, offset, number) + } +} + +impl ctx::SizeWith<scroll::Endian> for SectionTable { + type Units = usize; + fn size_with(_ctx: &scroll::Endian) -> usize { + SIZEOF_SECTION_TABLE + } +} + +impl ctx::TryIntoCtx<scroll::Endian> for SectionTable { + type Error = error::Error; + type Size = usize; + fn try_into_ctx(self, bytes: &mut [u8], ctx: scroll::Endian) -> Result<usize, Self::Error> { + let offset = &mut 0; + bytes.gwrite(&self.name[..], offset)?; + bytes.gwrite_with(self.virtual_size, offset, ctx)?; + bytes.gwrite_with(self.virtual_address, offset, ctx)?; + bytes.gwrite_with(self.size_of_raw_data, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_raw_data, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_relocations, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_linenumbers, offset, ctx)?; + bytes.gwrite_with(self.number_of_relocations, offset, ctx)?; + bytes.gwrite_with(self.number_of_linenumbers, offset, ctx)?; + bytes.gwrite_with(self.characteristics, offset, ctx)?; + Ok(SIZEOF_SECTION_TABLE) + } +} + +impl ctx::IntoCtx<scroll::Endian> for SectionTable { + fn into_ctx(self, bytes: &mut [u8], ctx: scroll::Endian) { + bytes.pwrite_with(self, 0, ctx).unwrap(); } } /// The section should not be padded to the next boundary. This flag is obsolete and is replaced /// by `IMAGE_SCN_ALIGN_1BYTES`. This is valid only for object files. -pub const IMAGE_SCN_TYPE_NO_PAD: u32 = 0x00000008; +pub const IMAGE_SCN_TYPE_NO_PAD: u32 = 0x0000_0008; /// The section contains executable code. -pub const IMAGE_SCN_CNT_CODE: u32 = 0x00000020; +pub const IMAGE_SCN_CNT_CODE: u32 = 0x0000_0020; /// The section contains initialized data. -pub const IMAGE_SCN_CNT_INITIALIZED_DATA: u32 = 0x00000040; +pub const IMAGE_SCN_CNT_INITIALIZED_DATA: u32 = 0x0000_0040; /// The section contains uninitialized data.
-pub const IMAGE_SCN_CNT_UNINITIALIZED_DATA: u32 = 0x00000080; -pub const IMAGE_SCN_LNK_OTHER: u32 = 0x00000100; +pub const IMAGE_SCN_CNT_UNINITIALIZED_DATA: u32 = 0x0000_0080; +pub const IMAGE_SCN_LNK_OTHER: u32 = 0x0000_0100; /// The section contains comments or other information. The .drectve section has this type. /// This is valid for object files only. -pub const IMAGE_SCN_LNK_INFO: u32 = 0x00000200; +pub const IMAGE_SCN_LNK_INFO: u32 = 0x0000_0200; /// The section will not become part of the image. This is valid only for object files. -pub const IMAGE_SCN_LNK_REMOVE: u32 = 0x00000800; +pub const IMAGE_SCN_LNK_REMOVE: u32 = 0x0000_0800; /// The section contains COMDAT data. This is valid only for object files. -pub const IMAGE_SCN_LNK_COMDAT: u32 = 0x00001000; +pub const IMAGE_SCN_LNK_COMDAT: u32 = 0x0000_1000; /// The section contains data referenced through the global pointer (GP). -pub const IMAGE_SCN_GPREL: u32 = 0x00008000; -pub const IMAGE_SCN_MEM_PURGEABLE: u32 = 0x00020000; -pub const IMAGE_SCN_MEM_16BIT: u32 = 0x00020000; -pub const IMAGE_SCN_MEM_LOCKED: u32 = 0x00040000; -pub const IMAGE_SCN_MEM_PRELOAD: u32 = 0x00080000; +pub const IMAGE_SCN_GPREL: u32 = 0x0000_8000; +pub const IMAGE_SCN_MEM_PURGEABLE: u32 = 0x0002_0000; +pub const IMAGE_SCN_MEM_16BIT: u32 = 0x0002_0000; +pub const IMAGE_SCN_MEM_LOCKED: u32 = 0x0004_0000; +pub const IMAGE_SCN_MEM_PRELOAD: u32 = 0x0008_0000; -pub const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x00100000; -pub const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x00200000; -pub const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x00300000; -pub const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x00400000; -pub const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x00500000; -pub const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x00600000; -pub const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x00700000; -pub const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x00800000; -pub const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x00900000; -pub const IMAGE_SCN_ALIGN_512BYTES: u32 = 0x00A00000; -pub const IMAGE_SCN_ALIGN_1024BYTES: u32 = 
0x00B00000; -pub const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0x00C00000; -pub const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0x00D00000; -pub const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0x00E00000; -pub const IMAGE_SCN_ALIGN_MASK: u32 = 0x00F00000; +pub const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x0010_0000; +pub const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x0020_0000; +pub const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x0030_0000; +pub const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x0040_0000; +pub const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x0050_0000; +pub const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x0060_0000; +pub const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x0070_0000; +pub const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x0080_0000; +pub const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x0090_0000; +pub const IMAGE_SCN_ALIGN_512BYTES: u32 = 0x00A0_0000; +pub const IMAGE_SCN_ALIGN_1024BYTES: u32 = 0x00B0_0000; +pub const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0x00C0_0000; +pub const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0x00D0_0000; +pub const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0x00E0_0000; +pub const IMAGE_SCN_ALIGN_MASK: u32 = 0x00F0_0000; /// The section contains extended relocations. -pub const IMAGE_SCN_LNK_NRELOC_OVFL: u32 = 0x01000000; +pub const IMAGE_SCN_LNK_NRELOC_OVFL: u32 = 0x0100_0000; /// The section can be discarded as needed. -pub const IMAGE_SCN_MEM_DISCARDABLE: u32 = 0x02000000; +pub const IMAGE_SCN_MEM_DISCARDABLE: u32 = 0x0200_0000; /// The section cannot be cached. -pub const IMAGE_SCN_MEM_NOT_CACHED: u32 = 0x04000000; +pub const IMAGE_SCN_MEM_NOT_CACHED: u32 = 0x0400_0000; /// The section is not pageable. -pub const IMAGE_SCN_MEM_NOT_PAGED: u32 = 0x08000000; +pub const IMAGE_SCN_MEM_NOT_PAGED: u32 = 0x0800_0000; /// The section can be shared in memory. -pub const IMAGE_SCN_MEM_SHARED: u32 = 0x10000000; +pub const IMAGE_SCN_MEM_SHARED: u32 = 0x1000_0000; /// The section can be executed as code. -pub const IMAGE_SCN_MEM_EXECUTE: u32 = 0x20000000; +pub const IMAGE_SCN_MEM_EXECUTE: u32 = 0x2000_0000; /// The section can be read. 
-pub const IMAGE_SCN_MEM_READ: u32 = 0x40000000; +pub const IMAGE_SCN_MEM_READ: u32 = 0x4000_0000; /// The section can be written to. -pub const IMAGE_SCN_MEM_WRITE: u32 = 0x80000000; +pub const IMAGE_SCN_MEM_WRITE: u32 = 0x8000_0000; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn set_name_offset() { + let mut section = SectionTable::default(); + for &(offset, name) in [ + (0usize, b"/0\0\0\0\0\0\0"), + (1, b"/1\0\0\0\0\0\0"), + (9_999_999, b"/9999999"), + (10_000_000, b"//AAmJaA"), + #[cfg(target_pointer_width = "64")] + (0xfff_fff_fff, b"////////"), + ].iter() { + section.set_name_offset(offset).unwrap(); + assert_eq!(&section.name, name); + assert_eq!(section.name_offset().unwrap(), Some(offset)); + } + #[cfg(target_pointer_width = "64")] + assert!(section.set_name_offset(0x1_000_000_000).is_err()); + } +} diff --git a/third_party/rust/goblin/src/pe/symbol.rs b/third_party/rust/goblin/src/pe/symbol.rs new file mode 100644 index 000000000000..e44f27dc920c --- /dev/null +++ b/third_party/rust/goblin/src/pe/symbol.rs @@ -0,0 +1,513 @@ +use crate::alloc::vec::Vec; +use crate::error; +use crate::strtab; +use core::fmt::{self, Debug}; +use scroll::{ctx, IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// Size of a single symbol in the COFF Symbol Table. +pub const COFF_SYMBOL_SIZE: usize = 18; + +// Values for `Symbol::section_number`. + +/// The symbol record is not yet assigned a section. A `value` of zero +/// indicates that a reference to an external symbol is defined elsewhere. +/// A `value` of non-zero is a common symbol with a size that is specified by the `value`. +pub const IMAGE_SYM_UNDEFINED: i16 = 0; +/// The symbol has an absolute (non-relocatable) `value` and is not an address. +pub const IMAGE_SYM_ABSOLUTE: i16 = -1; +/// The symbol provides general type or debugging information but does not +/// correspond to a section. +pub const IMAGE_SYM_DEBUG: i16 = -2; + +// Base types for `Symbol::typ`. + +/// No type information or unknown base type.
Microsoft tools use this setting +pub const IMAGE_SYM_TYPE_NULL: u16 = 0; +/// No valid type; used with void pointers and functions +pub const IMAGE_SYM_TYPE_VOID: u16 = 1; +/// A character (signed byte) +pub const IMAGE_SYM_TYPE_CHAR: u16 = 2; +/// A 2-byte signed integer +pub const IMAGE_SYM_TYPE_SHORT: u16 = 3; +/// A natural integer type (normally 4 bytes in Windows) +pub const IMAGE_SYM_TYPE_INT: u16 = 4; +/// A 4-byte signed integer +pub const IMAGE_SYM_TYPE_LONG: u16 = 5; +/// A 4-byte floating-point number +pub const IMAGE_SYM_TYPE_FLOAT: u16 = 6; +/// An 8-byte floating-point number +pub const IMAGE_SYM_TYPE_DOUBLE: u16 = 7; +/// A structure +pub const IMAGE_SYM_TYPE_STRUCT: u16 = 8; +/// A union +pub const IMAGE_SYM_TYPE_UNION: u16 = 9; +/// An enumerated type +pub const IMAGE_SYM_TYPE_ENUM: u16 = 10; +/// A member of enumeration (a specific value) +pub const IMAGE_SYM_TYPE_MOE: u16 = 11; +/// A byte; unsigned 1-byte integer +pub const IMAGE_SYM_TYPE_BYTE: u16 = 12; +/// A word; unsigned 2-byte integer +pub const IMAGE_SYM_TYPE_WORD: u16 = 13; +/// An unsigned integer of natural size (normally, 4 bytes) +pub const IMAGE_SYM_TYPE_UINT: u16 = 14; +/// An unsigned 4-byte integer +pub const IMAGE_SYM_TYPE_DWORD: u16 = 15; + +// Derived types for `Symbol::typ`. + +/// No derived type; the symbol is a simple scalar variable. +pub const IMAGE_SYM_DTYPE_NULL: u16 = 0; +/// The symbol is a pointer to base type. +pub const IMAGE_SYM_DTYPE_POINTER: u16 = 1; +/// The symbol is a function that returns a base type. +pub const IMAGE_SYM_DTYPE_FUNCTION: u16 = 2; +/// The symbol is an array of base type. +pub const IMAGE_SYM_DTYPE_ARRAY: u16 = 3; + +pub const IMAGE_SYM_TYPE_MASK: u16 = 0xf; +pub const IMAGE_SYM_DTYPE_SHIFT: usize = 4; + +// Values for `Symbol::storage_class`. + +/// A special symbol that represents the end of function, for debugging purposes. +pub const IMAGE_SYM_CLASS_END_OF_FUNCTION: u8 = 0xff; +/// No assigned storage class. 
+pub const IMAGE_SYM_CLASS_NULL: u8 = 0; +/// The automatic (stack) variable. +/// +/// The `value` field specifies the stack frame offset. +pub const IMAGE_SYM_CLASS_AUTOMATIC: u8 = 1; +/// A value that Microsoft tools use for external symbols. +/// +/// The `value` field indicates the size if the section number is +/// `IMAGE_SYM_UNDEFINED` (0). If the section number is not zero, +/// then the `value` field specifies the offset within the section. +pub const IMAGE_SYM_CLASS_EXTERNAL: u8 = 2; +/// A static symbol. +/// +/// The 'value' field specifies the offset of the symbol within the section. +/// If the `value` field is zero, then the symbol represents a section name. +pub const IMAGE_SYM_CLASS_STATIC: u8 = 3; +/// A register variable. +/// +/// The `value` field specifies the register number. +pub const IMAGE_SYM_CLASS_REGISTER: u8 = 4; +/// A symbol that is defined externally. +pub const IMAGE_SYM_CLASS_EXTERNAL_DEF: u8 = 5; +/// A code label that is defined within the module. +/// +/// The `value` field specifies the offset of the symbol within the section. +pub const IMAGE_SYM_CLASS_LABEL: u8 = 6; +/// A reference to a code label that is not defined. +pub const IMAGE_SYM_CLASS_UNDEFINED_LABEL: u8 = 7; +/// The structure member. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_STRUCT: u8 = 8; +/// A formal argument (parameter) of a function. +/// +/// The `value` field specifies the n th argument. +pub const IMAGE_SYM_CLASS_ARGUMENT: u8 = 9; +/// The structure tag-name entry. +pub const IMAGE_SYM_CLASS_STRUCT_TAG: u8 = 10; +/// A union member. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_UNION: u8 = 11; +/// The Union tag-name entry. +pub const IMAGE_SYM_CLASS_UNION_TAG: u8 = 12; +/// A Typedef entry. +pub const IMAGE_SYM_CLASS_TYPE_DEFINITION: u8 = 13; +/// A static data declaration. 
+pub const IMAGE_SYM_CLASS_UNDEFINED_STATIC: u8 = 14; +/// An enumerated type tagname entry. +pub const IMAGE_SYM_CLASS_ENUM_TAG: u8 = 15; +/// A member of an enumeration. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_ENUM: u8 = 16; +/// A register parameter. +pub const IMAGE_SYM_CLASS_REGISTER_PARAM: u8 = 17; +/// A bit-field reference. +/// +/// The `value` field specifies the n th bit in the bit field. +pub const IMAGE_SYM_CLASS_BIT_FIELD: u8 = 18; +/// A .bb (beginning of block) or .eb (end of block) record. +/// +/// The `value` field is the relocatable address of the code location. +pub const IMAGE_SYM_CLASS_BLOCK: u8 = 100; +/// A value that Microsoft tools use for symbol records that define the extent of a function. +/// +/// Records may be begin function (.bf ), end function ( .ef ), and lines in function ( .lf ). +/// For .lf records, the `value` field gives the number of source lines in the function. +/// For .ef records, the `value` field gives the size of the function code. +pub const IMAGE_SYM_CLASS_FUNCTION: u8 = 101; +/// An end-of-structure entry. +pub const IMAGE_SYM_CLASS_END_OF_STRUCT: u8 = 102; +/// The source-file symbol record. +/// +/// The symbol is followed by auxiliary records that name the file. +pub const IMAGE_SYM_CLASS_FILE: u8 = 103; +/// A definition of a section (Microsoft tools use STATIC storage class instead). +pub const IMAGE_SYM_CLASS_SECTION: u8 = 104; +/// A weak external. +pub const IMAGE_SYM_CLASS_WEAK_EXTERNAL: u8 = 105; +/// A CLR token symbol. +/// +/// The name is an ASCII string that consists of the hexadecimal value of the token. +pub const IMAGE_SYM_CLASS_CLR_TOKEN: u8 = 107; + +/// A COFF symbol. +/// +/// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`]. 
+/// +/// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Symbol { + /// The name of the symbol. + /// + /// An array of 8 bytes is used if the name is not more than 8 bytes long. + /// This array is padded with nulls on the right if the name is less than 8 bytes long. + /// + /// For longer names, the first 4 bytes are all zeros, and the second 4 bytes + /// are an offset into the string table. + pub name: [u8; 8], + /// The value that is associated with the symbol. + /// + /// The interpretation of this field depends on `section_number` and + /// `storage_class`. A typical meaning is the relocatable address. + pub value: u32, + /// A one-based index into the section table. Zero and negative values have special meanings. + pub section_number: i16, + /// A number that represents type. + /// + /// Microsoft tools set this field to 0x20 (function) or 0x0 (not a function). + pub typ: u16, + /// An enumerated value that represents storage class. + pub storage_class: u8, + /// The number of auxiliary symbol table entries that follow this record. + /// + /// Each auxiliary record is the same size as a standard symbol-table record (18 bytes), + /// but rather than define a new symbol, the auxiliary record gives additional information + /// on the last symbol defined. + pub number_of_aux_symbols: u8, +} + +impl Symbol { + /// Parse the symbol at the given offset. + /// + /// If the symbol has an inline name, then also returns a reference to the name's + /// location in `bytes`. + pub fn parse<'a>(bytes: &'a [u8], offset: usize) -> error::Result<(Option<&'a str>, Symbol)> { + let symbol = bytes.pread::<Symbol>(offset)?; + let name = if symbol.name[0] != 0 { + bytes + .pread_with(offset, ctx::StrCtx::DelimiterUntil(0, 8)) + .ok() + } else { + None + }; + Ok((name, symbol)) + } + + /// Returns the symbol name.
+ /// + /// This may be a reference to an inline name in the symbol, or to + /// a strtab entry. + pub fn name<'a>(&'a self, strtab: &'a strtab::Strtab) -> error::Result<&'a str> { + if let Some(offset) = self.name_offset() { + strtab.get(offset as usize).unwrap_or_else(|| { + Err(error::Error::Malformed(format!( + "Invalid Symbol name offset {:#x}", + offset + ))) + }) + } else { + Ok(self.name.pread(0)?) + } + } + + /// Return the strtab offset of the symbol name. + /// + /// Returns `None` if the name is inline. + pub fn name_offset(&self) -> Option<u32> { + if self.name[0] == 0 { + self.name.pread_with(4, scroll::LE).ok() + } else { + None + } + } + + /// Set the strtab offset of the symbol name. + pub fn set_name_offset(&mut self, offset: u32) { + self.name[..4].copy_from_slice(&[0; 4]); + self.name.pwrite_with(offset, 4, scroll::LE).unwrap(); + } + + /// Return the base type of the symbol. + /// + /// This type uses the `IMAGE_SYM_TYPE_*` definitions. + pub fn base_type(&self) -> u16 { + self.typ & IMAGE_SYM_TYPE_MASK + } + + /// Return the derived type of the symbol. + /// + /// This type uses the `IMAGE_SYM_DTYPE_*` definitions. + pub fn derived_type(&self) -> u16 { + self.typ >> IMAGE_SYM_DTYPE_SHIFT + } + + /// Return true for function definitions. + /// + /// These symbols use `AuxFunctionDefinition` for auxiliary symbol records. + pub fn is_function_definition(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_EXTERNAL + && self.derived_type() == IMAGE_SYM_DTYPE_FUNCTION + && self.section_number > 0 + } + + /// Return true for weak external symbols. + /// + /// These symbols use `AuxWeakExternal` for auxiliary symbol records. + pub fn is_weak_external(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_WEAK_EXTERNAL + } + + /// Return true for file symbol records. + /// + /// The auxiliary records contain the name of the source code file.
+ pub fn is_file(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_FILE + } + + /// Return true for section definitions. + /// + /// These symbols use `AuxSectionDefinition` for auxiliary symbol records. + pub fn is_section_definition(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_STATIC && self.number_of_aux_symbols > 0 + } +} + +/// Auxiliary symbol record for function definitions. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxFunctionDefinition { + /// The symbol-table index of the corresponding `.bf` (begin function) symbol record. + pub tag_index: u32, + /// The size of the executable code for the function itself. + /// + /// If the function is in its own section, the `size_of_raw_data` in the section header + /// is greater or equal to this field, depending on alignment considerations. + pub total_size: u32, + /// The file offset of the first COFF line-number entry for the function, + /// or zero if none exists. + pub pointer_to_line_number: u32, + /// The symbol-table index of the record for the next function. + /// + /// If the function is the last in the symbol table, this field is set to zero. + pub pointer_to_next_function: u32, + /// Unused padding. + pub unused: [u8; 2], +} + +/// Auxiliary symbol record for symbols with storage class `IMAGE_SYM_CLASS_FUNCTION`. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxBeginAndEndFunction { + /// Unused padding. + pub unused1: [u8; 4], + /// The actual ordinal line number within the source file, corresponding + /// to the `.bf` or `.ef` record. + pub line_number: u16, + /// Unused padding. + pub unused2: [u8; 6], + /// The symbol-table index of the next `.bf` symbol record. + /// + /// If the function is the last in the symbol table, this field is set to zero. + /// It is not used for `.ef` records. 
+ pub pointer_to_next_function: u32, + /// Unused padding. + pub unused3: [u8; 2], +} + +// Values for the `characteristics` field of `AuxWeakExternal`. + +/// Indicates that no library search for the symbol should be performed. +pub const IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY: u32 = 1; +/// Indicates that a library search for the symbol should be performed. +pub const IMAGE_WEAK_EXTERN_SEARCH_LIBRARY: u32 = 2; +/// Indicates that the symbol is an alias for the symbol given by the `tag_index` field. +pub const IMAGE_WEAK_EXTERN_SEARCH_ALIAS: u32 = 3; + +/// Auxiliary symbol record for weak external symbols. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxWeakExternal { + /// The symbol-table index of the symbol to be linked if an external definition is not found. + pub tag_index: u32, + /// Flags that control how the symbol should be linked. + pub characteristics: u32, + /// Unused padding. + pub unused: [u8; 10], +} + +// Values for the `selection` field of `AuxSectionDefinition`. + +/// If this symbol is already defined, the linker issues a "multiply defined symbol" error. +pub const IMAGE_COMDAT_SELECT_NODUPLICATES: u8 = 1; +/// Any section that defines the same COMDAT symbol can be linked; the rest are removed. +pub const IMAGE_COMDAT_SELECT_ANY: u8 = 2; +/// The linker chooses an arbitrary section among the definitions for this symbol. +/// +/// If all definitions are not the same size, a "multiply defined symbol" error is issued. +pub const IMAGE_COMDAT_SELECT_SAME_SIZE: u8 = 3; +/// The linker chooses an arbitrary section among the definitions for this symbol. +/// +/// If all definitions do not match exactly, a "multiply defined symbol" error is issued. +pub const IMAGE_COMDAT_SELECT_EXACT_MATCH: u8 = 4; +/// The section is linked if a certain other COMDAT section is linked. 
+/// +/// This other section is indicated by the `number` field of the auxiliary symbol record +/// for the section definition. This setting is useful for definitions that have components +/// in multiple sections (for example, code in one and data in another), but where all must +/// be linked or discarded as a set. The other section with which this section is associated +/// must be a COMDAT section; it cannot be another associative COMDAT section (that is, the +/// other section cannot have `IMAGE_COMDAT_SELECT_ASSOCIATIVE` set). +pub const IMAGE_COMDAT_SELECT_ASSOCIATIVE: u8 = 5; +/// The linker chooses the largest definition from among all of the definitions for this symbol. +/// +/// If multiple definitions have this size, the choice between them is arbitrary. +pub const IMAGE_COMDAT_SELECT_LARGEST: u8 = 6; + +/// Auxiliary symbol record for section definitions. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxSectionDefinition { + /// The size of section data; the same as `size_of_raw_data` in the section header. + pub length: u32, + /// The number of relocation entries for the section. + pub number_of_relocations: u16, + /// The number of line-number entries for the section. + pub number_of_line_numbers: u16, + /// The checksum for communal data. + /// + /// It is applicable if the `IMAGE_SCN_LNK_COMDAT` flag is set in the section header. + pub checksum: u32, + /// One-based index into the section table for the associated section. + /// + /// This is used when the `selection` field is `IMAGE_COMDAT_SELECT_ASSOCIATIVE`. + pub number: u16, + /// The COMDAT selection number. + /// + /// This is applicable if the section is a COMDAT section. + pub selection: u8, + /// Unused padding. + pub unused: [u8; 3], +} + +/// A COFF symbol table. +pub struct SymbolTable<'a> { + symbols: &'a [u8], +} + +impl<'a> SymbolTable<'a> { + /// Parse a COFF symbol table at the given offset. 
+ /// + /// The offset and number of symbols should be from the COFF header. + pub fn parse(bytes: &'a [u8], offset: usize, number: usize) -> error::Result<SymbolTable<'a>> { + let symbols = bytes.pread_with(offset, Self::size(number))?; + Ok(SymbolTable { symbols }) + } + + /// Get the size in bytes of the symbol table. + pub fn size(number: usize) -> usize { + number * COFF_SYMBOL_SIZE + } + + /// Get the symbol at the given index. + /// + /// If the symbol has an inline name, then also returns a reference to the name's + /// location in `bytes`. + pub fn get(&self, index: usize) -> Option<(Option<&'a str>, Symbol)> { + let offset = index * COFF_SYMBOL_SIZE; + Symbol::parse(self.symbols, offset).ok() + } + + /// Get the auxiliary symbol record for a function definition. + pub fn aux_function_definition(&self, index: usize) -> Option<AuxFunctionDefinition> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the auxiliary symbol record for a `.bf` or `.ef` symbol record. + pub fn aux_begin_and_end_function(&self, index: usize) -> Option<AuxBeginAndEndFunction> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the auxiliary symbol record for a weak external. + pub fn aux_weak_external(&self, index: usize) -> Option<AuxWeakExternal> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the file name from the auxiliary symbol record for a file symbol record. + pub fn aux_file(&self, index: usize, number: usize) -> Option<&'a str> { + let offset = index * COFF_SYMBOL_SIZE; + let length = number * COFF_SYMBOL_SIZE; + self.symbols + .pread_with(offset, ctx::StrCtx::DelimiterUntil(0, length)) + .ok() + } + + /// Get the auxiliary symbol record for a section definition. + pub fn aux_section_definition(&self, index: usize) -> Option<AuxSectionDefinition> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Return an iterator for the COFF symbols. + /// + /// This iterator skips over auxiliary symbol records.
+ pub fn iter(&self) -> SymbolIterator<'a> { + SymbolIterator { + index: 0, + symbols: self.symbols, + } + } +} + +impl<'a> Debug for SymbolTable<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("SymbolTable") + .field("symbols", &self.iter().collect::<Vec<_>>()) + .finish() + } +} + +/// An iterator for COFF symbols. +/// +/// This iterator skips over auxiliary symbol records. +#[derive(Default)] +pub struct SymbolIterator<'a> { + index: usize, + symbols: &'a [u8], +} + +impl<'a> Iterator for SymbolIterator<'a> { + type Item = (usize, Option<&'a str>, Symbol); + fn next(&mut self) -> Option<Self::Item> { + let offset = self.index * COFF_SYMBOL_SIZE; + if offset >= self.symbols.len() { + None + } else { + let index = self.index; + let (name, symbol) = Symbol::parse(self.symbols, offset).ok()?; + self.index += 1 + symbol.number_of_aux_symbols as usize; + Some((index, name, symbol)) + } + } +} diff --git a/third_party/rust/goblin/src/pe/utils.rs b/third_party/rust/goblin/src/pe/utils.rs index 78ebba41a516..953d361ec9bd 100644 --- a/third_party/rust/goblin/src/pe/utils.rs +++ b/third_party/rust/goblin/src/pe/utils.rs @@ -1,25 +1,60 @@ -use scroll::{Pread}; -use alloc::string::ToString; -use error; +use scroll::Pread; +use crate::alloc::string::ToString; +use crate::error; use super::section_table; +use core::cmp; +use crate::pe::data_directories::DataDirectory; + +use log::debug; + pub fn is_in_range (rva: usize, r1: usize, r2: usize) -> bool { r1 <= rva && rva < r2 } +// reference: Peter Ferrie. Reliable algorithm to extract overlay of a PE.
https://bit.ly/2vBX2bR +#[inline] +fn aligned_pointer_to_raw_data(pointer_to_raw_data: usize) -> usize { + const PHYSICAL_ALIGN: usize = 0x1ff; + pointer_to_raw_data & !PHYSICAL_ALIGN +} + +#[inline] +fn section_read_size(section: &section_table::SectionTable, file_alignment: u32) -> usize { + fn round_size(size: usize) -> usize { + const PAGE_MASK: usize = 0xfff; + (size + PAGE_MASK) & !PAGE_MASK + } + + let file_alignment = file_alignment as usize; + let size_of_raw_data = section.size_of_raw_data as usize; + let virtual_size = section.virtual_size as usize; + let read_size = { + let read_size = (section.pointer_to_raw_data as usize + size_of_raw_data + file_alignment - 1) & !(file_alignment - 1); + cmp::min(read_size, round_size(size_of_raw_data)) + }; + + if virtual_size == 0 { + read_size + } else { + cmp::min(read_size, round_size(virtual_size)) + } +} + fn rva2offset (rva: usize, section: &section_table::SectionTable) -> usize { - (rva - section.virtual_address as usize) + section.pointer_to_raw_data as usize + (rva - section.virtual_address as usize) + aligned_pointer_to_raw_data(section.pointer_to_raw_data as usize) } -fn is_in_section (rva: usize, section: &section_table::SectionTable) -> bool { - section.virtual_address as usize <= rva && rva < (section.virtual_address + section.virtual_size) as usize +fn is_in_section (rva: usize, section: &section_table::SectionTable, file_alignment: u32) -> bool { + let section_rva = section.virtual_address as usize; + is_in_range(rva, section_rva, section_rva + section_read_size(section, file_alignment)) } -pub fn find_offset (rva: usize, sections: &[section_table::SectionTable]) -> Option { +pub fn find_offset (rva: usize, sections: &[section_table::SectionTable], file_alignment: u32) -> Option { for (i, section) in sections.iter().enumerate() { debug!("Checking {} for {:#x} ∈ {:#x}..{:#x}", section.name().unwrap_or(""), rva, section.virtual_address, section.virtual_address + section.virtual_size); - if is_in_section(rva, &section) { +
if is_in_section(rva, &section, file_alignment) { let offset = rva2offset(rva, &section); debug!("Found in section {}({}), remapped into offset {:#x}", section.name().unwrap_or(""), i, offset); return Some(offset) @@ -28,12 +63,12 @@ pub fn find_offset (rva: usize, sections: &[section_table::SectionTable]) -> Opt None } -pub fn find_offset_or (rva: usize, sections: &[section_table::SectionTable], msg: &str) -> error::Result { - find_offset(rva, sections).ok_or(error::Error::Malformed(msg.to_string())) +pub fn find_offset_or (rva: usize, sections: &[section_table::SectionTable], file_alignment: u32, msg: &str) -> error::Result { + find_offset(rva, sections, file_alignment).ok_or_else(|| error::Error::Malformed(msg.to_string())) } -pub fn try_name<'a>(bytes: &'a [u8], rva: usize, sections: &[section_table::SectionTable]) -> error::Result<&'a str> { - match find_offset(rva, sections) { +pub fn try_name<'a>(bytes: &'a [u8], rva: usize, sections: &[section_table::SectionTable], file_alignment: u32) -> error::Result<&'a str> { + match find_offset(rva, sections, file_alignment) { Some(offset) => { Ok(bytes.pread::<&str>(offset)?)
}, @@ -42,3 +77,12 @@ pub fn try_name<'a>(bytes: &'a [u8], rva: usize, sections: &[section_table::Sect } } } + +pub fn get_data<'a, T>(bytes: &'a [u8], sections: &[section_table::SectionTable], directory: DataDirectory, file_alignment: u32) -> error::Result + where T: scroll::ctx::TryFromCtx<'a, scroll::Endian, Size = usize, Error = scroll::Error> { + let rva = directory.virtual_address as usize; + let offset = find_offset(rva, sections, file_alignment) + .ok_or_else(||error::Error::Malformed(directory.virtual_address.to_string()))?; + let result: T = bytes.pread_with(offset, scroll::LE)?; + Ok(result) +} diff --git a/third_party/rust/goblin/src/strtab.rs b/third_party/rust/goblin/src/strtab.rs index de19eb383bcd..a40d6c9ae57a 100644 --- a/third_party/rust/goblin/src/strtab.rs +++ b/third_party/rust/goblin/src/strtab.rs @@ -5,10 +5,10 @@ use core::ops::Index; use core::slice; use core::str; use core::fmt; -use scroll::{self, ctx, Pread}; +use scroll::{ctx, Pread}; if_alloc! { - use error; - use alloc::vec::Vec; + use crate::error; + use crate::alloc::vec::Vec; } /// A common string table format which is indexed by byte offsets (and not @@ -27,7 +27,7 @@ fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&s impl<'a> Strtab<'a> { /// Construct a new strtab with `bytes` as the backing string table, using `delim` as the delimiter between entries pub fn new (bytes: &'a [u8], delim: u8) -> Self { - Strtab { delim: ctx::StrCtx::Delimiter(delim), bytes: bytes } + Strtab { delim: ctx::StrCtx::Delimiter(delim), bytes } } /// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter pub unsafe fn from_raw(ptr: *const u8, size: usize, delim: u8) -> Strtab<'a> { @@ -44,7 +44,7 @@ impl<'a> Strtab<'a> { } #[cfg(feature = "alloc")] /// Converts the string table to a vector, with the original `delim` used to separate the strings - pub fn to_vec(self) -> error::Result> { + pub fn to_vec(&self) -> error::Result> { let len = 
self.bytes.len(); let mut strings = Vec::with_capacity(len); let mut i = 0; @@ -63,7 +63,7 @@ impl<'a> Strtab<'a> { if offset >= self.bytes.len() { None } else { - Some(get_str(offset, self.bytes, self.delim).map_err(|e| e.into())) + Some(get_str(offset, self.bytes, self.delim).map_err(core::convert::Into::into)) } } /// Gets a str reference from the backing bytes starting at byte `offset`. @@ -79,7 +79,10 @@ impl<'a> Strtab<'a> { impl<'a> fmt::Debug for Strtab<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "delim: {:?} {:?}", self.delim, str::from_utf8(self.bytes)) + f.debug_struct("Strtab") + .field("delim", &self.delim) + .field("bytes", &str::from_utf8(self.bytes)) + .finish() } } @@ -132,7 +135,7 @@ fn to_vec_final_null() { #[test] fn to_vec_newline_delim() { let bytes = b"\nprintf\nmemmove\nbusta\n"; - let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), '\n' as u8) }; + let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), b'\n') }; let vec = strtab.to_vec().unwrap(); assert_eq!(vec.len(), 4); assert_eq!(vec, vec!["", "printf", "memmove", "busta"]); diff --git a/third_party/rust/goblin/tests/archive.rs b/third_party/rust/goblin/tests/archive.rs index 144d0ae9e665..455ea0bc16e4 100644 --- a/third_party/rust/goblin/tests/archive.rs +++ b/third_party/rust/goblin/tests/archive.rs @@ -1,5 +1,3 @@ -extern crate scroll; -extern crate goblin; use goblin::archive::*; use scroll::Pread; use std::path::Path; @@ -18,7 +16,7 @@ fn parse_file_header() { 0x20, 0x20, 0x60, 0x0a]; let buffer = &file_header[..]; match buffer.pread::(0) { - Err(_) => assert!(false), + Err(e) => panic!("could not read the buffer: {:?}", e), Ok(file_header2) => { let file_header = MemberHeader { identifier: [0x2f,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,], @@ -36,7 +34,7 @@ fn parse_file_header() { #[test] fn parse_archive() { let crt1a: Vec = include!("../etc/crt1a.rs"); - const START: &'static str = 
"_start"; + const START: &str = "_start"; match Archive::parse(&crt1a) { Ok(archive) => { assert_eq!(archive.member_of_symbol(START), Some("crt1.o")); @@ -44,11 +42,10 @@ fn parse_archive() { assert_eq!(member.offset, 194); assert_eq!(member.size(), 1928) } else { - println!("could not get crt1.o"); - assert!(false) + panic!("could not get crt1.o"); } }, - Err(err) => {println!("could not parse archive: {:?}", err); assert!(false)} + Err(err) => panic!("could not parse archive: {:?}", err), }; } @@ -58,7 +55,7 @@ fn parse_self() { use std::io::Read; let mut path = Path::new("target").join("debug").join("libgoblin.rlib"); // https://github.com/m4b/goblin/issues/63 - if !fs::metadata(&path).is_ok() { + if fs::metadata(&path).is_err() { path = Path::new("target").join("release").join("libgoblin.rlib"); } let buffer = { @@ -71,36 +68,33 @@ fn parse_self() { let archive = Archive::parse(&buffer).expect("parse rlib"); // check that the archive has a useful symbol table by counting the total number of symbols - let symbol_count = archive.summarize().into_iter() + let symbol_count: usize = archive.summarize().into_iter() .map(|(_member_name, _member_index, ref symbols)| symbols.len()) - .fold(0, |sum,symbol_count| sum + symbol_count); + .sum(); assert!(symbol_count > 500); let goblin_object_name = archive.members() .into_iter() - .filter(|member| { + .find(|member| { println!("member: {:?}", member); member.ends_with("goblin-archive.o") // < 1.18 || (member.starts_with("goblin") && member.ends_with("0.o")) // >= 1.18 && < 1.22 || (member.starts_with("goblin") && member.ends_with("rust-cgu.o")) // = 1.22 || (member.starts_with("goblin") && member.ends_with("rcgu.o")) // >= nightly 1.23 }) - .next() .expect("goblin-.0.o not found"); let bytes = archive.extract(goblin_object_name, &buffer).expect("extract goblin object"); match goblin::Object::parse(&bytes).expect("parse object") { goblin::Object::Elf(elf) => { assert!(elf.entry == 0); - assert!(elf.bias == 0); } 
goblin::Object::Mach(goblin::mach::Mach::Binary(macho)) => { assert_eq!(macho.header.filetype, goblin::mach::header::MH_OBJECT); assert_eq!(macho.entry, 0); } other => { - println!("unexpected Object::parse result: {:?}", other); - assert!(false); + panic!("unexpected Object::parse result: {:?}", other); } } } diff --git a/third_party/rust/goblin/tests/macho.rs b/third_party/rust/goblin/tests/macho.rs index dc62f62a7091..2596573a1675 100644 --- a/third_party/rust/goblin/tests/macho.rs +++ b/third_party/rust/goblin/tests/macho.rs @@ -1,5 +1,3 @@ -extern crate goblin; - use goblin::mach::*; #[test] @@ -17,8 +15,7 @@ fn parse_fat_header() { assert_eq!(arches.get(2).is_none(), true); }, _ => { - println!("got mach binary from fat"); - assert!(false); + panic!("got mach binary from fat"); } } } @@ -59,11 +56,10 @@ fn parse_sections() { Mach::Binary(binary) => { println!("binary: {:?}", binary); let section = macho_get_section(&binary, "__text").unwrap(); - assert!(section.len() > 0); + assert!(!section.is_empty()); }, _ => { - println!("got mach fat from regular binary"); - assert!(false); + panic!("got mach fat from regular binary"); } } } @@ -79,24 +75,23 @@ fn iter_symbols() { for symbol in symbols.iter() { println!("symbol: {:?}", symbol); let (name, _symbol) = symbol.unwrap(); - assert!(name.len() > 0); + assert!(!name.is_empty()); } let symbols = symbols.iter().collect::>(); assert_eq!(symbols.len(), 4); }, _ => { - println!("got mach fat from regular binary"); - assert!(false); + panic!("got mach fat from regular binary"); } } } #[test] fn relocations() { - use relocation::*; + use crate::relocation::*; let reloc = RelocationInfo { r_address: 0, - r_info: 0xe000009 + r_info: 0xe00_0009 }; println!("reloc: {:?}", reloc); assert_eq!(reloc.r_length(), 3); @@ -105,7 +100,7 @@ fn relocations() { assert_eq!(reloc.is_extern(), true); let reloc = RelocationInfo { r_address: 0, - r_info: 0x15000002 + r_info: 0x1500_0002 }; println!("reloc: {:?}", reloc); 
assert_eq!(reloc.r_length(), 2); @@ -114,7 +109,7 @@ fn relocations() { assert_eq!(reloc.is_extern(), false); let reloc = RelocationInfo { r_address: 0, - r_info: 0x2d000002 + r_info: 0x2d00_0002 }; println!("reloc: {:?}", reloc); assert_eq!(reloc.r_length(), 2); diff --git a/third_party/rust/object/.cargo-checksum.json b/third_party/rust/object/.cargo-checksum.json index fd9851cc5eec..9ab70479c84b 100644 --- a/third_party/rust/object/.cargo-checksum.json +++ b/third_party/rust/object/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"5dd19cb665fa472a0d87e445804e9a4e8365792bbccd75a07e98ec53d53e336a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0b74dfa0bcee5c420c6b7f67b4b2658f9ab8388c97b8e733975f2cecbdd668a6","README.md":"5c498b93ff10c038784c5b62c346858c0d53d9d6d1591841c5b6724b0abc9415","examples/nm.rs":"8d5384228b373638aa22160213c077c59d59c407dc10e309872dcba78688d442","examples/objdump.rs":"52edabd474ddcc85c51ebb2ff1e8b3aed4bc1bfacf32ab5d70cb7d0a1409e0a8","src/elf.rs":"60c35dfbb17960676d1541b869333baa3e8b9512e54545a4b77fc405800c480f","src/lib.rs":"e18cba29299a43e63c71607814bdfd5cde77a64a6128f1981843dcc2fc46f05c","src/macho.rs":"7521a1c9f52a6ef9cfe325413fd181101645a6c1296b69523c6b56cdcdf089d0","src/pe.rs":"63a5a7d6410ac8f5f07fdca1a50bcb3e5141288e7e837cfbd4c555b671d2314f","src/traits.rs":"1d4590ba544e86ebfd4957a07941eac5097ae66384603677f23be37982a1cc09","src/wasm.rs":"03d5a74c3673bdb410934d7837e528debbcd3875b926d26d6a1fc849384f0c79"},"package":"6cca6ad89d0801138cb4ef606908ae12d83edc4c790ef5178fc7b4c72d959e90"} \ No newline at end of file 
+{"files":{"Cargo.toml":"3dd7528275c4d0eef0dd1b918dc6247c7e996383657355b43362f826f59b31fe","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0b74dfa0bcee5c420c6b7f67b4b2658f9ab8388c97b8e733975f2cecbdd668a6","README.md":"5c498b93ff10c038784c5b62c346858c0d53d9d6d1591841c5b6724b0abc9415","examples/nm.rs":"9885cb85700512d63e537b4b60bd2c840aa076721eae2059ba6ae3651be1282e","examples/objcopy.rs":"a05e1b87318be30b6dd67c061c59d77b97efde2af9d5191c9c97978c0410a5eb","examples/objdump.rs":"07a23a2f74b7e46d4cdcf4dab23357a39410b4c4db43179c17e059011e40d45c","src/common.rs":"143f42a0e085e82a022b85680d42322ac912eefc4ab2cb2bee687368fa8615a5","src/lib.rs":"7e559b0af48faca419086a743e3f99794e10a91e8619f8c6e26f113d1935fe14","src/read/any.rs":"12be08836fb2f66026b34434b47cfe275f82cf31b05039ef0545fc324a3b9bce","src/read/coff.rs":"f3a16d71ec8c5692f5435bf51a3ecda49dc727d5d93f5cdef67e7853e31e6dfa","src/read/elf.rs":"68939fc291b2f2c0b6d3d112fd7edf5eaed8b5987d6fda35a1a843843511d325","src/read/macho.rs":"ee575a49c194fdaa9132e1230266269dc4cb497b9a8f1fed635173bba492ead2","src/read/mod.rs":"efdb99a566a971bca815e1d1dd85b9e9800fbe4e3572cf54a7b0ff54111469c2","src/read/pe.rs":"423527bb5fb5b234057d51925f6ac3ea05603618c1d8c6165de2f9c819978d02","src/read/traits.rs":"c73dd0ca832fc74a9addb414ab5ffe430e6c076a0bd934b31e6608e04c61dc5e","src/read/wasm.rs":"5f6e1e24d53429ac9d80f87e7784183a4608d08b3f465df629c86c68f1af56d4","src/write/coff.rs":"9c9ebc226cb585a61e3c96085099b69de0b2b75877093f644b3caacf03b6d03d","src/write/elf.rs":"d6e7bb6db9b29de1c105dfa75c7e561c85e42a05c75c70cf7baffe25d3009d06","src/write/macho.rs":"1ca4e4d75e45badc4bf5b5dfc8a663d1060d85e6c6a94236ffe9db3c09531c5e","src/write/mod.rs":"248ccbc34aa0cdd84e3c413913f05fe1478a4837ad41e3448414289bb73b2671","src/write/string.rs":"a0640f9e0626ca4618a2ac88e17e0d14b053f880d60635ea932ca78b50a114f5","src/write/util.rs":"9629903d556036aa7d6031cffce1fd38c033453a28c0a30eb34fc77aded4a11d","tests/round_trip.rs":"a28b5793
1275c31b704aed5350da92e43abf4c09c5fb94360c9ab5db6a7c4a78"},"package":"d89ec45bc6b810c6ee998e22953fbf387a40fcbf5014dcbb9e5ba9a09a81ee15"} \ No newline at end of file diff --git a/third_party/rust/object/Cargo.toml b/third_party/rust/object/Cargo.toml index ff8c77002ecc..de7baee0ca8b 100644 --- a/third_party/rust/object/Cargo.toml +++ b/third_party/rust/object/Cargo.toml @@ -3,7 +3,7 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g. crates.io) dependencies +# to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're @@ -11,39 +11,57 @@ # will likely look very different (and much more reasonable) [package] +edition = "2018" name = "object" -version = "0.10.0" +version = "0.13.0" authors = ["Nick Fitzgerald ", "Philip Craig "] exclude = ["/.coveralls.yml", "/.travis.yml"] description = "A unified interface for parsing object file formats." 
keywords = ["object", "loader", "elf", "mach-o", "pe"] license = "Apache-2.0/MIT" repository = "https://github.com/gimli-rs/object" + +[[example]] +name = "objcopy" +required-features = ["read", "write"] +[dependencies.crc32fast] +version = "1" +optional = true + [dependencies.flate2] version = "1" optional = true [dependencies.goblin] -version = "0.0.17" +version = "0.0.24" features = ["endian_fd", "elf32", "elf64", "mach32", "mach64", "pe32", "pe64", "archive"] default-features = false +[dependencies.indexmap] +version = "1" +optional = true + [dependencies.parity-wasm] -version = "0.31.0" +version = "0.40.0" optional = true [dependencies.scroll] version = "0.9" default-features = false +[dependencies.target-lexicon] +version = "0.4" + [dependencies.uuid] -version = "0.6" +version = "0.7" default-features = false [dev-dependencies.memmap] -version = "0.6" +version = "0.7" [features] compression = ["flate2"] -default = ["std", "compression", "wasm"] +default = ["read", "std", "compression", "wasm"] +read = [] std = ["goblin/std"] wasm = ["std", "parity-wasm"] +write = ["crc32fast", "indexmap", "std"] diff --git a/third_party/rust/object/examples/nm.rs b/third_party/rust/object/examples/nm.rs index ede511665fd1..eeece1718bcd 100644 --- a/third_party/rust/object/examples/nm.rs +++ b/third_party/rust/object/examples/nm.rs @@ -1,10 +1,7 @@ -extern crate memmap; -extern crate object; - +use object::{Object, ObjectSection, SectionIndex, SectionKind, Symbol, SymbolKind}; +use std::collections::HashMap; use std::{env, fs, process}; -use object::{Object, SectionKind, Symbol, SymbolKind}; - fn main() { let arg_len = env::args().len(); if arg_len <= 1 { @@ -40,53 +37,47 @@ fn main() { } }; + let section_kinds = file.sections().map(|s| (s.index(), s.kind())).collect(); + println!("Debugging symbols:"); - for symbol in file.symbols() { - print_symbol(&symbol); + for (_, symbol) in file.symbols() { + print_symbol(&symbol, &section_kinds); } println!(); println!("Dynamic symbols:");
- for symbol in file.dynamic_symbols() { - print_symbol(&symbol); + for (_, symbol) in file.dynamic_symbols() { + print_symbol(&symbol, &section_kinds); } } } -fn print_symbol(symbol: &Symbol) { - match symbol.kind() { - SymbolKind::Section | SymbolKind::File => return, - _ => {} +fn print_symbol(symbol: &Symbol<'_>, section_kinds: &HashMap) { + if let SymbolKind::Section | SymbolKind::File = symbol.kind() { + return; } - let kind = match symbol.section_kind() { - Some(SectionKind::Unknown) => '?', - Some(SectionKind::Text) => if symbol.is_global() { - 'T' - } else { - 't' - }, - Some(SectionKind::Data) => if symbol.is_global() { - 'D' - } else { - 'd' - }, - Some(SectionKind::ReadOnlyData) => if symbol.is_global() { - 'R' - } else { - 'r' - }, - Some(SectionKind::UninitializedData) => if symbol.is_global() { - 'B' - } else { - 'b' - }, - Some(SectionKind::Other) => if symbol.is_global() { - 'S' - } else { - 's' - }, + + let mut kind = match symbol + .section_index() + .and_then(|index| section_kinds.get(&index)) + { + Some(SectionKind::Unknown) + | Some(SectionKind::Other) + | Some(SectionKind::OtherString) + | Some(SectionKind::Debug) + | Some(SectionKind::Linker) + | Some(SectionKind::Metadata) => '?', + Some(SectionKind::Text) => 't', + Some(SectionKind::Data) | Some(SectionKind::Tls) | Some(SectionKind::TlsVariables) => 'd', + Some(SectionKind::ReadOnlyData) | Some(SectionKind::ReadOnlyString) => 'r', + Some(SectionKind::UninitializedData) | Some(SectionKind::UninitializedTls) => 'b', None => 'U', }; + + if symbol.is_global() { + kind = kind.to_ascii_uppercase(); + } + if symbol.is_undefined() { print!("{:16} ", ""); } else { diff --git a/third_party/rust/object/examples/objcopy.rs b/third_party/rust/object/examples/objcopy.rs new file mode 100644 index 000000000000..66ab3be600fd --- /dev/null +++ b/third_party/rust/object/examples/objcopy.rs @@ -0,0 +1,116 @@ +use std::collections::HashMap; +use std::{env, fs, process}; + +use object::{write, Object,
ObjectSection, RelocationTarget, SectionKind, SymbolKind}; + +fn main() { + let mut args = env::args(); + if args.len() != 3 { + eprintln!("Usage: {} ", args.next().unwrap()); + process::exit(1); + } + + args.next(); + let in_file_path = args.next().unwrap(); + let out_file_path = args.next().unwrap(); + + let in_file = match fs::File::open(&in_file_path) { + Ok(file) => file, + Err(err) => { + eprintln!("Failed to open file '{}': {}", in_file_path, err,); + process::exit(1); + } + }; + let in_file = match unsafe { memmap::Mmap::map(&in_file) } { + Ok(mmap) => mmap, + Err(err) => { + eprintln!("Failed to map file '{}': {}", in_file_path, err,); + process::exit(1); + } + }; + let in_object = match object::File::parse(&*in_file) { + Ok(object) => object, + Err(err) => { + eprintln!("Failed to parse file '{}': {}", in_file_path, err); + process::exit(1); + } + }; + + let mut out_object = write::Object::new(in_object.format(), in_object.architecture()); + + let mut out_sections = HashMap::new(); + for in_section in in_object.sections() { + if in_section.kind() == SectionKind::Metadata { + continue; + } + let section_id = out_object.add_section( + in_section.segment_name().unwrap_or("").as_bytes().to_vec(), + in_section.name().unwrap_or("").as_bytes().to_vec(), + in_section.kind(), + ); + let out_section = out_object.section_mut(section_id); + if out_section.is_bss() { + out_section.append_bss(in_section.size(), in_section.align()); + } else { + out_section.set_data(in_section.uncompressed_data().into(), in_section.align()); + } + out_sections.insert(in_section.index(), section_id); + } + + let mut out_symbols = HashMap::new(); + for (symbol_index, in_symbol) in in_object.symbols() { + if in_symbol.kind() == SymbolKind::Null { + continue; + } + let (section, value) = match in_symbol.section_index() { + Some(index) => ( + Some(*out_sections.get(&index).unwrap()), + in_symbol.address() - in_object.section_by_index(index).unwrap().address(), + ), + None => (None, 
in_symbol.address()), + }; + let out_symbol = write::Symbol { + name: in_symbol.name().unwrap_or("").as_bytes().to_vec(), + value, + size: in_symbol.size(), + kind: in_symbol.kind(), + scope: in_symbol.scope(), + weak: in_symbol.is_weak(), + section, + }; + let symbol_id = out_object.add_symbol(out_symbol); + out_symbols.insert(symbol_index, symbol_id); + } + + for in_section in in_object.sections() { + if in_section.kind() == SectionKind::Metadata { + continue; + } + let out_section = *out_sections.get(&in_section.index()).unwrap(); + for (offset, in_relocation) in in_section.relocations() { + let symbol = match in_relocation.target() { + RelocationTarget::Symbol(symbol) => *out_symbols.get(&symbol).unwrap(), + RelocationTarget::Section(section) => { + out_object.section_symbol(*out_sections.get(&section).unwrap()) + } + }; + let out_relocation = write::Relocation { + offset, + size: in_relocation.size(), + kind: in_relocation.kind(), + encoding: in_relocation.encoding(), + symbol, + addend: in_relocation.addend(), + }; + out_object + .add_relocation(out_section, out_relocation) + .unwrap(); + } + } + + let out_data = out_object.write().unwrap(); + if let Err(err) = fs::write(&out_file_path, out_data) { + eprintln!("Failed to write file '{}': {}", out_file_path, err); + process::exit(1); + } +} diff --git a/third_party/rust/object/examples/objdump.rs b/third_party/rust/object/examples/objdump.rs index 7a785565ba4c..52b227d9de44 100644 --- a/third_party/rust/object/examples/objdump.rs +++ b/third_party/rust/object/examples/objdump.rs @@ -1,10 +1,6 @@ -extern crate memmap; -extern crate object; - +use object::{Object, ObjectSection}; use std::{env, fs, process}; -use object::Object; - fn main() { let arg_len = env::args().len(); if arg_len <= 1 { @@ -47,15 +43,35 @@ fn main() { println!("Build ID: {:x?}", build_id); } if let Some((filename, crc)) = file.gnu_debuglink() { - println!("GNU debug link: {} CRC: {:08x}", String::from_utf8_lossy(filename), crc); + println!( +
"GNU debug link: {} CRC: {:08x}", + String::from_utf8_lossy(filename), + crc + ); } for segment in file.segments() { println!("{:?}", segment); } + for (index, section) in file.sections().enumerate() { + println!("{}: {:?}", index, section); + } + + for (index, symbol) in file.symbols() { + println!("{}: {:?}", index.0, symbol); + } + for section in file.sections() { - println!("{:?}", section); + if section.relocations().next().is_some() { + println!( + "\n{} relocations", + section.name().unwrap_or("") + ); + for relocation in section.relocations() { + println!("{:?}", relocation); + } + } } } } diff --git a/third_party/rust/object/src/common.rs b/third_party/rust/object/src/common.rs new file mode 100644 index 000000000000..baee0c4144ba --- /dev/null +++ b/third_party/rust/object/src/common.rs @@ -0,0 +1,178 @@ +/// The kind of a section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SectionKind { + /// The section kind is unknown. + Unknown, + /// An executable code section. + /// + /// Example ELF sections: `.text` + /// + /// Example Mach-O sections: `__TEXT/__text` + Text, + /// A data section. + /// + /// Example ELF sections: `.data` + /// + /// Example Mach-O sections: `__DATA/__data` + Data, + /// A read only data section. + /// + /// Example ELF sections: `.rodata` + /// + /// Example Mach-O sections: `__TEXT/__const`, `__DATA/__const` + ReadOnlyData, + /// A loadable string section. + /// + /// Example ELF sections: `.rodata.str` + /// + /// Example Mach-O sections: `__TEXT/__cstring` + ReadOnlyString, + /// An uninitialized data section. + /// + /// Example ELF sections: `.bss` + /// + /// Example Mach-O sections: `__DATA/__bss` + UninitializedData, + /// A TLS data section. + /// + /// Example ELF sections: `.tdata` + /// + /// Example Mach-O sections: `__DATA/__thread_data` + Tls, + /// An uninitialized TLS data section. 
+ /// + /// Example ELF sections: `.tbss` + /// + /// Example Mach-O sections: `__DATA/__thread_bss` + UninitializedTls, + /// A TLS variables section. + /// + /// This contains TLS variable structures, rather than the variable initializers. + /// + /// Example Mach-O sections: `__DATA/__thread_vars` + TlsVariables, + /// A non-loadable string section. + /// + /// Example ELF sections: `.comment`, `.debug_str` + OtherString, + /// Some other non-loadable section. + /// + /// Example ELF sections: `.debug_info` + Other, + /// Debug information. + /// + /// Example Mach-O sections: `__DWARF/__debug_info` + Debug, + /// Information for the linker. + /// + /// Example COFF sections: `.drectve` + Linker, + /// Metadata such as symbols or relocations. + /// + /// Example ELF sections: `.symtab`, `.strtab` + Metadata, +} + +/// The kind of a symbol. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SymbolKind { + /// The symbol kind is unknown. + Unknown, + /// The symbol is a null placeholder. + Null, + /// The symbol is for executable code. + Text, + /// The symbol is for a data object. + Data, + /// The symbol is for a section. + Section, + /// The symbol is the name of a file. It precedes symbols within that file. + File, + /// The symbol is for a code label. + Label, + /// The symbol is for an uninitialized common block. + Common, + /// The symbol is for a thread local storage entity. + Tls, +} + +/// A symbol scope. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SymbolScope { + /// Unknown scope. + Unknown, + /// Symbol is visible to the compilation unit. + Compilation, + /// Symbol is visible to the static linkage unit. + Linkage, + /// Symbol is visible to dynamically linked objects. + Dynamic, +} + +/// The operation used to calculate the result of the relocation. +/// +/// The relocation descriptions use the following definitions. Note that +/// these definitions probably don't match any ELF ABI. +/// +/// * A - The value of the addend. 
+/// * G - The address of the symbol's entry within the global offset table. +/// * L - The address of the symbol's entry within the procedure linkage table. +/// * P - The address of the place of the relocation. +/// * S - The address of the symbol. +/// * GotBase - The address of the global offset table. +/// * Image - The base address of the image. +/// * Section - The address of the section containing the symbol. +/// +/// 'XxxRelative' means 'Xxx + A - P'. 'XxxOffset' means 'S + A - Xxx'. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RelocationKind { + /// S + A + Absolute, + /// S + A - P + Relative, + /// G + A - GotBase + Got, + /// G + A - P + GotRelative, + /// GotBase + A - P + GotBaseRelative, + /// S + A - GotBase + GotBaseOffset, + /// L + A - P + PltRelative, + /// S + A - Image + ImageOffset, + /// S + A - Section + SectionOffset, + /// The index of the section containing the symbol. + SectionIndex, + /// Some other operation and encoding. The value is dependent on file format and machine. + Other(u32), +} + +/// Information about how the result of the relocation operation is encoded in the place. +/// +/// This is usually architecture specific, such as specifying an addressing mode or +/// a specific instruction. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RelocationEncoding { + /// Generic encoding. + Generic, + + /// x86 sign extension at runtime. + /// + /// Used with `RelocationKind::Absolute`. + X86Signed, + /// x86 rip-relative addressing. + /// + /// The `RelocationKind` must be PC relative. + X86RipRelative, + /// x86 rip-relative addressing in movq instruction. + /// + /// The `RelocationKind` must be PC relative. + X86RipRelativeMovq, + /// x86 branch instruction. + /// + /// The `RelocationKind` must be PC relative. 
+ X86Branch, +} diff --git a/third_party/rust/object/src/elf.rs b/third_party/rust/object/src/elf.rs deleted file mode 100644 index 58082ab3a80d..000000000000 --- a/third_party/rust/object/src/elf.rs +++ /dev/null @@ -1,435 +0,0 @@ -use std::slice; -use alloc::borrow::Cow; -use alloc::fmt; -use alloc::vec::Vec; - -#[cfg(feature = "compression")] -use flate2::{Decompress, FlushDecompress}; - -use goblin::{elf, strtab}; -#[cfg(feature = "compression")] -use goblin::container; -use scroll::{self, Pread}; -#[cfg(feature = "compression")] -use scroll::ctx::TryFromCtx; - -use {Machine, Object, ObjectSection, ObjectSegment, SectionKind, Symbol, SymbolKind, SymbolMap}; - -/// An ELF object file. -#[derive(Debug)] -pub struct ElfFile<'data> { - elf: elf::Elf<'data>, - data: &'data [u8], -} - -/// An iterator over the segments of an `ElfFile`. -#[derive(Debug)] -pub struct ElfSegmentIterator<'data, 'file> -where - 'data: 'file, -{ - file: &'file ElfFile<'data>, - iter: slice::Iter<'file, elf::ProgramHeader>, -} - -/// A segment of an `ElfFile`. -#[derive(Debug)] -pub struct ElfSegment<'data, 'file> -where - 'data: 'file, -{ - file: &'file ElfFile<'data>, - segment: &'file elf::ProgramHeader, -} - -/// An iterator over the sections of an `ElfFile`. -#[derive(Debug)] -pub struct ElfSectionIterator<'data, 'file> -where - 'data: 'file, -{ - file: &'file ElfFile<'data>, - iter: slice::Iter<'file, elf::SectionHeader>, -} - -/// A section of an `ElfFile`. -#[derive(Debug)] -pub struct ElfSection<'data, 'file> -where - 'data: 'file, -{ - file: &'file ElfFile<'data>, - section: &'file elf::SectionHeader, -} - -/// An iterator over the symbols of an `ElfFile`. -pub struct ElfSymbolIterator<'data, 'file> -where - 'data: 'file, -{ - strtab: &'file strtab::Strtab<'data>, - symbols: elf::sym::SymIterator<'data>, - section_kinds: Vec, -} - -impl<'data> ElfFile<'data> { - /// Get the ELF headers of the file. 
- // TODO: this is temporary to allow access to features this crate doesn't provide yet - #[inline] - pub fn elf(&self) -> &elf::Elf<'data> { - &self.elf - } - - /// Parse the raw ELF file data. - pub fn parse(data: &'data [u8]) -> Result { - let elf = elf::Elf::parse(data).map_err(|_| "Could not parse ELF header")?; - Ok(ElfFile { elf, data }) - } - - #[cfg(feature = "compression")] - fn maybe_decompress_data(&self, header: &elf::SectionHeader) -> Cow<'data, [u8]> { - let data = &self.data[header.sh_offset as usize..][..header.sh_size as usize]; - if (header.sh_flags & elf::section_header::SHF_COMPRESSED as u64) == 0 { - Cow::Borrowed(data) - } else { - let container = match self.elf.header.container() { - Ok(c) => c, - Err(_) => return Cow::Borrowed(data), - }; - let endianness = match self.elf.header.endianness() { - Ok(e) => e, - Err(_) => return Cow::Borrowed(data), - }; - let ctx = container::Ctx::new(container, endianness); - let (compression_type, uncompressed_size, compressed_data) = - match elf::compression_header::CompressionHeader::try_from_ctx(data, ctx) { - Ok((chdr, size)) => (chdr.ch_type, chdr.ch_size, &data[size..]), - Err(_) => return Cow::Borrowed(data), - }; - if compression_type != elf::compression_header::ELFCOMPRESS_ZLIB { - return Cow::Borrowed(data); - } - - let mut decompressed = Vec::with_capacity(uncompressed_size as usize); - let mut decompress = Decompress::new(true); - if let Err(_) = decompress.decompress_vec( - compressed_data, &mut decompressed, FlushDecompress::Finish) { - return Cow::Borrowed(data); - } - Cow::Owned(decompressed) - } - } - - #[cfg(not(feature = "compression"))] - fn maybe_decompress_data(&self, header: &elf::SectionHeader) -> Cow<'data, [u8]> { - let data = &self.data[header.sh_offset as usize..][..header.sh_size as usize]; - Cow::Borrowed(data) - } - - #[cfg(feature = "compression")] - /// Try GNU-style "ZLIB" header decompression. 
- fn maybe_decompress_data_gnu(&self, data: Cow<'data, [u8]>) -> Cow<'data, [u8]> { - // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally - // huge allocations. This also reduces the chance of accidentally matching on a - // .debug_str that happens to start with "ZLIB". - if data.len() < 12 || &data[..8] != b"ZLIB\0\0\0\0" { - return data; - } - let uncompressed_size: u32 = data.pread_with(8, scroll::BE).unwrap(); - let mut decompressed = Vec::with_capacity(uncompressed_size as usize); - let mut decompress = Decompress::new(true); - if let Err(_) = decompress.decompress_vec( - &data[12..], &mut decompressed, FlushDecompress::Finish) { - return data; - } - Cow::Owned(decompressed) - } - - #[cfg(feature = "compression")] - /// Try GNU-style "ZLIB" header decompression. - fn try_zdebug_section_data(&self, section_name: &str) -> Option<Cow<'data, [u8]>> { - if !section_name.starts_with(".debug_") { - return None; - } - let z_name = format!(".zdebug_{}", &section_name[7..]); - // Note that we accept data in .zdebug_ that isn't actually compressed. 
- self.section_data_by_name(&z_name).map(|data| self.maybe_decompress_data_gnu(data)) - } - - #[cfg(not(feature = "compression"))] - fn try_zdebug_section_data(&self, _section_name: &str) -> Option> { - None - } -} - -impl<'data, 'file> Object<'data, 'file> for ElfFile<'data> -where - 'data: 'file, -{ - type Segment = ElfSegment<'data, 'file>; - type SegmentIterator = ElfSegmentIterator<'data, 'file>; - type Section = ElfSection<'data, 'file>; - type SectionIterator = ElfSectionIterator<'data, 'file>; - type SymbolIterator = ElfSymbolIterator<'data, 'file>; - - fn machine(&self) -> Machine { - match self.elf.header.e_machine { - elf::header::EM_ARM => Machine::Arm, - elf::header::EM_AARCH64 => Machine::Arm64, - elf::header::EM_386 => Machine::X86, - elf::header::EM_X86_64 => Machine::X86_64, - _ => Machine::Other, - } - } - - fn segments(&'file self) -> ElfSegmentIterator<'data, 'file> { - ElfSegmentIterator { - file: self, - iter: self.elf.program_headers.iter(), - } - } - - fn section_data_by_name(&self, section_name: &str) -> Option> { - for header in &self.elf.section_headers { - if let Some(Ok(name)) = self.elf.shdr_strtab.get(header.sh_name) { - if name == section_name { - return Some(self.maybe_decompress_data(header)); - } - } - } - self.try_zdebug_section_data(section_name) - } - - fn sections(&'file self) -> ElfSectionIterator<'data, 'file> { - ElfSectionIterator { - file: self, - iter: self.elf.section_headers.iter(), - } - } - - fn symbols(&'file self) -> ElfSymbolIterator<'data, 'file> { - ElfSymbolIterator { - strtab: &self.elf.strtab, - symbols: self.elf.syms.iter(), - section_kinds: self.sections().map(|x| x.kind()).collect(), - } - } - - fn dynamic_symbols(&'file self) -> ElfSymbolIterator<'data, 'file> { - ElfSymbolIterator { - strtab: &self.elf.dynstrtab, - symbols: self.elf.dynsyms.iter(), - section_kinds: self.sections().map(|x| x.kind()).collect(), - } - } - - fn symbol_map(&self) -> SymbolMap<'data> { - let mut symbols: Vec<_> = 
self.symbols().filter(SymbolMap::filter).collect(); - symbols.sort_by_key(|x| x.address); - SymbolMap { symbols } - } - - #[inline] - fn is_little_endian(&self) -> bool { - self.elf.little_endian - } - - fn has_debug_symbols(&self) -> bool { - for header in &self.elf.section_headers { - if let Some(Ok(name)) = self.elf.shdr_strtab.get(header.sh_name) { - if name == ".debug_info" || name == ".zdebug_info" { - return true; - } - } - } - false - } - - fn build_id(&self) -> Option<&'data [u8]> { - if let Some(notes) = self.elf.iter_note_headers(self.data) { - for note in notes { - if let Ok(note) = note { - if note.n_type == elf::note::NT_GNU_BUILD_ID { - return Some(note.desc); - } - } - } - } - if let Some(notes) = self.elf - .iter_note_sections(self.data, Some(".note.gnu.build-id")) - { - for note in notes { - if let Ok(note) = note { - if note.n_type == elf::note::NT_GNU_BUILD_ID { - return Some(note.desc); - } - } - } - } - None - } - - fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { - if let Some(Cow::Borrowed(data)) = self.section_data_by_name(".gnu_debuglink") { - if let Some(filename_len) = data.iter().position(|x| *x == 0) { - let filename = &data[..filename_len]; - // Round to 4 byte alignment after null terminator. 
- let offset = (filename_len + 1 + 3) & !3; - if offset + 4 <= data.len() { - let endian = if self.is_little_endian() { - scroll::LE - } else { - scroll::BE - }; - let crc: u32 = data.pread_with(offset, endian).unwrap(); - return Some((filename, crc)); - } - } - } - None - } - - fn entry(&self) -> u64 { - self.elf.entry - } -} - -impl<'data, 'file> Iterator for ElfSegmentIterator<'data, 'file> { - type Item = ElfSegment<'data, 'file>; - - fn next(&mut self) -> Option { - while let Some(segment) = self.iter.next() { - if segment.p_type == elf::program_header::PT_LOAD { - return Some(ElfSegment { - file: self.file, - segment, - }); - } - } - None - } -} - -impl<'data, 'file> ObjectSegment<'data> for ElfSegment<'data, 'file> { - #[inline] - fn address(&self) -> u64 { - self.segment.p_vaddr - } - - #[inline] - fn size(&self) -> u64 { - self.segment.p_memsz - } - - fn data(&self) -> &'data [u8] { - &self.file.data[self.segment.p_offset as usize..][..self.segment.p_filesz as usize] - } - - #[inline] - fn name(&self) -> Option<&str> { - None - } -} - -impl<'data, 'file> Iterator for ElfSectionIterator<'data, 'file> { - type Item = ElfSection<'data, 'file>; - - fn next(&mut self) -> Option { - self.iter.next().map(|section| { - ElfSection { - file: self.file, - section, - } - }) - } -} - -impl<'data, 'file> ObjectSection<'data> for ElfSection<'data, 'file> { - #[inline] - fn address(&self) -> u64 { - self.section.sh_addr - } - - #[inline] - fn size(&self) -> u64 { - self.section.sh_size - } - - fn data(&self) -> Cow<'data, [u8]> { - Cow::from(if self.section.sh_type == elf::section_header::SHT_NOBITS { - &[] - } else { - &self.file.data[self.section.sh_offset as usize..][..self.section.sh_size as usize] - }) - } - - fn name(&self) -> Option<&str> { - self.file - .elf - .shdr_strtab - .get(self.section.sh_name) - .and_then(Result::ok) - } - - #[inline] - fn segment_name(&self) -> Option<&str> { - None - } - - fn kind(&self) -> SectionKind { - match self.section.sh_type { - 
elf::section_header::SHT_PROGBITS => { - if self.section.sh_flags & u64::from(elf::section_header::SHF_ALLOC) == 0 { - SectionKind::Unknown - } else if self.section.sh_flags & u64::from(elf::section_header::SHF_EXECINSTR) != 0 - { - SectionKind::Text - } else if self.section.sh_flags & u64::from(elf::section_header::SHF_WRITE) != 0 { - SectionKind::Data - } else { - SectionKind::ReadOnlyData - } - } - elf::section_header::SHT_NOBITS => SectionKind::UninitializedData, - _ => SectionKind::Unknown, - } - } -} - -impl<'data, 'file> fmt::Debug for ElfSymbolIterator<'data, 'file> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("ElfSymbolIterator").finish() - } -} - -impl<'data, 'file> Iterator for ElfSymbolIterator<'data, 'file> { - type Item = Symbol<'data>; - - fn next(&mut self) -> Option { - self.symbols.next().map(|symbol| { - let name = self.strtab.get(symbol.st_name).and_then(Result::ok); - let kind = match elf::sym::st_type(symbol.st_info) { - elf::sym::STT_OBJECT => SymbolKind::Data, - elf::sym::STT_FUNC => SymbolKind::Text, - elf::sym::STT_SECTION => SymbolKind::Section, - elf::sym::STT_FILE => SymbolKind::File, - elf::sym::STT_COMMON => SymbolKind::Common, - elf::sym::STT_TLS => SymbolKind::Tls, - _ => SymbolKind::Unknown, - }; - let section_kind = if symbol.st_shndx == elf::section_header::SHN_UNDEF as usize { - None - } else { - self.section_kinds.get(symbol.st_shndx).cloned() - }; - Symbol { - name, - address: symbol.st_value, - size: symbol.st_size, - kind, - section_kind, - global: elf::sym::st_bind(symbol.st_info) != elf::sym::STB_LOCAL, - } - }) - } -} diff --git a/third_party/rust/object/src/lib.rs b/third_party/rust/object/src/lib.rs index 7872807d03c9..9719575ea8be 100644 --- a/third_party/rust/object/src/lib.rs +++ b/third_party/rust/object/src/lib.rs @@ -3,7 +3,7 @@ //! The `object` crate provides a unified interface to working with object files //! across platforms. //! -//! 
See the [`File` struct](./struct.File.html) for details. +//! See the [`File` struct](./read/struct.File.html) for details. #![deny(missing_docs)] #![deny(missing_debug_implementations)] @@ -14,24 +14,14 @@ #[macro_use] extern crate std; -#[cfg(all(not(feature = "std"), feature="compression"))] +#[cfg(all(not(feature = "std"), feature = "compression"))] #[macro_use] extern crate alloc; -#[cfg(all(not(feature = "std"), not(feature="compression")))] +#[cfg(all(not(feature = "std"), not(feature = "compression")))] extern crate alloc; #[cfg(not(feature = "std"))] extern crate core as std; -#[cfg(feature = "compression")] -extern crate flate2; - -extern crate goblin; -extern crate scroll; -extern crate uuid; - -#[cfg(feature = "wasm")] -extern crate parity_wasm; - #[cfg(feature = "std")] mod alloc { pub use std::borrow; @@ -39,582 +29,17 @@ mod alloc { pub use std::vec; } -use alloc::borrow::Cow; -use alloc::fmt; -use alloc::vec::Vec; +// Re-export since these are used in public signatures. +pub use target_lexicon; +pub use uuid; -mod elf; -pub use elf::*; +mod common; +pub use common::*; -mod macho; -pub use macho::*; +#[cfg(feature = "read")] +pub mod read; +#[cfg(feature = "read")] +pub use read::*; -mod pe; -pub use pe::*; - -mod traits; -pub use traits::*; - -#[cfg(feature = "wasm")] -mod wasm; -#[cfg(feature = "wasm")] -pub use wasm::*; - -pub use uuid::Uuid; - -/// The native object file for the target platform. -#[cfg(target_os = "linux")] -pub type NativeFile<'data> = ElfFile<'data>; - -/// The native object file for the target platform. -#[cfg(target_os = "macos")] -pub type NativeFile<'data> = MachOFile<'data>; - -/// The native object file for the target platform. -#[cfg(target_os = "windows")] -pub type NativeFile<'data> = PeFile<'data>; - -/// The native object file for the target platform. -#[cfg(all(feature = "wasm", target_arch = "wasm32"))] -pub type NativeFile<'data> = WasmFile<'data>; - -/// An object file. 
-#[derive(Debug)] -pub struct File<'data> { - inner: FileInternal<'data>, -} - -#[derive(Debug)] -enum FileInternal<'data> { - Elf(ElfFile<'data>), - MachO(MachOFile<'data>), - Pe(PeFile<'data>), - #[cfg(feature = "wasm")] - Wasm(WasmFile), -} - -/// The machine type of an object file. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Machine { - /// An unrecognized machine type. - Other, - /// ARM - Arm, - /// ARM64 - Arm64, - /// x86 - X86, - /// x86-64 - #[allow(non_camel_case_types)] - X86_64, -} - -/// An iterator over the segments of a `File`. -#[derive(Debug)] -pub struct SegmentIterator<'data, 'file> -where - 'data: 'file, -{ - inner: SegmentIteratorInternal<'data, 'file>, -} - -#[derive(Debug)] -enum SegmentIteratorInternal<'data, 'file> -where - 'data: 'file, -{ - Elf(ElfSegmentIterator<'data, 'file>), - MachO(MachOSegmentIterator<'data, 'file>), - Pe(PeSegmentIterator<'data, 'file>), - #[cfg(feature = "wasm")] - Wasm(WasmSegmentIterator<'file>), -} - -/// A segment of a `File`. -pub struct Segment<'data, 'file> -where - 'data: 'file, -{ - inner: SegmentInternal<'data, 'file>, -} - -#[derive(Debug)] -enum SegmentInternal<'data, 'file> -where - 'data: 'file, -{ - Elf(ElfSegment<'data, 'file>), - MachO(MachOSegment<'data, 'file>), - Pe(PeSegment<'data, 'file>), - #[cfg(feature = "wasm")] - Wasm(WasmSegment<'file>), -} - -/// An iterator of the sections of a `File`. -#[derive(Debug)] -pub struct SectionIterator<'data, 'file> -where - 'data: 'file, -{ - inner: SectionIteratorInternal<'data, 'file>, -} - -// we wrap our enums in a struct so that they are kept private. 
-#[derive(Debug)] -enum SectionIteratorInternal<'data, 'file> -where - 'data: 'file, -{ - Elf(ElfSectionIterator<'data, 'file>), - MachO(MachOSectionIterator<'data, 'file>), - Pe(PeSectionIterator<'data, 'file>), - #[cfg(feature = "wasm")] - Wasm(WasmSectionIterator<'file>), -} - -/// A Section of a File -pub struct Section<'data, 'file> -where - 'data: 'file, -{ - inner: SectionInternal<'data, 'file>, -} - -enum SectionInternal<'data, 'file> -where - 'data: 'file, -{ - Elf(ElfSection<'data, 'file>), - MachO(MachOSection<'data>), - Pe(PeSection<'data, 'file>), - #[cfg(feature = "wasm")] - Wasm(WasmSection<'file>), -} - -/// The kind of a section. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SectionKind { - /// The section kind is unknown. - Unknown, - /// An executable code section. - Text, - /// A data section. - Data, - /// A read only data section. - ReadOnlyData, - /// An uninitialized data section. - UninitializedData, - /// Some other type of text or data section. - Other, -} - -/// An iterator over symbol table entries. -#[derive(Debug)] -pub struct SymbolIterator<'data, 'file> -where - 'data: 'file, -{ - inner: SymbolIteratorInternal<'data, 'file>, -} - -#[derive(Debug)] -enum SymbolIteratorInternal<'data, 'file> -where - 'data: 'file, -{ - Elf(ElfSymbolIterator<'data, 'file>), - MachO(MachOSymbolIterator<'data>), - Pe(PeSymbolIterator<'data, 'file>), - #[cfg(feature = "wasm")] - Wasm(WasmSymbolIterator<'file>), -} - -/// A symbol table entry. -#[derive(Debug)] -pub struct Symbol<'data> { - kind: SymbolKind, - section_kind: Option, - global: bool, - name: Option<&'data str>, - address: u64, - size: u64, -} - -/// The kind of a symbol. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SymbolKind { - /// The symbol kind is unknown. - Unknown, - /// The symbol is for executable code. - Text, - /// The symbol is for a data object. - Data, - /// The symbol is for a section. - Section, - /// The symbol is the name of a file. 
It precedes symbols within that file. - File, - /// The symbol is for an uninitialized common block. - Common, - /// The symbol is for a thread local storage entity. - Tls, -} - -/// A map from addresses to symbols. -#[derive(Debug)] -pub struct SymbolMap<'data> { - symbols: Vec>, -} - -/// Evaluate an expression on the contents of a file format enum. -/// -/// This is a hack to avoid virtual calls. -macro_rules! with_inner { - ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { - match $inner { - $enum::Elf(ref $var) => $body, - $enum::MachO(ref $var) => $body, - $enum::Pe(ref $var) => $body, - #[cfg(feature = "wasm")] - $enum::Wasm(ref $var) => $body, - } - }; -} - -macro_rules! with_inner_mut { - ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { - match $inner { - $enum::Elf(ref mut $var) => $body, - $enum::MachO(ref mut $var) => $body, - $enum::Pe(ref mut $var) => $body, - #[cfg(feature = "wasm")] - $enum::Wasm(ref mut $var) => $body, - } - }; -} - -/// Like `with_inner!`, but wraps the result in another enum. -macro_rules! map_inner { - ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { - match $inner { - $from::Elf(ref $var) => $to::Elf($body), - $from::MachO(ref $var) => $to::MachO($body), - $from::Pe(ref $var) => $to::Pe($body), - #[cfg(feature = "wasm")] - $from::Wasm(ref $var) => $to::Wasm($body), - } - }; -} - -/// Call `next` for a file format iterator. -macro_rules! 
next_inner { - ($inner:expr, $from:ident, $to:ident) => { - match $inner { - $from::Elf(ref mut iter) => iter.next().map($to::Elf), - $from::MachO(ref mut iter) => iter.next().map($to::MachO), - $from::Pe(ref mut iter) => iter.next().map($to::Pe), - #[cfg(feature = "wasm")] - $from::Wasm(ref mut iter) => iter.next().map($to::Wasm), - } - }; -} - -#[cfg(feature = "wasm")] -fn parse_wasm(data: &[u8]) -> Result, &'static str> { - const WASM_MAGIC: &[u8] = &[0x00, 0x61, 0x73, 0x6D]; - - if &data[..4] == WASM_MAGIC { - let inner = FileInternal::Wasm(WasmFile::parse(data)?); - return Ok(Some(File { inner })); - } - - Ok(None) -} - -#[cfg(not(feature = "wasm"))] -fn parse_wasm(_data: &[u8]) -> Result, &'static str> { - Ok(None) -} - -impl<'data> File<'data> { - /// Parse the raw file data. - pub fn parse(data: &'data [u8]) -> Result { - if data.len() < 16 { - return Err("File too short"); - } - - if let Some(wasm) = parse_wasm(data)? { - return Ok(wasm); - } - - let mut bytes = [0u8; 16]; - bytes.clone_from_slice(&data[..16]); - let inner = match goblin::peek_bytes(&bytes).map_err(|_| "Could not parse file magic")? 
{ - goblin::Hint::Elf(_) => FileInternal::Elf(ElfFile::parse(data)?), - goblin::Hint::Mach(_) => FileInternal::MachO(MachOFile::parse(data)?), - goblin::Hint::PE => FileInternal::Pe(PeFile::parse(data)?), - _ => return Err("Unknown file magic"), - }; - Ok(File { inner }) - } -} - -impl<'data, 'file> Object<'data, 'file> for File<'data> -where - 'data: 'file, -{ - type Segment = Segment<'data, 'file>; - type SegmentIterator = SegmentIterator<'data, 'file>; - type Section = Section<'data, 'file>; - type SectionIterator = SectionIterator<'data, 'file>; - type SymbolIterator = SymbolIterator<'data, 'file>; - - fn machine(&self) -> Machine { - with_inner!(self.inner, FileInternal, |x| x.machine()) - } - - fn segments(&'file self) -> SegmentIterator<'data, 'file> { - SegmentIterator { - inner: map_inner!(self.inner, FileInternal, SegmentIteratorInternal, |x| { - x.segments() - }), - } - } - - fn section_data_by_name(&self, section_name: &str) -> Option> { - with_inner!(self.inner, FileInternal, |x| x.section_data_by_name( - section_name - )) - } - - fn sections(&'file self) -> SectionIterator<'data, 'file> { - SectionIterator { - inner: map_inner!(self.inner, FileInternal, SectionIteratorInternal, |x| { - x.sections() - }), - } - } - - fn symbols(&'file self) -> SymbolIterator<'data, 'file> { - SymbolIterator { - inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| { - x.symbols() - }), - } - } - - fn dynamic_symbols(&'file self) -> SymbolIterator<'data, 'file> { - SymbolIterator { - inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| { - x.dynamic_symbols() - }), - } - } - - fn symbol_map(&self) -> SymbolMap<'data> { - with_inner!(self.inner, FileInternal, |x| x.symbol_map()) - } - - fn is_little_endian(&self) -> bool { - with_inner!(self.inner, FileInternal, |x| x.is_little_endian()) - } - - fn has_debug_symbols(&self) -> bool { - with_inner!(self.inner, FileInternal, |x| x.has_debug_symbols()) - } - - #[inline] - fn 
mach_uuid(&self) -> Option { - with_inner!(self.inner, FileInternal, |x| x.mach_uuid()) - } - - #[inline] - fn build_id(&self) -> Option<&'data [u8]> { - with_inner!(self.inner, FileInternal, |x| x.build_id()) - } - - #[inline] - fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { - with_inner!(self.inner, FileInternal, |x| x.gnu_debuglink()) - } - - fn entry(&self) -> u64 { - with_inner!(self.inner, FileInternal, |x| x.entry()) - } -} - -impl<'data, 'file> Iterator for SegmentIterator<'data, 'file> { - type Item = Segment<'data, 'file>; - - fn next(&mut self) -> Option { - next_inner!(self.inner, SegmentIteratorInternal, SegmentInternal) - .map(|inner| Segment { inner }) - } -} - -impl<'data, 'file> fmt::Debug for Segment<'data, 'file> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // It's painful to do much better than this - f.debug_struct("Segment") - .field("name", &self.name().unwrap_or("")) - .field("address", &self.address()) - .field("size", &self.data().len()) - .finish() - } -} - -impl<'data, 'file> ObjectSegment<'data> for Segment<'data, 'file> { - fn address(&self) -> u64 { - with_inner!(self.inner, SegmentInternal, |x| x.address()) - } - - fn size(&self) -> u64 { - with_inner!(self.inner, SegmentInternal, |x| x.size()) - } - - fn data(&self) -> &'data [u8] { - with_inner!(self.inner, SegmentInternal, |x| x.data()) - } - - fn name(&self) -> Option<&str> { - with_inner!(self.inner, SegmentInternal, |x| x.name()) - } -} - -impl<'data, 'file> Iterator for SectionIterator<'data, 'file> { - type Item = Section<'data, 'file>; - - fn next(&mut self) -> Option { - next_inner!(self.inner, SectionIteratorInternal, SectionInternal) - .map(|inner| Section { inner }) - } -} - -impl<'data, 'file> fmt::Debug for Section<'data, 'file> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // It's painful to do much better than this - f.debug_struct("Section") - .field("name", &self.name().unwrap_or("")) - .field("address", &self.address()) - 
.field("size", &self.data().len()) - .field("kind", &self.kind()) - .finish() - } -} - -impl<'data, 'file> ObjectSection<'data> for Section<'data, 'file> { - fn address(&self) -> u64 { - with_inner!(self.inner, SectionInternal, |x| x.address()) - } - - fn size(&self) -> u64 { - with_inner!(self.inner, SectionInternal, |x| x.size()) - } - - fn data(&self) -> Cow<'data, [u8]> { - with_inner!(self.inner, SectionInternal, |x| x.data()) - } - - fn name(&self) -> Option<&str> { - with_inner!(self.inner, SectionInternal, |x| x.name()) - } - - fn segment_name(&self) -> Option<&str> { - with_inner!(self.inner, SectionInternal, |x| x.segment_name()) - } - - fn kind(&self) -> SectionKind { - with_inner!(self.inner, SectionInternal, |x| x.kind()) - } -} - -impl<'data, 'file> Iterator for SymbolIterator<'data, 'file> { - type Item = Symbol<'data>; - - fn next(&mut self) -> Option { - with_inner_mut!(self.inner, SymbolIteratorInternal, |x| x.next()) - } -} - -impl<'data> Symbol<'data> { - /// Return the kind of this symbol. - #[inline] - pub fn kind(&self) -> SymbolKind { - self.kind - } - - /// Returns the section kind for the symbol, or `None` if the symbol is undefined. - #[inline] - pub fn section_kind(&self) -> Option { - self.section_kind - } - - /// Return true if the symbol is undefined. - #[inline] - pub fn is_undefined(&self) -> bool { - self.section_kind.is_none() - } - - /// Return true if the symbol is global. - #[inline] - pub fn is_global(&self) -> bool { - self.global - } - - /// Return true if the symbol is local. - #[inline] - pub fn is_local(&self) -> bool { - !self.is_global() - } - - /// The name of the symbol. - #[inline] - pub fn name(&self) -> Option<&'data str> { - self.name - } - - /// The address of the symbol. May be zero if the address is unknown. - #[inline] - pub fn address(&self) -> u64 { - self.address - } - - /// The size of the symbol. May be zero if the size is unknown. 
- #[inline] - pub fn size(&self) -> u64 { - self.size - } -} - -impl<'data> SymbolMap<'data> { - /// Get the symbol containing the given address. - pub fn get(&self, address: u64) -> Option<&Symbol<'data>> { - self.symbols - .binary_search_by(|symbol| { - if address < symbol.address { - std::cmp::Ordering::Greater - } else if address < symbol.address + symbol.size { - std::cmp::Ordering::Equal - } else { - std::cmp::Ordering::Less - } - }) - .ok() - .and_then(|index| self.symbols.get(index)) - } - - /// Get all symbols in the map. - pub fn symbols(&self) -> &[Symbol<'data>] { - &self.symbols - } - - /// Return true for symbols that should be included in the map. - fn filter(symbol: &Symbol) -> bool { - match symbol.kind() { - SymbolKind::Unknown | SymbolKind::Text | SymbolKind::Data => {} - SymbolKind::Section | SymbolKind::File | SymbolKind::Common | SymbolKind::Tls => { - return false - } - } - !symbol.is_undefined() && symbol.size() > 0 - } -} +#[cfg(feature = "write")] +pub mod write; diff --git a/third_party/rust/object/src/macho.rs b/third_party/rust/object/src/macho.rs deleted file mode 100644 index 0d9bff6deaec..000000000000 --- a/third_party/rust/object/src/macho.rs +++ /dev/null @@ -1,376 +0,0 @@ -use alloc::borrow::Cow; -use std::fmt; -use std::slice; -use alloc::vec::Vec; - -use goblin::mach; -use goblin::mach::load_command::CommandVariant; -use uuid::Uuid; - -use {Machine, Object, ObjectSection, ObjectSegment, SectionKind, Symbol, SymbolKind, SymbolMap}; - -/// A Mach-O object file. -#[derive(Debug)] -pub struct MachOFile<'data> { - macho: mach::MachO<'data>, -} - -/// An iterator over the segments of a `MachOFile`. -#[derive(Debug)] -pub struct MachOSegmentIterator<'data, 'file> -where - 'data: 'file, -{ - segments: slice::Iter<'file, mach::segment::Segment<'data>>, -} - -/// A segment of a `MachOFile`. 
-#[derive(Debug)] -pub struct MachOSegment<'data, 'file> -where - 'data: 'file, -{ - segment: &'file mach::segment::Segment<'data>, -} - -/// An iterator over the sections of a `MachOFile`. -pub struct MachOSectionIterator<'data, 'file> -where - 'data: 'file, -{ - segments: slice::Iter<'file, mach::segment::Segment<'data>>, - sections: Option>, -} - -/// A section of a `MachOFile`. -#[derive(Debug)] -pub struct MachOSection<'data> { - section: mach::segment::Section, - data: mach::segment::SectionData<'data>, -} - -/// An iterator over the symbols of a `MachOFile`. -pub struct MachOSymbolIterator<'data> { - symbols: mach::symbols::SymbolIterator<'data>, - section_kinds: Vec, -} - -impl<'data> MachOFile<'data> { - /// Get the Mach-O headers of the file. - // TODO: this is temporary to allow access to features this crate doesn't provide yet - #[inline] - pub fn macho(&self) -> &mach::MachO<'data> { - &self.macho - } - - /// Parse the raw Mach-O file data. - pub fn parse(data: &'data [u8]) -> Result { - let macho = mach::MachO::parse(data, 0).map_err(|_| "Could not parse Mach-O header")?; - Ok(MachOFile { macho }) - } -} - -impl<'data, 'file> Object<'data, 'file> for MachOFile<'data> -where - 'data: 'file, -{ - type Segment = MachOSegment<'data, 'file>; - type SegmentIterator = MachOSegmentIterator<'data, 'file>; - type Section = MachOSection<'data>; - type SectionIterator = MachOSectionIterator<'data, 'file>; - type SymbolIterator = MachOSymbolIterator<'data>; - - fn machine(&self) -> Machine { - match self.macho.header.cputype { - mach::cputype::CPU_TYPE_ARM => Machine::Arm, - mach::cputype::CPU_TYPE_ARM64 => Machine::Arm64, - mach::cputype::CPU_TYPE_X86 => Machine::X86, - mach::cputype::CPU_TYPE_X86_64 => Machine::X86_64, - _ => Machine::Other, - } - } - - fn segments(&'file self) -> MachOSegmentIterator<'data, 'file> { - MachOSegmentIterator { - segments: self.macho.segments.iter(), - } - } - - fn section_data_by_name(&self, section_name: &str) -> Option> { - // 
Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg - // ".debug_info" to "__debug_info". - let (system_section, section_name) = if section_name.starts_with('.') { - (true, &section_name[1..]) - } else { - (false, section_name) - }; - let cmp_section_name = |name: &str| if system_section { - name.starts_with("__") && section_name == &name[2..] - } else { - section_name == name - }; - - for segment in &self.macho.segments { - for section in segment { - if let Ok((section, data)) = section { - if let Ok(name) = section.name() { - if cmp_section_name(name) { - return Some(Cow::from(data)); - } - } - } - } - } - None - } - - fn sections(&'file self) -> MachOSectionIterator<'data, 'file> { - MachOSectionIterator { - segments: self.macho.segments.iter(), - sections: None, - } - } - - fn symbols(&'file self) -> MachOSymbolIterator<'data> { - let symbols = match self.macho.symbols { - Some(ref symbols) => symbols.into_iter(), - None => mach::symbols::SymbolIterator::default(), - }; - - let mut section_kinds = Vec::new(); - // Don't use MachOSectionIterator because it skips sections it fails to parse, - // and the section index is important. - for segment in &self.macho.segments { - for section in segment { - if let Ok((section, data)) = section { - let section = MachOSection { section, data }; - section_kinds.push(section.kind()); - } else { - // Add placeholder so that indexing works. - section_kinds.push(SectionKind::Unknown); - } - } - } - - MachOSymbolIterator { - symbols, - section_kinds, - } - } - - fn dynamic_symbols(&'file self) -> MachOSymbolIterator<'data> { - // The LC_DYSYMTAB command contains indices into the same symbol - // table as the LC_SYMTAB command, so return all of them. - self.symbols() - } - - fn symbol_map(&self) -> SymbolMap<'data> { - let mut symbols: Vec<_> = self.symbols().collect(); - - // Add symbols for the end of each section. 
- for section in self.sections() { - symbols.push(Symbol { - name: None, - address: section.address() + section.size(), - size: 0, - kind: SymbolKind::Section, - section_kind: None, - global: false, - }); - } - - // Calculate symbol sizes by sorting and finding the next symbol. - symbols.sort_by(|a, b| { - a.address.cmp(&b.address).then_with(|| { - // Place the end of section symbols last. - (a.kind == SymbolKind::Section).cmp(&(b.kind == SymbolKind::Section)) - }) - }); - - for i in 0..symbols.len() { - let (before, after) = symbols.split_at_mut(i + 1); - let symbol = &mut before[i]; - if symbol.kind != SymbolKind::Section { - if let Some(next) = after - .iter() - .skip_while(|x| x.kind != SymbolKind::Section && x.address == symbol.address) - .next() - { - symbol.size = next.address - symbol.address; - } - } - } - - symbols.retain(SymbolMap::filter); - SymbolMap { symbols } - } - - #[inline] - fn is_little_endian(&self) -> bool { - self.macho.little_endian - } - - fn has_debug_symbols(&self) -> bool { - self.section_data_by_name(".debug_info").is_some() - } - - fn mach_uuid(&self) -> Option { - // Return the UUID from the `LC_UUID` load command, if one is present. - self.macho.load_commands.iter().filter_map(|lc| { - match lc.command { - CommandVariant::Uuid(ref cmd) => { - //TODO: Uuid should have a `from_array` method that can't fail. 
- Uuid::from_bytes(&cmd.uuid).ok() - } - _ => None, - } - }).nth(0) - } - - fn entry(&self) -> u64 { - self.macho.entry - } -} - -impl<'data, 'file> Iterator for MachOSegmentIterator<'data, 'file> { - type Item = MachOSegment<'data, 'file>; - - fn next(&mut self) -> Option { - self.segments.next().map(|segment| MachOSegment { segment }) - } -} - -impl<'data, 'file> ObjectSegment<'data> for MachOSegment<'data, 'file> { - #[inline] - fn address(&self) -> u64 { - self.segment.vmaddr - } - - #[inline] - fn size(&self) -> u64 { - self.segment.vmsize - } - - #[inline] - fn data(&self) -> &'data [u8] { - self.segment.data - } - - #[inline] - fn name(&self) -> Option<&str> { - self.segment.name().ok() - } -} - -impl<'data, 'file> fmt::Debug for MachOSectionIterator<'data, 'file> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // It's painful to do much better than this - f.debug_struct("MachOSectionIterator").finish() - } -} - -impl<'data, 'file> Iterator for MachOSectionIterator<'data, 'file> { - type Item = MachOSection<'data>; - - fn next(&mut self) -> Option { - loop { - if let Some(ref mut sections) = self.sections { - while let Some(section) = sections.next() { - if let Ok((section, data)) = section { - return Some(MachOSection { section, data }); - } - } - } - match self.segments.next() { - None => return None, - Some(segment) => { - self.sections = Some(segment.into_iter()); - } - } - } - } -} - -impl<'data> ObjectSection<'data> for MachOSection<'data> { - #[inline] - fn address(&self) -> u64 { - self.section.addr - } - - #[inline] - fn size(&self) -> u64 { - self.section.size - } - - #[inline] - fn data(&self) -> Cow<'data, [u8]> { - Cow::from(self.data) - } - - #[inline] - fn name(&self) -> Option<&str> { - self.section.name().ok() - } - - #[inline] - fn segment_name(&self) -> Option<&str> { - self.section.segname().ok() - } - - fn kind(&self) -> SectionKind { - match (self.segment_name(), self.name()) { - (Some("__TEXT"), Some("__text")) => 
SectionKind::Text, - (Some("__DATA"), Some("__data")) => SectionKind::Data, - (Some("__DATA"), Some("__bss")) => SectionKind::UninitializedData, - _ => SectionKind::Other, - } - } -} - -impl<'data> fmt::Debug for MachOSymbolIterator<'data> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("MachOSymbolIterator").finish() - } -} - -impl<'data> Iterator for MachOSymbolIterator<'data> { - type Item = Symbol<'data>; - - fn next(&mut self) -> Option { - while let Some(symbol) = self.symbols.next() { - if let Ok((name, nlist)) = symbol { - if nlist.n_type & mach::symbols::N_STAB != 0 { - continue; - } - let n_type = nlist.n_type & mach::symbols::NLIST_TYPE_MASK; - let section_kind = if n_type == mach::symbols::N_SECT { - if nlist.n_sect == 0 { - None - } else { - self.section_kinds.get(nlist.n_sect - 1).cloned() - } - } else { - // TODO: better handling for other n_type values - None - }; - let kind = match section_kind { - Some(SectionKind::Text) => SymbolKind::Text, - Some(SectionKind::Data) - | Some(SectionKind::ReadOnlyData) - | Some(SectionKind::UninitializedData) => SymbolKind::Data, - _ => SymbolKind::Unknown, - }; - return Some(Symbol { - name: Some(name), - address: nlist.n_value, - // Only calculated for symbol maps - size: 0, - kind, - section_kind, - global: nlist.is_global(), - }); - } - } - None - } -} diff --git a/third_party/rust/object/src/read/any.rs b/third_party/rust/object/src/read/any.rs new file mode 100644 index 000000000000..98b69b092010 --- /dev/null +++ b/third_party/rust/object/src/read/any.rs @@ -0,0 +1,515 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::fmt; +use target_lexicon::{Architecture, BinaryFormat}; +use uuid::Uuid; + +#[cfg(feature = "wasm")] +use crate::read::wasm; +use crate::read::{coff, elf, macho, pe}; +use crate::read::{ + Object, ObjectSection, ObjectSegment, Relocation, SectionIndex, SectionKind, Symbol, + SymbolIndex, SymbolMap, +}; + +/// Evaluate an expression on the contents of a file 
format enum. +/// +/// This is a hack to avoid virtual calls. +macro_rules! with_inner { + ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { + match $inner { + $enum::Coff(ref $var) => $body, + $enum::Elf(ref $var) => $body, + $enum::MachO(ref $var) => $body, + $enum::Pe(ref $var) => $body, + #[cfg(feature = "wasm")] + $enum::Wasm(ref $var) => $body, + } + }; +} + +macro_rules! with_inner_mut { + ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { + match $inner { + $enum::Coff(ref mut $var) => $body, + $enum::Elf(ref mut $var) => $body, + $enum::MachO(ref mut $var) => $body, + $enum::Pe(ref mut $var) => $body, + #[cfg(feature = "wasm")] + $enum::Wasm(ref mut $var) => $body, + } + }; +} + +/// Like `with_inner!`, but wraps the result in another enum. +macro_rules! map_inner { + ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { + match $inner { + $from::Coff(ref $var) => $to::Coff($body), + $from::Elf(ref $var) => $to::Elf($body), + $from::MachO(ref $var) => $to::MachO($body), + $from::Pe(ref $var) => $to::Pe($body), + #[cfg(feature = "wasm")] + $from::Wasm(ref $var) => $to::Wasm($body), + } + }; +} + +/// Like `map_inner!`, but the result is a Result or Option. +macro_rules! map_inner_option { + ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { + match $inner { + $from::Coff(ref $var) => $body.map($to::Coff), + $from::Elf(ref $var) => $body.map($to::Elf), + $from::MachO(ref $var) => $body.map($to::MachO), + $from::Pe(ref $var) => $body.map($to::Pe), + #[cfg(feature = "wasm")] + $from::Wasm(ref $var) => $body.map($to::Wasm), + } + }; +} + +/// Call `next` for a file format iterator. +macro_rules! 
next_inner { + ($inner:expr, $from:ident, $to:ident) => { + match $inner { + $from::Coff(ref mut iter) => iter.next().map($to::Coff), + $from::Elf(ref mut iter) => iter.next().map($to::Elf), + $from::MachO(ref mut iter) => iter.next().map($to::MachO), + $from::Pe(ref mut iter) => iter.next().map($to::Pe), + #[cfg(feature = "wasm")] + $from::Wasm(ref mut iter) => iter.next().map($to::Wasm), + } + }; +} + +/// An object file. +/// +/// Most functionality is provided by the `Object` trait implementation. +#[derive(Debug)] +pub struct File<'data> { + inner: FileInternal<'data>, +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug)] +enum FileInternal<'data> { + Coff(coff::CoffFile<'data>), + Elf(elf::ElfFile<'data>), + MachO(macho::MachOFile<'data>), + Pe(pe::PeFile<'data>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmFile), +} + +impl<'data> File<'data> { + /// Parse the raw file data. + pub fn parse(data: &'data [u8]) -> Result { + if data.len() < 16 { + return Err("File too short"); + } + + let inner = match [data[0], data[1], data[2], data[3]] { + // ELF + [0x7f, b'E', b'L', b'F'] => FileInternal::Elf(elf::ElfFile::parse(data)?), + // 32-bit Mach-O + [0xfe, 0xed, 0xfa, 0xce] + | [0xce, 0xfa, 0xed, 0xfe] + // 64-bit Mach-O + | [0xfe, 0xed, 0xfa, 0xcf] + | [0xcf, 0xfa, 0xed, 0xfe] => FileInternal::MachO(macho::MachOFile::parse(data)?), + // WASM + #[cfg(feature = "wasm")] + [0x00, b'a', b's', b'm'] => FileInternal::Wasm(wasm::WasmFile::parse(data)?), + // MS-DOS, assume stub for Windows PE + [b'M', b'Z', _, _] => FileInternal::Pe(pe::PeFile::parse(data)?), + // TODO: more COFF machines + // COFF x86 + [0x4c, 0x01, _, _] + // COFF x86-64 + | [0x64, 0x86, _, _] => FileInternal::Coff(coff::CoffFile::parse(data)?), + _ => return Err("Unknown file magic"), + }; + Ok(File { inner }) + } + + /// Return the file format. 
+ pub fn format(&self) -> BinaryFormat { + match self.inner { + FileInternal::Elf(_) => BinaryFormat::Elf, + FileInternal::MachO(_) => BinaryFormat::Macho, + FileInternal::Coff(_) | FileInternal::Pe(_) => BinaryFormat::Coff, + #[cfg(feature = "wasm")] + FileInternal::Wasm(_) => BinaryFormat::Wasm, + } + } +} + +impl<'data, 'file> Object<'data, 'file> for File<'data> +where + 'data: 'file, +{ + type Segment = Segment<'data, 'file>; + type SegmentIterator = SegmentIterator<'data, 'file>; + type Section = Section<'data, 'file>; + type SectionIterator = SectionIterator<'data, 'file>; + type SymbolIterator = SymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + with_inner!(self.inner, FileInternal, |x| x.architecture()) + } + + fn is_little_endian(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.is_little_endian()) + } + + fn is_64(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.is_64()) + } + + fn segments(&'file self) -> SegmentIterator<'data, 'file> { + SegmentIterator { + inner: map_inner!(self.inner, FileInternal, SegmentIteratorInternal, |x| x + .segments()), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option> { + map_inner_option!(self.inner, FileInternal, SectionInternal, |x| x + .section_by_name(section_name)) + .map(|inner| Section { inner }) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option> { + map_inner_option!(self.inner, FileInternal, SectionInternal, |x| x + .section_by_index(index)) + .map(|inner| Section { inner }) + } + + fn section_data_by_name(&self, section_name: &str) -> Option> { + with_inner!(self.inner, FileInternal, |x| x + .section_data_by_name(section_name)) + } + + fn sections(&'file self) -> SectionIterator<'data, 'file> { + SectionIterator { + inner: map_inner!(self.inner, FileInternal, SectionIteratorInternal, |x| x + .sections()), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option> { + with_inner!(self.inner, FileInternal, 
|x| x.symbol_by_index(index)) + } + + fn symbols(&'file self) -> SymbolIterator<'data, 'file> { + SymbolIterator { + inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| x + .symbols()), + } + } + + fn dynamic_symbols(&'file self) -> SymbolIterator<'data, 'file> { + SymbolIterator { + inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| x + .dynamic_symbols()), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + with_inner!(self.inner, FileInternal, |x| x.symbol_map()) + } + + fn has_debug_symbols(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.has_debug_symbols()) + } + + #[inline] + fn mach_uuid(&self) -> Option { + with_inner!(self.inner, FileInternal, |x| x.mach_uuid()) + } + + #[inline] + fn build_id(&self) -> Option<&'data [u8]> { + with_inner!(self.inner, FileInternal, |x| x.build_id()) + } + + #[inline] + fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { + with_inner!(self.inner, FileInternal, |x| x.gnu_debuglink()) + } + + fn entry(&self) -> u64 { + with_inner!(self.inner, FileInternal, |x| x.entry()) + } +} + +/// An iterator over the segments of a `File`. +#[derive(Debug)] +pub struct SegmentIterator<'data, 'file> +where + 'data: 'file, +{ + inner: SegmentIteratorInternal<'data, 'file>, +} + +#[derive(Debug)] +enum SegmentIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSegmentIterator<'data, 'file>), + Elf(elf::ElfSegmentIterator<'data, 'file>), + MachO(macho::MachOSegmentIterator<'data, 'file>), + Pe(pe::PeSegmentIterator<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSegmentIterator<'file>), +} + +impl<'data, 'file> Iterator for SegmentIterator<'data, 'file> { + type Item = Segment<'data, 'file>; + + fn next(&mut self) -> Option { + next_inner!(self.inner, SegmentIteratorInternal, SegmentInternal) + .map(|inner| Segment { inner }) + } +} + +/// A segment of a `File`. 
+pub struct Segment<'data, 'file> +where + 'data: 'file, +{ + inner: SegmentInternal<'data, 'file>, +} + +#[derive(Debug)] +enum SegmentInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSegment<'data, 'file>), + Elf(elf::ElfSegment<'data, 'file>), + MachO(macho::MachOSegment<'data, 'file>), + Pe(pe::PeSegment<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSegment<'file>), +} + +impl<'data, 'file> fmt::Debug for Segment<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + f.debug_struct("Segment") + .field("name", &self.name().unwrap_or("")) + .field("address", &self.address()) + .field("size", &self.data().len()) + .finish() + } +} + +impl<'data, 'file> ObjectSegment<'data> for Segment<'data, 'file> { + fn address(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.address()) + } + + fn size(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.size()) + } + + fn align(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.align()) + } + + fn data(&self) -> &'data [u8] { + with_inner!(self.inner, SegmentInternal, |x| x.data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + with_inner!(self.inner, SegmentInternal, |x| x.data_range(address, size)) + } + + fn name(&self) -> Option<&str> { + with_inner!(self.inner, SegmentInternal, |x| x.name()) + } +} + +/// An iterator of the sections of a `File`. +#[derive(Debug)] +pub struct SectionIterator<'data, 'file> +where + 'data: 'file, +{ + inner: SectionIteratorInternal<'data, 'file>, +} + +// we wrap our enums in a struct so that they are kept private. 
+#[derive(Debug)] +enum SectionIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSectionIterator<'data, 'file>), + Elf(elf::ElfSectionIterator<'data, 'file>), + MachO(macho::MachOSectionIterator<'data, 'file>), + Pe(pe::PeSectionIterator<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSectionIterator<'file>), +} + +impl<'data, 'file> Iterator for SectionIterator<'data, 'file> { + type Item = Section<'data, 'file>; + + fn next(&mut self) -> Option { + next_inner!(self.inner, SectionIteratorInternal, SectionInternal) + .map(|inner| Section { inner }) + } +} + +/// A Section of a File +pub struct Section<'data, 'file> +where + 'data: 'file, +{ + inner: SectionInternal<'data, 'file>, +} + +enum SectionInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSection<'data, 'file>), + Elf(elf::ElfSection<'data, 'file>), + MachO(macho::MachOSection<'data, 'file>), + Pe(pe::PeSection<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSection<'file>), +} + +impl<'data, 'file> fmt::Debug for Section<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + let mut s = f.debug_struct("Section"); + if let Some(segment) = self.segment_name() { + s.field("segment", &segment); + } + s.field("name", &self.name().unwrap_or("")) + .field("address", &self.address()) + .field("size", &self.data().len()) + .field("kind", &self.kind()) + .finish() + } +} + +impl<'data, 'file> ObjectSection<'data> for Section<'data, 'file> { + type RelocationIterator = RelocationIterator<'data, 'file>; + + fn index(&self) -> SectionIndex { + with_inner!(self.inner, SectionInternal, |x| x.index()) + } + + fn address(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.address()) + } + + fn size(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.size()) + } + + fn align(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.align()) + } + + fn 
data(&self) -> Cow<'data, [u8]> { + with_inner!(self.inner, SectionInternal, |x| x.data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + with_inner!(self.inner, SectionInternal, |x| x.data_range(address, size)) + } + + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + with_inner!(self.inner, SectionInternal, |x| x.uncompressed_data()) + } + + fn name(&self) -> Option<&str> { + with_inner!(self.inner, SectionInternal, |x| x.name()) + } + + fn segment_name(&self) -> Option<&str> { + with_inner!(self.inner, SectionInternal, |x| x.segment_name()) + } + + fn kind(&self) -> SectionKind { + with_inner!(self.inner, SectionInternal, |x| x.kind()) + } + + fn relocations(&self) -> RelocationIterator<'data, 'file> { + RelocationIterator { + inner: map_inner!( + self.inner, + SectionInternal, + RelocationIteratorInternal, + |x| x.relocations() + ), + } + } +} + +/// An iterator over symbol table entries. +#[derive(Debug)] +pub struct SymbolIterator<'data, 'file> +where + 'data: 'file, +{ + inner: SymbolIteratorInternal<'data, 'file>, +} + +#[derive(Debug)] +enum SymbolIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSymbolIterator<'data, 'file>), + Elf(elf::ElfSymbolIterator<'data, 'file>), + MachO(macho::MachOSymbolIterator<'data, 'file>), + Pe(pe::PeSymbolIterator<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSymbolIterator<'file>), +} + +impl<'data, 'file> Iterator for SymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option { + with_inner_mut!(self.inner, SymbolIteratorInternal, |x| x.next()) + } +} + +/// An iterator over relocation entries +#[derive(Debug)] +pub struct RelocationIterator<'data, 'file> +where + 'data: 'file, +{ + inner: RelocationIteratorInternal<'data, 'file>, +} + +#[derive(Debug)] +enum RelocationIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffRelocationIterator<'data, 'file>), + 
Elf(elf::ElfRelocationIterator<'data, 'file>), + MachO(macho::MachORelocationIterator<'data, 'file>), + Pe(pe::PeRelocationIterator), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmRelocationIterator), +} + +impl<'data, 'file> Iterator for RelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + with_inner_mut!(self.inner, RelocationIteratorInternal, |x| x.next()) + } +} diff --git a/third_party/rust/object/src/read/coff.rs b/third_party/rust/object/src/read/coff.rs new file mode 100644 index 000000000000..1015e34537bc --- /dev/null +++ b/third_party/rust/object/src/read/coff.rs @@ -0,0 +1,511 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::fmt; +use crate::alloc::vec::Vec; +use goblin::pe; +use std::{iter, slice}; +use target_lexicon::Architecture; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, RelocationEncoding, RelocationKind, + RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolKind, SymbolMap, + SymbolScope, +}; + +/// A COFF object file. +#[derive(Debug)] +pub struct CoffFile<'data> { + coff: pe::Coff<'data>, + data: &'data [u8], +} + +/// An iterator over the loadable sections of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + iter: slice::Iter<'file, pe::section_table::SectionTable>, +} + +/// A loadable section of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSegment<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + section: &'file pe::section_table::SectionTable, +} + +/// An iterator over the sections of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + iter: iter::Enumerate>, +} + +/// A section of a `CoffFile`. 
+#[derive(Debug)] +pub struct CoffSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + index: SectionIndex, + section: &'file pe::section_table::SectionTable, +} + +/// An iterator over the symbols of a `CoffFile`. +pub struct CoffSymbolIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + symbols: pe::symbol::SymbolIterator<'data>, +} + +/// An iterator over the relocations in an `CoffSection`. +pub struct CoffRelocationIterator<'data, 'file> { + file: &'file CoffFile<'data>, + relocations: pe::relocation::Relocations<'data>, +} + +impl<'data> CoffFile<'data> { + /// Get the COFF headers of the file. + // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn coff(&self) -> &pe::Coff<'data> { + &self.coff + } + + /// Parse the raw COFF file data. + pub fn parse(data: &'data [u8]) -> Result { + let coff = pe::Coff::parse(data).map_err(|_| "Could not parse COFF header")?; + Ok(CoffFile { coff, data }) + } +} + +impl<'data, 'file> Object<'data, 'file> for CoffFile<'data> +where + 'data: 'file, +{ + type Segment = CoffSegment<'data, 'file>; + type SegmentIterator = CoffSegmentIterator<'data, 'file>; + type Section = CoffSection<'data, 'file>; + type SectionIterator = CoffSectionIterator<'data, 'file>; + type SymbolIterator = CoffSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.coff.header.machine { + pe::header::COFF_MACHINE_X86 => Architecture::I386, + pe::header::COFF_MACHINE_X86_64 => Architecture::X86_64, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + true + } + + #[inline] + fn is_64(&self) -> bool { + false + } + + fn segments(&'file self) -> CoffSegmentIterator<'data, 'file> { + CoffSegmentIterator { + file: self, + iter: self.coff.sections.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option> { + self.sections() + .find(|section| 
section.name() == Some(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option> { + self.sections().find(|section| section.index() == index) + } + + fn sections(&'file self) -> CoffSectionIterator<'data, 'file> { + CoffSectionIterator { + file: self, + iter: self.coff.sections.iter().enumerate(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option> { + self.coff + .symbols + .get(index.0) + .map(|(name, symbol)| parse_symbol(index.0, name, &symbol, &self.coff)) + } + + fn symbols(&'file self) -> CoffSymbolIterator<'data, 'file> { + CoffSymbolIterator { + file: self, + symbols: self.coff.symbols.iter(), + } + } + + fn dynamic_symbols(&'file self) -> CoffSymbolIterator<'data, 'file> { + CoffSymbolIterator { + file: self, + symbols: goblin::pe::symbol::SymbolIterator::default(), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + // TODO: untested + let mut symbols: Vec<_> = self + .symbols() + .map(|(_, s)| s) + .filter(SymbolMap::filter) + .collect(); + symbols.sort_by_key(|x| x.address); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + for section in &self.coff.sections { + if let Ok(name) = section.name() { + if name == ".debug_info" { + return true; + } + } + } + false + } + + fn entry(&self) -> u64 { + 0 + } +} + +impl<'data, 'file> Iterator for CoffSegmentIterator<'data, 'file> { + type Item = CoffSegment<'data, 'file>; + + fn next(&mut self) -> Option { + self.iter.next().map(|section| CoffSegment { + file: self.file, + section, + }) + } +} + +fn section_alignment(characteristics: u32) -> u64 { + match characteristics & pe::section_table::IMAGE_SCN_ALIGN_MASK { + pe::section_table::IMAGE_SCN_ALIGN_2BYTES => 2, + pe::section_table::IMAGE_SCN_ALIGN_4BYTES => 4, + pe::section_table::IMAGE_SCN_ALIGN_8BYTES => 8, + pe::section_table::IMAGE_SCN_ALIGN_16BYTES => 16, + pe::section_table::IMAGE_SCN_ALIGN_32BYTES => 32, + pe::section_table::IMAGE_SCN_ALIGN_64BYTES => 64, + 
pe::section_table::IMAGE_SCN_ALIGN_128BYTES => 128, + pe::section_table::IMAGE_SCN_ALIGN_256BYTES => 256, + pe::section_table::IMAGE_SCN_ALIGN_512BYTES => 512, + pe::section_table::IMAGE_SCN_ALIGN_1024BYTES => 1024, + pe::section_table::IMAGE_SCN_ALIGN_2048BYTES => 2048, + pe::section_table::IMAGE_SCN_ALIGN_4096BYTES => 4096, + pe::section_table::IMAGE_SCN_ALIGN_8192BYTES => 8192, + _ => 1, + } +} + +impl<'data, 'file> ObjectSegment<'data> for CoffSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.virtual_size) + } + + #[inline] + fn align(&self) -> u64 { + section_alignment(self.section.characteristics) + } + + fn data(&self) -> &'data [u8] { + let offset = self.section.pointer_to_raw_data as usize; + let size = self.section.size_of_raw_data as usize; + &self.file.data[offset..][..size] + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + self.section.name().ok() + } +} + +impl<'data, 'file> Iterator for CoffSectionIterator<'data, 'file> { + type Item = CoffSection<'data, 'file>; + + fn next(&mut self) -> Option { + self.iter.next().map(|(index, section)| CoffSection { + file: self.file, + index: SectionIndex(index), + section, + }) + } +} + +impl<'data, 'file> CoffSection<'data, 'file> { + fn raw_data(&self) -> &'data [u8] { + let offset = self.section.pointer_to_raw_data as usize; + let size = self.section.size_of_raw_data as usize; + &self.file.data[offset..][..size] + } +} + +impl<'data, 'file> ObjectSection<'data> for CoffSection<'data, 'file> { + type RelocationIterator = CoffRelocationIterator<'data, 'file>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address) + } + + #[inline] + fn size(&self) 
-> u64 { + u64::from(self.section.size_of_raw_data) + } + + #[inline] + fn align(&self) -> u64 { + section_alignment(self.section.characteristics) + } + + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.raw_data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.raw_data(), self.address(), address, size) + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + self.data() + } + + fn name(&self) -> Option<&str> { + self.section.name().ok() + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + None + } + + #[inline] + fn kind(&self) -> SectionKind { + if self.section.characteristics + & (pe::section_table::IMAGE_SCN_CNT_CODE | pe::section_table::IMAGE_SCN_MEM_EXECUTE) + != 0 + { + SectionKind::Text + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_CNT_INITIALIZED_DATA + != 0 + { + if self.section.characteristics & pe::section_table::IMAGE_SCN_MEM_DISCARDABLE != 0 { + SectionKind::Other + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_MEM_WRITE != 0 { + SectionKind::Data + } else { + SectionKind::ReadOnlyData + } + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_CNT_UNINITIALIZED_DATA + != 0 + { + SectionKind::UninitializedData + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_LNK_INFO != 0 { + SectionKind::Linker + } else { + SectionKind::Unknown + } + } + + fn relocations(&self) -> CoffRelocationIterator<'data, 'file> { + CoffRelocationIterator { + file: self.file, + relocations: self.section.relocations(self.file.data).unwrap_or_default(), + } + } +} + +impl<'data, 'file> fmt::Debug for CoffSymbolIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CoffSymbolIterator").finish() + } +} + +impl<'data, 'file> Iterator for CoffSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option { + 
self.symbols.next().map(|(index, name, symbol)| { + ( + SymbolIndex(index), + parse_symbol(index, name, &symbol, &self.file.coff), + ) + }) + } +} + +fn parse_symbol<'data>( + index: usize, + name: Option<&'data str>, + symbol: &pe::symbol::Symbol, + coff: &pe::Coff<'data>, +) -> Symbol<'data> { + let name = if symbol.is_file() { + coff.symbols + .aux_file(index + 1, symbol.number_of_aux_symbols as usize) + } else { + name.or_else(|| { + symbol.name_offset().and_then(|offset| { + coff.strings + .get(offset as usize) + .map(Result::ok) + .unwrap_or_default() + }) + }) + }; + let size = if symbol.is_function_definition() && symbol.number_of_aux_symbols > 0 { + coff.symbols + .aux_function_definition(index + 1) + .map(|aux| u64::from(aux.total_size)) + .unwrap_or(0) + } else { + 0 + }; + let kind = if symbol.is_section_definition() { + SymbolKind::Section + } else { + match symbol.storage_class { + pe::symbol::IMAGE_SYM_CLASS_SECTION => SymbolKind::Section, + pe::symbol::IMAGE_SYM_CLASS_FILE => SymbolKind::File, + pe::symbol::IMAGE_SYM_CLASS_LABEL => SymbolKind::Label, + pe::symbol::IMAGE_SYM_CLASS_EXTERNAL + | pe::symbol::IMAGE_SYM_CLASS_WEAK_EXTERNAL + | pe::symbol::IMAGE_SYM_CLASS_STATIC => { + if symbol.derived_type() == pe::symbol::IMAGE_SYM_DTYPE_FUNCTION { + SymbolKind::Text + } else { + SymbolKind::Data + } + } + _ => SymbolKind::Unknown, + } + }; + let section_index = if symbol.section_number <= 0 { + None + } else { + Some(SectionIndex(symbol.section_number as usize - 1)) + }; + let undefined = symbol.section_number == pe::symbol::IMAGE_SYM_UNDEFINED; + let weak = symbol.storage_class == pe::symbol::IMAGE_SYM_CLASS_WEAK_EXTERNAL; + let scope = match symbol.storage_class { + _ if undefined => SymbolScope::Unknown, + pe::symbol::IMAGE_SYM_CLASS_EXTERNAL + | pe::symbol::IMAGE_SYM_CLASS_EXTERNAL_DEF + | pe::symbol::IMAGE_SYM_CLASS_WEAK_EXTERNAL => { + // TODO: determine if symbol is exported + SymbolScope::Linkage + } + _ => SymbolScope::Compilation, + }; + 
Symbol { + name, + address: u64::from(symbol.value), + size, + kind, + section_index, + undefined, + weak, + scope, + } +} + +impl<'data, 'file> Iterator for CoffRelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + self.relocations.next().map(|relocation| { + let (kind, size, addend) = match self.file.coff.header.machine { + pe::header::COFF_MACHINE_X86 => match relocation.typ { + pe::relocation::IMAGE_REL_I386_DIR16 => (RelocationKind::Absolute, 16, 0), + pe::relocation::IMAGE_REL_I386_REL16 => (RelocationKind::Relative, 16, 0), + pe::relocation::IMAGE_REL_I386_DIR32 => (RelocationKind::Absolute, 32, 0), + pe::relocation::IMAGE_REL_I386_DIR32NB => (RelocationKind::ImageOffset, 32, 0), + pe::relocation::IMAGE_REL_I386_SECTION => (RelocationKind::SectionIndex, 16, 0), + pe::relocation::IMAGE_REL_I386_SECREL => (RelocationKind::SectionOffset, 32, 0), + pe::relocation::IMAGE_REL_I386_SECREL7 => (RelocationKind::SectionOffset, 7, 0), + pe::relocation::IMAGE_REL_I386_REL32 => (RelocationKind::Relative, 32, -4), + _ => (RelocationKind::Other(u32::from(relocation.typ)), 0, 0), + }, + pe::header::COFF_MACHINE_X86_64 => match relocation.typ { + pe::relocation::IMAGE_REL_AMD64_ADDR64 => (RelocationKind::Absolute, 64, 0), + pe::relocation::IMAGE_REL_AMD64_ADDR32 => (RelocationKind::Absolute, 32, 0), + pe::relocation::IMAGE_REL_AMD64_ADDR32NB => { + (RelocationKind::ImageOffset, 32, 0) + } + pe::relocation::IMAGE_REL_AMD64_REL32 => (RelocationKind::Relative, 32, -4), + pe::relocation::IMAGE_REL_AMD64_REL32_1 => (RelocationKind::Relative, 32, -5), + pe::relocation::IMAGE_REL_AMD64_REL32_2 => (RelocationKind::Relative, 32, -6), + pe::relocation::IMAGE_REL_AMD64_REL32_3 => (RelocationKind::Relative, 32, -7), + pe::relocation::IMAGE_REL_AMD64_REL32_4 => (RelocationKind::Relative, 32, -8), + pe::relocation::IMAGE_REL_AMD64_REL32_5 => (RelocationKind::Relative, 32, -9), + pe::relocation::IMAGE_REL_AMD64_SECTION => { + 
(RelocationKind::SectionIndex, 16, 0) + } + pe::relocation::IMAGE_REL_AMD64_SECREL => { + (RelocationKind::SectionOffset, 32, 0) + } + pe::relocation::IMAGE_REL_AMD64_SECREL7 => { + (RelocationKind::SectionOffset, 7, 0) + } + _ => (RelocationKind::Other(u32::from(relocation.typ)), 0, 0), + }, + _ => (RelocationKind::Other(u32::from(relocation.typ)), 0, 0), + }; + let target = + RelocationTarget::Symbol(SymbolIndex(relocation.symbol_table_index as usize)); + ( + u64::from(relocation.virtual_address), + Relocation { + kind, + encoding: RelocationEncoding::Generic, + size, + target, + addend, + implicit_addend: true, + }, + ) + }) + } +} + +impl<'data, 'file> fmt::Debug for CoffRelocationIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CoffRelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/elf.rs b/third_party/rust/object/src/read/elf.rs new file mode 100644 index 000000000000..8a7e98f8194d --- /dev/null +++ b/third_party/rust/object/src/read/elf.rs @@ -0,0 +1,691 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::fmt; +use crate::alloc::vec::Vec; +#[cfg(feature = "compression")] +use flate2::{Decompress, FlushDecompress}; +#[cfg(feature = "compression")] +use goblin::container; +use goblin::{elf, strtab}; +#[cfg(feature = "compression")] +use scroll::ctx::TryFromCtx; +use scroll::{self, Pread}; +use std::{iter, slice}; +use target_lexicon::Architecture; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, RelocationEncoding, RelocationKind, + RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolKind, SymbolMap, + SymbolScope, +}; + +/// An ELF object file. +#[derive(Debug)] +pub struct ElfFile<'data> { + elf: elf::Elf<'data>, + data: &'data [u8], +} + +impl<'data> ElfFile<'data> { + /// Get the ELF headers of the file. 
+ // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn elf(&self) -> &elf::Elf<'data> { + &self.elf + } + + /// Parse the raw ELF file data. + pub fn parse(data: &'data [u8]) -> Result { + let elf = elf::Elf::parse(data).map_err(|_| "Could not parse ELF header")?; + Ok(ElfFile { elf, data }) + } + + fn raw_section_by_name<'file>( + &'file self, + section_name: &str, + ) -> Option> { + for (index, section) in self.elf.section_headers.iter().enumerate() { + if let Some(Ok(name)) = self.elf.shdr_strtab.get(section.sh_name) { + if name == section_name { + return Some(ElfSection { + file: self, + index: SectionIndex(index), + section, + }); + } + } + } + None + } + + #[cfg(feature = "compression")] + fn zdebug_section_by_name<'file>( + &'file self, + section_name: &str, + ) -> Option> { + if !section_name.starts_with(".debug_") { + return None; + } + self.raw_section_by_name(&format!(".zdebug_{}", §ion_name[7..])) + } + + #[cfg(not(feature = "compression"))] + fn zdebug_section_by_name<'file>( + &'file self, + _section_name: &str, + ) -> Option> { + None + } +} + +impl<'data, 'file> Object<'data, 'file> for ElfFile<'data> +where + 'data: 'file, +{ + type Segment = ElfSegment<'data, 'file>; + type SegmentIterator = ElfSegmentIterator<'data, 'file>; + type Section = ElfSection<'data, 'file>; + type SectionIterator = ElfSectionIterator<'data, 'file>; + type SymbolIterator = ElfSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.elf.header.e_machine { + elf::header::EM_ARM => Architecture::Arm, + elf::header::EM_AARCH64 => Architecture::Aarch64, + elf::header::EM_386 => Architecture::I386, + elf::header::EM_X86_64 => Architecture::X86_64, + elf::header::EM_MIPS => Architecture::Mips, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.elf.little_endian + } + + #[inline] + fn is_64(&self) -> bool { + self.elf.is_64 + } + + fn segments(&'file 
self) -> ElfSegmentIterator<'data, 'file> { + ElfSegmentIterator { + file: self, + iter: self.elf.program_headers.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option> { + self.raw_section_by_name(section_name) + .or_else(|| self.zdebug_section_by_name(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option> { + self.elf + .section_headers + .get(index.0) + .map(|section| ElfSection { + file: self, + index, + section, + }) + } + + fn sections(&'file self) -> ElfSectionIterator<'data, 'file> { + ElfSectionIterator { + file: self, + iter: self.elf.section_headers.iter().enumerate(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option> { + self.elf + .syms + .get(index.0) + .map(|symbol| parse_symbol(index.0, &symbol, &self.elf.strtab)) + } + + fn symbols(&'file self) -> ElfSymbolIterator<'data, 'file> { + ElfSymbolIterator { + strtab: &self.elf.strtab, + symbols: self.elf.syms.iter().enumerate(), + } + } + + fn dynamic_symbols(&'file self) -> ElfSymbolIterator<'data, 'file> { + ElfSymbolIterator { + strtab: &self.elf.dynstrtab, + symbols: self.elf.dynsyms.iter().enumerate(), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + let mut symbols: Vec<_> = self + .symbols() + .map(|(_, s)| s) + .filter(SymbolMap::filter) + .collect(); + symbols.sort_by_key(|x| x.address); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + for header in &self.elf.section_headers { + if let Some(Ok(name)) = self.elf.shdr_strtab.get(header.sh_name) { + if name == ".debug_info" || name == ".zdebug_info" { + return true; + } + } + } + false + } + + fn build_id(&self) -> Option<&'data [u8]> { + if let Some(mut notes) = self.elf.iter_note_headers(self.data) { + while let Some(Ok(note)) = notes.next() { + if note.n_type == elf::note::NT_GNU_BUILD_ID { + return Some(note.desc); + } + } + } + if let Some(mut notes) = self + .elf + .iter_note_sections(self.data, Some(".note.gnu.build-id")) + { + 
while let Some(Ok(note)) = notes.next() { + if note.n_type == elf::note::NT_GNU_BUILD_ID { + return Some(note.desc); + } + } + } + None + } + + fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { + if let Some(Cow::Borrowed(data)) = self.section_data_by_name(".gnu_debuglink") { + if let Some(filename_len) = data.iter().position(|x| *x == 0) { + let filename = &data[..filename_len]; + // Round to 4 byte alignment after null terminator. + let offset = (filename_len + 1 + 3) & !3; + if offset + 4 <= data.len() { + let endian = if self.is_little_endian() { + scroll::LE + } else { + scroll::BE + }; + let crc: u32 = data.pread_with(offset, endian).unwrap(); + return Some((filename, crc)); + } + } + } + None + } + + fn entry(&self) -> u64 { + self.elf.entry + } +} + +/// An iterator over the segments of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + iter: slice::Iter<'file, elf::ProgramHeader>, +} + +impl<'data, 'file> Iterator for ElfSegmentIterator<'data, 'file> { + type Item = ElfSegment<'data, 'file>; + + fn next(&mut self) -> Option { + while let Some(segment) = self.iter.next() { + if segment.p_type == elf::program_header::PT_LOAD { + return Some(ElfSegment { + file: self.file, + segment, + }); + } + } + None + } +} + +/// A segment of an `ElfFile`. 
+#[derive(Debug)] +pub struct ElfSegment<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + segment: &'file elf::ProgramHeader, +} + +impl<'data, 'file> ObjectSegment<'data> for ElfSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + self.segment.p_vaddr + } + + #[inline] + fn size(&self) -> u64 { + self.segment.p_memsz + } + + #[inline] + fn align(&self) -> u64 { + self.segment.p_align + } + + fn data(&self) -> &'data [u8] { + &self.file.data[self.segment.p_offset as usize..][..self.segment.p_filesz as usize] + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + None + } +} + +/// An iterator over the sections of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + iter: iter::Enumerate>, +} + +impl<'data, 'file> Iterator for ElfSectionIterator<'data, 'file> { + type Item = ElfSection<'data, 'file>; + + fn next(&mut self) -> Option { + self.iter.next().map(|(index, section)| ElfSection { + index: SectionIndex(index), + file: self.file, + section, + }) + } +} + +/// A section of an `ElfFile`. 
+#[derive(Debug)] +pub struct ElfSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + index: SectionIndex, + section: &'file elf::SectionHeader, +} + +impl<'data, 'file> ElfSection<'data, 'file> { + fn raw_data(&self) -> &'data [u8] { + if self.section.sh_type == elf::section_header::SHT_NOBITS { + &[] + } else { + &self.file.data[self.section.sh_offset as usize..][..self.section.sh_size as usize] + } + } + + #[cfg(feature = "compression")] + fn maybe_decompress_data(&self) -> Option> { + if (self.section.sh_flags & u64::from(elf::section_header::SHF_COMPRESSED)) == 0 { + return None; + } + + let container = match self.file.elf.header.container() { + Ok(c) => c, + Err(_) => return None, + }; + let endianness = match self.file.elf.header.endianness() { + Ok(e) => e, + Err(_) => return None, + }; + let ctx = container::Ctx::new(container, endianness); + let data = self.raw_data(); + let (compression_type, uncompressed_size, compressed_data) = + match elf::compression_header::CompressionHeader::try_from_ctx(&data, ctx) { + Ok((chdr, size)) => (chdr.ch_type, chdr.ch_size, &data[size..]), + Err(_) => return None, + }; + if compression_type != elf::compression_header::ELFCOMPRESS_ZLIB { + return None; + } + + let mut decompressed = Vec::with_capacity(uncompressed_size as usize); + let mut decompress = Decompress::new(true); + if decompress + .decompress_vec(compressed_data, &mut decompressed, FlushDecompress::Finish) + .is_err() + { + return None; + } + Some(Cow::Owned(decompressed)) + } + + /// Try GNU-style "ZLIB" header decompression. + #[cfg(feature = "compression")] + fn maybe_decompress_data_gnu(&self) -> Option> { + let name = match self.name() { + Some(name) => name, + None => return None, + }; + if !name.starts_with(".zdebug_") { + return None; + } + let data = self.raw_data(); + // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally + // huge allocations. 
This also reduces the chance of accidentally matching on a + // .debug_str that happens to start with "ZLIB". + if data.len() < 12 || &data[..8] != b"ZLIB\0\0\0\0" { + return None; + } + let uncompressed_size: u32 = data.pread_with(8, scroll::BE).unwrap(); + let mut decompressed = Vec::with_capacity(uncompressed_size as usize); + let mut decompress = Decompress::new(true); + if decompress + .decompress_vec(&data[12..], &mut decompressed, FlushDecompress::Finish) + .is_err() + { + return None; + } + Some(Cow::Owned(decompressed)) + } +} + +impl<'data, 'file> ObjectSection<'data> for ElfSection<'data, 'file> { + type RelocationIterator = ElfRelocationIterator<'data, 'file>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + self.section.sh_addr + } + + #[inline] + fn size(&self) -> u64 { + self.section.sh_size + } + + #[inline] + fn align(&self) -> u64 { + self.section.sh_addralign + } + + #[inline] + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.raw_data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.raw_data(), self.address(), address, size) + } + + #[cfg(feature = "compression")] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + self.maybe_decompress_data() + .or_else(|| self.maybe_decompress_data_gnu()) + .unwrap_or_else(|| self.data()) + } + + #[cfg(not(feature = "compression"))] + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + self.data() + } + + fn name(&self) -> Option<&str> { + self.file + .elf + .shdr_strtab + .get(self.section.sh_name) + .and_then(Result::ok) + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + None + } + + fn kind(&self) -> SectionKind { + match self.section.sh_type { + elf::section_header::SHT_PROGBITS => { + if self.section.sh_flags & u64::from(elf::section_header::SHF_ALLOC) != 0 { + if self.section.sh_flags & u64::from(elf::section_header::SHF_EXECINSTR) != 0 { + 
SectionKind::Text + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_TLS) != 0 { + SectionKind::Tls + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_WRITE) != 0 + { + SectionKind::Data + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_STRINGS) + != 0 + { + SectionKind::ReadOnlyString + } else { + SectionKind::ReadOnlyData + } + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_STRINGS) != 0 { + SectionKind::OtherString + } else { + SectionKind::Other + } + } + elf::section_header::SHT_NOBITS => { + if self.section.sh_flags & u64::from(elf::section_header::SHF_TLS) != 0 { + SectionKind::UninitializedTls + } else { + SectionKind::UninitializedData + } + } + elf::section_header::SHT_NULL + | elf::section_header::SHT_SYMTAB + | elf::section_header::SHT_STRTAB + | elf::section_header::SHT_RELA + | elf::section_header::SHT_HASH + | elf::section_header::SHT_DYNAMIC + | elf::section_header::SHT_REL + | elf::section_header::SHT_DYNSYM => SectionKind::Metadata, + _ => { + // TODO: maybe add more specialised kinds based on sh_type (e.g. Unwind) + SectionKind::Unknown + } + } + } + + fn relocations(&self) -> ElfRelocationIterator<'data, 'file> { + ElfRelocationIterator { + section_index: self.index, + file: self.file, + sections: self.file.elf.shdr_relocs.iter(), + relocations: None, + } + } +} + +/// An iterator over the symbols of an `ElfFile`. 
+pub struct ElfSymbolIterator<'data, 'file> +where + 'data: 'file, +{ + strtab: &'file strtab::Strtab<'data>, + symbols: iter::Enumerate>, +} + +impl<'data, 'file> fmt::Debug for ElfSymbolIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElfSymbolIterator").finish() + } +} + +impl<'data, 'file> Iterator for ElfSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option { + self.symbols.next().map(|(index, symbol)| { + ( + SymbolIndex(index), + parse_symbol(index, &symbol, self.strtab), + ) + }) + } +} + +fn parse_symbol<'data>( + index: usize, + symbol: &elf::sym::Sym, + strtab: &strtab::Strtab<'data>, +) -> Symbol<'data> { + let name = strtab.get(symbol.st_name).and_then(Result::ok); + let kind = match elf::sym::st_type(symbol.st_info) { + elf::sym::STT_NOTYPE if index == 0 => SymbolKind::Null, + elf::sym::STT_OBJECT => SymbolKind::Data, + elf::sym::STT_FUNC => SymbolKind::Text, + elf::sym::STT_SECTION => SymbolKind::Section, + elf::sym::STT_FILE => SymbolKind::File, + elf::sym::STT_COMMON => SymbolKind::Common, + elf::sym::STT_TLS => SymbolKind::Tls, + _ => SymbolKind::Unknown, + }; + let undefined = symbol.st_shndx == elf::section_header::SHN_UNDEF as usize; + let section_index = + if undefined || symbol.st_shndx >= elf::section_header::SHN_LORESERVE as usize { + None + } else { + Some(SectionIndex(symbol.st_shndx)) + }; + let weak = symbol.st_bind() == elf::sym::STB_WEAK; + let scope = match symbol.st_bind() { + _ if undefined => SymbolScope::Unknown, + elf::sym::STB_LOCAL => SymbolScope::Compilation, + elf::sym::STB_GLOBAL | elf::sym::STB_WEAK => { + if symbol.st_visibility() == elf::sym::STV_HIDDEN { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + } + } + _ => SymbolScope::Unknown, + }; + Symbol { + name, + address: symbol.st_value, + size: symbol.st_size, + kind, + section_index, + undefined, + weak, + scope, + } +} + +/// An iterator over the 
relocations in an `ElfSection`. +pub struct ElfRelocationIterator<'data, 'file> +where + 'data: 'file, +{ + /// The index of the section that the relocations apply to. + section_index: SectionIndex, + file: &'file ElfFile<'data>, + sections: slice::Iter<'file, (elf::ShdrIdx, elf::RelocSection<'data>)>, + relocations: Option>, +} + +impl<'data, 'file> Iterator for ElfRelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + loop { + if let Some(ref mut relocations) = self.relocations { + if let Some(reloc) = relocations.next() { + let mut encoding = RelocationEncoding::Generic; + let (kind, size) = match self.file.elf.header.e_machine { + elf::header::EM_ARM => match reloc.r_type { + elf::reloc::R_ARM_ABS32 => (RelocationKind::Absolute, 32), + _ => (RelocationKind::Other(reloc.r_type), 0), + }, + elf::header::EM_AARCH64 => match reloc.r_type { + elf::reloc::R_AARCH64_ABS64 => (RelocationKind::Absolute, 64), + elf::reloc::R_AARCH64_ABS32 => (RelocationKind::Absolute, 32), + elf::reloc::R_AARCH64_ABS16 => (RelocationKind::Absolute, 16), + elf::reloc::R_AARCH64_PREL64 => (RelocationKind::Relative, 64), + elf::reloc::R_AARCH64_PREL32 => (RelocationKind::Relative, 32), + elf::reloc::R_AARCH64_PREL16 => (RelocationKind::Relative, 16), + _ => (RelocationKind::Other(reloc.r_type), 0), + }, + elf::header::EM_386 => match reloc.r_type { + elf::reloc::R_386_32 => (RelocationKind::Absolute, 32), + elf::reloc::R_386_PC32 => (RelocationKind::Relative, 32), + elf::reloc::R_386_GOT32 => (RelocationKind::Got, 32), + elf::reloc::R_386_PLT32 => (RelocationKind::PltRelative, 32), + elf::reloc::R_386_GOTOFF => (RelocationKind::GotBaseOffset, 32), + elf::reloc::R_386_GOTPC => (RelocationKind::GotBaseRelative, 32), + elf::reloc::R_386_16 => (RelocationKind::Absolute, 16), + elf::reloc::R_386_PC16 => (RelocationKind::Relative, 16), + elf::reloc::R_386_8 => (RelocationKind::Absolute, 8), + elf::reloc::R_386_PC8 => (RelocationKind::Relative, 8), 
+ _ => (RelocationKind::Other(reloc.r_type), 0), + }, + elf::header::EM_X86_64 => match reloc.r_type { + elf::reloc::R_X86_64_64 => (RelocationKind::Absolute, 64), + elf::reloc::R_X86_64_PC32 => (RelocationKind::Relative, 32), + elf::reloc::R_X86_64_GOT32 => (RelocationKind::Got, 32), + elf::reloc::R_X86_64_PLT32 => (RelocationKind::PltRelative, 32), + elf::reloc::R_X86_64_GOTPCREL => (RelocationKind::GotRelative, 32), + elf::reloc::R_X86_64_32 => (RelocationKind::Absolute, 32), + elf::reloc::R_X86_64_32S => { + encoding = RelocationEncoding::X86Signed; + (RelocationKind::Absolute, 32) + } + elf::reloc::R_X86_64_16 => (RelocationKind::Absolute, 16), + elf::reloc::R_X86_64_PC16 => (RelocationKind::Relative, 16), + elf::reloc::R_X86_64_8 => (RelocationKind::Absolute, 8), + elf::reloc::R_X86_64_PC8 => (RelocationKind::Relative, 8), + _ => (RelocationKind::Other(reloc.r_type), 0), + }, + _ => (RelocationKind::Other(reloc.r_type), 0), + }; + let target = RelocationTarget::Symbol(SymbolIndex(reloc.r_sym as usize)); + return Some(( + reloc.r_offset, + Relocation { + kind, + encoding, + size, + target, + addend: reloc.r_addend.unwrap_or(0), + implicit_addend: reloc.r_addend.is_none(), + }, + )); + } + } + match self.sections.next() { + None => return None, + Some((index, relocs)) => { + let section = &self.file.elf.section_headers[*index]; + if section.sh_info as usize == self.section_index.0 { + self.relocations = Some(relocs.into_iter()); + } + // TODO: do we need to return section.sh_link? 
+ } + } + } + } +} + +impl<'data, 'file> fmt::Debug for ElfRelocationIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElfRelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/macho.rs b/third_party/rust/object/src/read/macho.rs new file mode 100644 index 000000000000..8fe8a508a714 --- /dev/null +++ b/third_party/rust/object/src/read/macho.rs @@ -0,0 +1,589 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::vec::Vec; +use goblin::container; +use goblin::mach; +use goblin::mach::load_command::CommandVariant; +use std::{fmt, iter, ops, slice}; +use target_lexicon::Architecture; +use uuid::Uuid; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, RelocationEncoding, RelocationKind, + RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolKind, SymbolMap, + SymbolScope, +}; + +/// A Mach-O object file. +#[derive(Debug)] +pub struct MachOFile<'data> { + macho: mach::MachO<'data>, + data: &'data [u8], + ctx: container::Ctx, + sections: Vec>, +} + +impl<'data> MachOFile<'data> { + /// Get the Mach-O headers of the file. + // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn macho(&self) -> &mach::MachO<'data> { + &self.macho + } + + /// Parse the raw Mach-O file data. + pub fn parse(data: &'data [u8]) -> Result { + let (_magic, ctx) = + mach::parse_magic_and_ctx(data, 0).map_err(|_| "Could not parse Mach-O magic")?; + let ctx = ctx.ok_or("Invalid Mach-O magic")?; + let macho = mach::MachO::parse(data, 0).map_err(|_| "Could not parse Mach-O header")?; + // Build a list of sections to make some operations more efficient. 
+ let mut sections = Vec::new(); + 'segments: for segment in &macho.segments { + for section_result in segment { + if let Ok((section, data)) = section_result { + sections.push(MachOSectionInternal::parse(section, data)); + } else { + break 'segments; + } + } + } + Ok(MachOFile { + macho, + data, + ctx, + sections, + }) + } + + /// Return the section at the given index. + #[inline] + fn section_internal(&self, index: SectionIndex) -> Option<&MachOSectionInternal<'data>> { + index + .0 + .checked_sub(1) + .and_then(|index| self.sections.get(index)) + } +} + +impl<'data, 'file> Object<'data, 'file> for MachOFile<'data> +where + 'data: 'file, +{ + type Segment = MachOSegment<'data, 'file>; + type SegmentIterator = MachOSegmentIterator<'data, 'file>; + type Section = MachOSection<'data, 'file>; + type SectionIterator = MachOSectionIterator<'data, 'file>; + type SymbolIterator = MachOSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.macho.header.cputype { + mach::cputype::CPU_TYPE_ARM => Architecture::Arm, + mach::cputype::CPU_TYPE_ARM64 => Architecture::Aarch64, + mach::cputype::CPU_TYPE_X86 => Architecture::I386, + mach::cputype::CPU_TYPE_X86_64 => Architecture::X86_64, + mach::cputype::CPU_TYPE_MIPS => Architecture::Mips, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.macho.little_endian + } + + #[inline] + fn is_64(&self) -> bool { + self.macho.is_64 + } + + fn segments(&'file self) -> MachOSegmentIterator<'data, 'file> { + MachOSegmentIterator { + segments: self.macho.segments.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option> { + // Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg + // ".debug_info" to "__debug_info". 
+ let system_section = section_name.starts_with('.'); + let cmp_section_name = |section: &MachOSection| { + section + .name() + .map(|name| { + section_name == name + || (system_section + && name.starts_with("__") + && §ion_name[1..] == &name[2..]) + }) + .unwrap_or(false) + }; + + self.sections().find(cmp_section_name) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option> { + self.section_internal(index) + .map(|_| MachOSection { file: self, index }) + } + + fn sections(&'file self) -> MachOSectionIterator<'data, 'file> { + MachOSectionIterator { + file: self, + iter: 0..self.sections.len(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option> { + self.macho + .symbols + .as_ref() + .and_then(|symbols| symbols.get(index.0).ok()) + .and_then(|(name, nlist)| parse_symbol(self, name, &nlist)) + } + + fn symbols(&'file self) -> MachOSymbolIterator<'data, 'file> { + let symbols = match self.macho.symbols { + Some(ref symbols) => symbols.into_iter(), + None => mach::symbols::SymbolIterator::default(), + } + .enumerate(); + + MachOSymbolIterator { + file: self, + symbols, + } + } + + fn dynamic_symbols(&'file self) -> MachOSymbolIterator<'data, 'file> { + // The LC_DYSYMTAB command contains indices into the same symbol + // table as the LC_SYMTAB command, so return all of them. + self.symbols() + } + + fn symbol_map(&self) -> SymbolMap<'data> { + let mut symbols: Vec<_> = self.symbols().map(|(_, s)| s).collect(); + + // Add symbols for the end of each section. + for section in self.sections() { + symbols.push(Symbol { + name: None, + address: section.address() + section.size(), + size: 0, + kind: SymbolKind::Section, + section_index: None, + undefined: false, + weak: false, + scope: SymbolScope::Compilation, + }); + } + + // Calculate symbol sizes by sorting and finding the next symbol. + symbols.sort_by(|a, b| { + a.address.cmp(&b.address).then_with(|| { + // Place the end of section symbols last. 
+ (a.kind == SymbolKind::Section).cmp(&(b.kind == SymbolKind::Section)) + }) + }); + + for i in 0..symbols.len() { + let (before, after) = symbols.split_at_mut(i + 1); + let symbol = &mut before[i]; + if symbol.kind != SymbolKind::Section { + if let Some(next) = after + .iter() + .skip_while(|x| x.kind != SymbolKind::Section && x.address == symbol.address) + .next() + { + symbol.size = next.address - symbol.address; + } + } + } + + symbols.retain(SymbolMap::filter); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + self.section_data_by_name(".debug_info").is_some() + } + + fn mach_uuid(&self) -> Option { + // Return the UUID from the `LC_UUID` load command, if one is present. + self.macho + .load_commands + .iter() + .filter_map(|lc| { + match lc.command { + CommandVariant::Uuid(ref cmd) => { + //TODO: Uuid should have a `from_array` method that can't fail. + Uuid::from_slice(&cmd.uuid).ok() + } + _ => None, + } + }) + .nth(0) + } + + fn entry(&self) -> u64 { + self.macho.entry + } +} + +/// An iterator over the segments of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + segments: slice::Iter<'file, mach::segment::Segment<'data>>, +} + +impl<'data, 'file> Iterator for MachOSegmentIterator<'data, 'file> { + type Item = MachOSegment<'data, 'file>; + + fn next(&mut self) -> Option { + self.segments.next().map(|segment| MachOSegment { segment }) + } +} + +/// A segment of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegment<'data, 'file> +where + 'data: 'file, +{ + segment: &'file mach::segment::Segment<'data>, +} + +impl<'data, 'file> ObjectSegment<'data> for MachOSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + self.segment.vmaddr + } + + #[inline] + fn size(&self) -> u64 { + self.segment.vmsize + } + + #[inline] + fn align(&self) -> u64 { + // Page size. 
+ 0x1000 + } + + #[inline] + fn data(&self) -> &'data [u8] { + self.segment.data + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + self.segment.name().ok() + } +} + +/// An iterator over the sections of a `MachOFile`. +pub struct MachOSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + iter: ops::Range, +} + +impl<'data, 'file> fmt::Debug for MachOSectionIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + f.debug_struct("MachOSectionIterator").finish() + } +} + +impl<'data, 'file> Iterator for MachOSectionIterator<'data, 'file> { + type Item = MachOSection<'data, 'file>; + + fn next(&mut self) -> Option { + self.iter.next().map(|index| MachOSection { + file: self.file, + index: SectionIndex(index + 1), + }) + } +} + +/// A section of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + index: SectionIndex, +} + +impl<'data, 'file> MachOSection<'data, 'file> { + #[inline] + fn internal(&self) -> &'file MachOSectionInternal<'data> { + // We ensure the index is always valid. 
+ &self.file.section_internal(self.index).unwrap() + } +} + +impl<'data, 'file> ObjectSection<'data> for MachOSection<'data, 'file> { + type RelocationIterator = MachORelocationIterator<'data, 'file>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + self.internal().section.addr + } + + #[inline] + fn size(&self) -> u64 { + self.internal().section.size + } + + #[inline] + fn align(&self) -> u64 { + 1 << self.internal().section.align + } + + #[inline] + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.internal().data) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.internal().data, self.address(), address, size) + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + // TODO: does MachO support compression? + self.data() + } + + #[inline] + fn name(&self) -> Option<&str> { + self.internal().section.name().ok() + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + self.internal().section.segname().ok() + } + + fn kind(&self) -> SectionKind { + self.internal().kind + } + + fn relocations(&self) -> MachORelocationIterator<'data, 'file> { + MachORelocationIterator { + file: self.file, + relocations: self + .internal() + .section + .iter_relocations(self.file.data, self.file.ctx), + } + } +} + +#[derive(Debug)] +struct MachOSectionInternal<'data> { + section: mach::segment::Section, + data: mach::segment::SectionData<'data>, + kind: SectionKind, +} + +impl<'data> MachOSectionInternal<'data> { + fn parse(section: mach::segment::Section, data: mach::segment::SectionData<'data>) -> Self { + let kind = if let (Ok(segname), Ok(name)) = (section.segname(), section.name()) { + match (segname, name) { + ("__TEXT", "__text") => SectionKind::Text, + ("__TEXT", "__const") => SectionKind::ReadOnlyData, + ("__TEXT", "__cstring") => SectionKind::ReadOnlyString, + ("__TEXT", "__eh_frame") => SectionKind::ReadOnlyData, + ("__TEXT", 
"__gcc_except_tab") => SectionKind::ReadOnlyData, + ("__DATA", "__data") => SectionKind::Data, + ("__DATA", "__const") => SectionKind::ReadOnlyData, + ("__DATA", "__bss") => SectionKind::UninitializedData, + ("__DATA", "__thread_data") => SectionKind::Tls, + ("__DATA", "__thread_bss") => SectionKind::UninitializedTls, + ("__DATA", "__thread_vars") => SectionKind::TlsVariables, + ("__DWARF", _) => SectionKind::Debug, + _ => SectionKind::Unknown, + } + } else { + SectionKind::Unknown + }; + MachOSectionInternal { + section, + data, + kind, + } + } +} + +/// An iterator over the symbols of a `MachOFile`. +pub struct MachOSymbolIterator<'data, 'file> { + file: &'file MachOFile<'data>, + symbols: iter::Enumerate>, +} + +impl<'data, 'file> fmt::Debug for MachOSymbolIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachOSymbolIterator").finish() + } +} + +impl<'data, 'file> Iterator for MachOSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option { + while let Some((index, Ok((name, nlist)))) = self.symbols.next() { + if let Some(symbol) = parse_symbol(self.file, name, &nlist) { + return Some((SymbolIndex(index), symbol)); + } + } + None + } +} + +fn parse_symbol<'data>( + file: &MachOFile<'data>, + name: &'data str, + nlist: &mach::symbols::Nlist, +) -> Option> { + if nlist.n_type & mach::symbols::N_STAB != 0 { + return None; + } + let n_type = nlist.n_type & mach::symbols::NLIST_TYPE_MASK; + let section_index = if n_type == mach::symbols::N_SECT { + if nlist.n_sect == 0 { + None + } else { + Some(SectionIndex(nlist.n_sect)) + } + } else { + // TODO: better handling for other n_type values + None + }; + let kind = section_index + .and_then(|index| file.section_internal(index)) + .map(|section| match section.kind { + SectionKind::Text => SymbolKind::Text, + SectionKind::Data + | SectionKind::ReadOnlyData + | SectionKind::ReadOnlyString + | 
SectionKind::UninitializedData => SymbolKind::Data, + SectionKind::Tls | SectionKind::UninitializedTls | SectionKind::TlsVariables => { + SymbolKind::Tls + } + _ => SymbolKind::Unknown, + }) + .unwrap_or(SymbolKind::Unknown); + let undefined = nlist.is_undefined(); + let weak = nlist.is_weak(); + let scope = if undefined { + SymbolScope::Unknown + } else if nlist.n_type & mach::symbols::N_EXT == 0 { + SymbolScope::Compilation + } else if nlist.n_type & mach::symbols::N_PEXT != 0 { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + }; + Some(Symbol { + name: Some(name), + address: nlist.n_value, + // Only calculated for symbol maps + size: 0, + kind, + section_index, + undefined, + weak, + scope, + }) +} + +/// An iterator over the relocations in an `MachOSection`. +pub struct MachORelocationIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + relocations: mach::segment::RelocationIterator<'data>, +} + +impl<'data, 'file> Iterator for MachORelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + self.relocations.next()?.ok().map(|reloc| { + let mut encoding = RelocationEncoding::Generic; + let kind = match self.file.macho.header.cputype { + mach::cputype::CPU_TYPE_ARM => match reloc.r_type() { + mach::relocation::ARM_RELOC_VANILLA => RelocationKind::Absolute, + _ => RelocationKind::Other(reloc.r_info), + }, + mach::cputype::CPU_TYPE_ARM64 => match reloc.r_type() { + mach::relocation::ARM64_RELOC_UNSIGNED => RelocationKind::Absolute, + _ => RelocationKind::Other(reloc.r_info), + }, + mach::cputype::CPU_TYPE_X86 => match reloc.r_type() { + mach::relocation::GENERIC_RELOC_VANILLA => RelocationKind::Absolute, + _ => RelocationKind::Other(reloc.r_info), + }, + mach::cputype::CPU_TYPE_X86_64 => match reloc.r_type() { + mach::relocation::X86_64_RELOC_UNSIGNED => RelocationKind::Absolute, + mach::relocation::X86_64_RELOC_SIGNED => { + encoding = RelocationEncoding::X86RipRelative; + 
RelocationKind::Relative + } + mach::relocation::X86_64_RELOC_BRANCH => { + encoding = RelocationEncoding::X86Branch; + RelocationKind::Relative + } + mach::relocation::X86_64_RELOC_GOT => RelocationKind::GotRelative, + mach::relocation::X86_64_RELOC_GOT_LOAD => { + encoding = RelocationEncoding::X86RipRelativeMovq; + RelocationKind::GotRelative + } + _ => RelocationKind::Other(reloc.r_info), + }, + _ => RelocationKind::Other(reloc.r_info), + }; + let size = 8 << reloc.r_length(); + let target = if reloc.is_extern() { + RelocationTarget::Symbol(SymbolIndex(reloc.r_symbolnum())) + } else { + RelocationTarget::Section(SectionIndex(reloc.r_symbolnum())) + }; + let addend = if reloc.r_pcrel() != 0 { -4 } else { 0 }; + ( + reloc.r_address as u64, + Relocation { + kind, + encoding, + size, + target, + addend, + implicit_addend: true, + }, + ) + }) + } +} + +impl<'data, 'file> fmt::Debug for MachORelocationIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachORelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/mod.rs b/third_party/rust/object/src/read/mod.rs new file mode 100644 index 000000000000..c120db9a834a --- /dev/null +++ b/third_party/rust/object/src/read/mod.rs @@ -0,0 +1,250 @@ +//! Interface for reading object files. + +use crate::alloc::vec::Vec; +use crate::common::{RelocationEncoding, RelocationKind, SectionKind, SymbolKind, SymbolScope}; + +mod any; +pub use any::*; + +mod coff; +pub use coff::*; + +mod elf; +pub use elf::*; + +mod macho; +pub use macho::*; + +mod pe; +pub use pe::*; + +mod traits; +pub use traits::*; + +#[cfg(feature = "wasm")] +mod wasm; +#[cfg(feature = "wasm")] +pub use wasm::*; + +/// The native object file for the target platform. +#[cfg(target_os = "linux")] +pub type NativeFile<'data> = ElfFile<'data>; + +/// The native object file for the target platform. 
+#[cfg(target_os = "macos")] +pub type NativeFile<'data> = MachOFile<'data>; + +/// The native object file for the target platform. +#[cfg(target_os = "windows")] +pub type NativeFile<'data> = PeFile<'data>; + +/// The native object file for the target platform. +#[cfg(all(feature = "wasm", target_arch = "wasm32"))] +pub type NativeFile<'data> = WasmFile<'data>; + +/// The index used to identify a section of a file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SectionIndex(pub usize); + +/// The index used to identify a symbol of a file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SymbolIndex(pub usize); + +/// A symbol table entry. +#[derive(Debug)] +pub struct Symbol<'data> { + name: Option<&'data str>, + address: u64, + size: u64, + kind: SymbolKind, + section_index: Option, + undefined: bool, + weak: bool, + scope: SymbolScope, +} + +impl<'data> Symbol<'data> { + /// Return the kind of this symbol. + #[inline] + pub fn kind(&self) -> SymbolKind { + self.kind + } + + /// Returns the section index for the section containing this symbol. + /// + /// May return `None` if the section is unknown or the symbol is undefined. + #[inline] + pub fn section_index(&self) -> Option { + self.section_index + } + + /// Return true if the symbol is undefined. + #[inline] + pub fn is_undefined(&self) -> bool { + self.undefined + } + + /// Return true if the symbol is weak. + #[inline] + pub fn is_weak(&self) -> bool { + self.weak + } + + /// Return true if the symbol visible outside of the compilation unit. + /// + /// This treats `SymbolScope::Unknown` as global. + #[inline] + pub fn is_global(&self) -> bool { + !self.is_local() + } + + /// Return true if the symbol is only visible within the compilation unit. + #[inline] + pub fn is_local(&self) -> bool { + self.scope == SymbolScope::Compilation + } + + /// Returns the symbol scope. + #[inline] + pub fn scope(&self) -> SymbolScope { + self.scope + } + + /// The name of the symbol. 
+ #[inline] + pub fn name(&self) -> Option<&'data str> { + self.name + } + + /// The address of the symbol. May be zero if the address is unknown. + #[inline] + pub fn address(&self) -> u64 { + self.address + } + + /// The size of the symbol. May be zero if the size is unknown. + #[inline] + pub fn size(&self) -> u64 { + self.size + } +} + +/// A map from addresses to symbols. +#[derive(Debug)] +pub struct SymbolMap<'data> { + symbols: Vec>, +} + +impl<'data> SymbolMap<'data> { + /// Get the symbol containing the given address. + pub fn get(&self, address: u64) -> Option<&Symbol<'data>> { + self.symbols + .binary_search_by(|symbol| { + if address < symbol.address { + std::cmp::Ordering::Greater + } else if address < symbol.address + symbol.size { + std::cmp::Ordering::Equal + } else { + std::cmp::Ordering::Less + } + }) + .ok() + .and_then(|index| self.symbols.get(index)) + } + + /// Get all symbols in the map. + pub fn symbols(&self) -> &[Symbol<'data>] { + &self.symbols + } + + /// Return true for symbols that should be included in the map. + fn filter(symbol: &Symbol<'_>) -> bool { + match symbol.kind() { + SymbolKind::Unknown | SymbolKind::Text | SymbolKind::Data => {} + SymbolKind::Null + | SymbolKind::Section + | SymbolKind::File + | SymbolKind::Label + | SymbolKind::Common + | SymbolKind::Tls => { + return false; + } + } + !symbol.is_undefined() && symbol.size() > 0 + } +} + +/// The target referenced by a relocation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RelocationTarget { + /// The target is a symbol. + Symbol(SymbolIndex), + /// The target is a section. + Section(SectionIndex), +} + +/// A relocation entry. +#[derive(Debug)] +pub struct Relocation { + kind: RelocationKind, + encoding: RelocationEncoding, + size: u8, + target: RelocationTarget, + addend: i64, + implicit_addend: bool, +} + +impl Relocation { + /// The operation used to calculate the result of the relocation. 
/// Returns the `size` bytes of `data` that start at address `range_address`,
/// given that the first byte of `data` is located at address `data_address`.
///
/// Returns `None` if the requested range starts before `data_address`, extends
/// past the end of `data`, or its end address does not fit in a `u64`.
fn data_range(data: &[u8], data_address: u64, range_address: u64, size: u64) -> Option<&[u8]> {
    // All arithmetic is done in `u64` with overflow checks, so an oversized
    // `size` or offset is rejected instead of wrapping or being truncated.
    let start_offset = range_address.checked_sub(data_address)?;
    let end_offset = start_offset.checked_add(size)?;
    if end_offset > data.len() as u64 {
        return None;
    }
    // Both offsets are bounded by `data.len()`, so the casts are lossless.
    Some(&data[start_offset as usize..end_offset as usize])
}
Object, ObjectSection, ObjectSegment, Relocation, SectionIndex, SectionKind, Symbol, + SymbolIndex, SymbolKind, SymbolMap, SymbolScope, }; /// A PE object file. @@ -15,56 +16,6 @@ pub struct PeFile<'data> { data: &'data [u8], } -/// An iterator over the loadable sections of a `PeFile`. -#[derive(Debug)] -pub struct PeSegmentIterator<'data, 'file> -where - 'data: 'file, -{ - file: &'file PeFile<'data>, - iter: slice::Iter<'file, pe::section_table::SectionTable>, -} - -/// A loadable section of a `PeFile`. -#[derive(Debug)] -pub struct PeSegment<'data, 'file> -where - 'data: 'file, -{ - file: &'file PeFile<'data>, - section: &'file pe::section_table::SectionTable, -} - -/// An iterator over the sections of a `PeFile`. -#[derive(Debug)] -pub struct PeSectionIterator<'data, 'file> -where - 'data: 'file, -{ - file: &'file PeFile<'data>, - iter: slice::Iter<'file, pe::section_table::SectionTable>, -} - -/// A section of a `PeFile`. -#[derive(Debug)] -pub struct PeSection<'data, 'file> -where - 'data: 'file, -{ - file: &'file PeFile<'data>, - section: &'file pe::section_table::SectionTable, -} - -/// An iterator over the symbols of a `PeFile`. -#[derive(Debug)] -pub struct PeSymbolIterator<'data, 'file> -where - 'data: 'file, -{ - exports: slice::Iter<'file, pe::export::Export<'data>>, - imports: slice::Iter<'file, pe::import::Import<'data>>, -} - impl<'data> PeFile<'data> { /// Get the PE headers of the file. 
// TODO: this is temporary to allow access to features this crate doesn't provide yet @@ -78,6 +29,16 @@ impl<'data> PeFile<'data> { let pe = pe::PE::parse(data).map_err(|_| "Could not parse PE header")?; Ok(PeFile { pe, data }) } + + fn section_alignment(&self) -> u64 { + u64::from( + self.pe + .header + .optional_header + .map(|h| h.windows_fields.section_alignment) + .unwrap_or(0x1000), + ) + } } impl<'data, 'file> Object<'data, 'file> for PeFile<'data> @@ -90,65 +51,15 @@ where type SectionIterator = PeSectionIterator<'data, 'file>; type SymbolIterator = PeSymbolIterator<'data, 'file>; - fn machine(&self) -> Machine { + fn architecture(&self) -> Architecture { match self.pe.header.coff_header.machine { // TODO: Arm/Arm64 - pe::header::COFF_MACHINE_X86 => Machine::X86, - pe::header::COFF_MACHINE_X86_64 => Machine::X86_64, - _ => Machine::Other, + pe::header::COFF_MACHINE_X86 => Architecture::I386, + pe::header::COFF_MACHINE_X86_64 => Architecture::X86_64, + _ => Architecture::Unknown, } } - fn segments(&'file self) -> PeSegmentIterator<'data, 'file> { - PeSegmentIterator { - file: self, - iter: self.pe.sections.iter(), - } - } - - fn section_data_by_name(&self, section_name: &str) -> Option> { - for section in &self.pe.sections { - if let Ok(name) = section.name() { - if name == section_name { - return Some(Cow::from( - &self.data[section.pointer_to_raw_data as usize..] 
- [..section.size_of_raw_data as usize], - )); - } - } - } - None - } - - fn sections(&'file self) -> PeSectionIterator<'data, 'file> { - PeSectionIterator { - file: self, - iter: self.pe.sections.iter(), - } - } - - fn symbols(&'file self) -> PeSymbolIterator<'data, 'file> { - // TODO: return COFF symbols for object files - PeSymbolIterator { - exports: [].iter(), - imports: [].iter(), - } - } - - fn dynamic_symbols(&'file self) -> PeSymbolIterator<'data, 'file> { - PeSymbolIterator { - exports: self.pe.exports.iter(), - imports: self.pe.imports.iter(), - } - } - - fn symbol_map(&self) -> SymbolMap<'data> { - // TODO: untested - let mut symbols: Vec<_> = self.symbols().filter(SymbolMap::filter).collect(); - symbols.sort_by_key(|x| x.address); - SymbolMap { symbols } - } - #[inline] fn is_little_endian(&self) -> bool { // TODO: always little endian? The COFF header has some bits in the @@ -157,9 +68,75 @@ where } #[inline] + fn is_64(&self) -> bool { + self.pe.is_64 + } + + fn segments(&'file self) -> PeSegmentIterator<'data, 'file> { + PeSegmentIterator { + file: self, + iter: self.pe.sections.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option> { + self.sections() + .find(|section| section.name() == Some(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option> { + self.sections().find(|section| section.index() == index) + } + + fn sections(&'file self) -> PeSectionIterator<'data, 'file> { + PeSectionIterator { + file: self, + iter: self.pe.sections.iter().enumerate(), + } + } + + fn symbol_by_index(&self, _index: SymbolIndex) -> Option> { + // TODO: return COFF symbols for object files + None + } + + fn symbols(&'file self) -> PeSymbolIterator<'data, 'file> { + // TODO: return COFF symbols for object files + PeSymbolIterator { + index: 0, + exports: [].iter(), + imports: [].iter(), + } + } + + fn dynamic_symbols(&'file self) -> PeSymbolIterator<'data, 'file> { + PeSymbolIterator { + index: 0, + exports: 
self.pe.exports.iter(), + imports: self.pe.imports.iter(), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + // TODO: untested + let mut symbols: Vec<_> = self + .symbols() + .map(|(_, s)| s) + .filter(SymbolMap::filter) + .collect(); + symbols.sort_by_key(|x| x.address); + SymbolMap { symbols } + } + fn has_debug_symbols(&self) -> bool { - // TODO: look at what the mingw toolchain does with DWARF-in-PE, and also - // whether CodeView-in-PE still works? + // TODO: check if CodeView-in-PE still works + for section in &self.pe.sections { + if let Ok(name) = section.name() { + if name == ".debug_info" { + return true; + } + } + } false } @@ -168,6 +145,16 @@ where } } +/// An iterator over the loadable sections of a `PeFile`. +#[derive(Debug)] +pub struct PeSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + iter: slice::Iter<'file, pe::section_table::SectionTable>, +} + impl<'data, 'file> Iterator for PeSegmentIterator<'data, 'file> { type Item = PeSegment<'data, 'file>; @@ -179,6 +166,16 @@ impl<'data, 'file> Iterator for PeSegmentIterator<'data, 'file> { } } +/// A loadable section of a `PeFile`. +#[derive(Debug)] +pub struct PeSegment<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + section: &'file pe::section_table::SectionTable, +} + impl<'data, 'file> ObjectSegment<'data> for PeSegment<'data, 'file> { #[inline] fn address(&self) -> u64 { @@ -190,9 +187,19 @@ impl<'data, 'file> ObjectSegment<'data> for PeSegment<'data, 'file> { u64::from(self.section.virtual_size) } + #[inline] + fn align(&self) -> u64 { + self.file.section_alignment() + } + fn data(&self) -> &'data [u8] { - &self.file.data[self.section.pointer_to_raw_data as usize..] 
- [..self.section.size_of_raw_data as usize] + let offset = self.section.pointer_to_raw_data as usize; + let size = cmp::min(self.section.virtual_size, self.section.size_of_raw_data) as usize; + &self.file.data[offset..][..size] + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) } #[inline] @@ -201,18 +208,55 @@ impl<'data, 'file> ObjectSegment<'data> for PeSegment<'data, 'file> { } } +/// An iterator over the sections of a `PeFile`. +#[derive(Debug)] +pub struct PeSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + iter: iter::Enumerate>, +} + impl<'data, 'file> Iterator for PeSectionIterator<'data, 'file> { type Item = PeSection<'data, 'file>; fn next(&mut self) -> Option { - self.iter.next().map(|section| PeSection { + self.iter.next().map(|(index, section)| PeSection { file: self.file, + index: SectionIndex(index), section, }) } } +/// A section of a `PeFile`. 
+#[derive(Debug)] +pub struct PeSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + index: SectionIndex, + section: &'file pe::section_table::SectionTable, +} + +impl<'data, 'file> PeSection<'data, 'file> { + fn raw_data(&self) -> &'data [u8] { + let offset = self.section.pointer_to_raw_data as usize; + let size = cmp::min(self.section.virtual_size, self.section.size_of_raw_data) as usize; + &self.file.data[offset..][..size] + } +} + impl<'data, 'file> ObjectSection<'data> for PeSection<'data, 'file> { + type RelocationIterator = PeRelocationIterator; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + #[inline] fn address(&self) -> u64 { u64::from(self.section.virtual_address) @@ -223,11 +267,23 @@ impl<'data, 'file> ObjectSection<'data> for PeSection<'data, 'file> { u64::from(self.section.virtual_size) } + #[inline] + fn align(&self) -> u64 { + self.file.section_alignment() + } + fn data(&self) -> Cow<'data, [u8]> { - Cow::from( - &self.file.data[self.section.pointer_to_raw_data as usize..] - [..self.section.size_of_raw_data as usize], - ) + Cow::from(self.raw_data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.raw_data(), self.address(), address, size) + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + // TODO: does PE support compression? + self.data() } fn name(&self) -> Option<&str> { @@ -258,36 +314,78 @@ impl<'data, 'file> ObjectSection<'data> for PeSection<'data, 'file> { SectionKind::Unknown } } + + fn relocations(&self) -> PeRelocationIterator { + PeRelocationIterator + } +} + +/// An iterator over the symbols of a `PeFile`. 
+#[derive(Debug)] +pub struct PeSymbolIterator<'data, 'file> +where + 'data: 'file, +{ + index: usize, + exports: slice::Iter<'file, pe::export::Export<'data>>, + imports: slice::Iter<'file, pe::import::Import<'data>>, } impl<'data, 'file> Iterator for PeSymbolIterator<'data, 'file> { - type Item = Symbol<'data>; + type Item = (SymbolIndex, Symbol<'data>); fn next(&mut self) -> Option { if let Some(export) = self.exports.next() { - return Some(Symbol { - kind: SymbolKind::Unknown, - section_kind: Some(SectionKind::Unknown), - global: true, - name: export.name, - address: export.rva as u64, - size: 0, - }); + let index = SymbolIndex(self.index); + self.index += 1; + return Some(( + index, + Symbol { + name: export.name, + address: export.rva as u64, + size: 0, + kind: SymbolKind::Unknown, + // TODO: can we find a section? + section_index: None, + undefined: false, + weak: false, + scope: SymbolScope::Dynamic, + }, + )); } if let Some(import) = self.imports.next() { + let index = SymbolIndex(self.index); + self.index += 1; let name = match import.name { Cow::Borrowed(name) => Some(name), _ => None, }; - return Some(Symbol { - kind: SymbolKind::Unknown, - section_kind: None, - global: true, - name: name, - address: 0, - size: 0, - }); + return Some(( + index, + Symbol { + name, + address: 0, + size: 0, + kind: SymbolKind::Unknown, + section_index: None, + undefined: true, + weak: false, + scope: SymbolScope::Dynamic, + }, + )); } None } } + +/// An iterator over the relocations in an `PeSection`. 
+#[derive(Debug)] +pub struct PeRelocationIterator; + +impl Iterator for PeRelocationIterator { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + None + } +} diff --git a/third_party/rust/object/src/read/traits.rs b/third_party/rust/object/src/read/traits.rs new file mode 100644 index 000000000000..1a950d0451ac --- /dev/null +++ b/third_party/rust/object/src/read/traits.rs @@ -0,0 +1,214 @@ +use crate::alloc::borrow::Cow; +use crate::{Relocation, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolMap}; +use target_lexicon::{Architecture, Endianness}; +use uuid::Uuid; + +/// An object file. +pub trait Object<'data, 'file> { + /// A segment in the object file. + type Segment: ObjectSegment<'data>; + + /// An iterator over the segments in the object file. + type SegmentIterator: Iterator; + + /// A section in the object file. + type Section: ObjectSection<'data>; + + /// An iterator over the sections in the object file. + type SectionIterator: Iterator; + + /// An iterator over the symbols in the object file. + type SymbolIterator: Iterator)>; + + /// Get the architecture type of the file. + fn architecture(&self) -> Architecture; + + /// Get the endianness of the file. + #[inline] + fn endianness(&self) -> Endianness { + if self.is_little_endian() { + Endianness::Little + } else { + Endianness::Big + } + } + + /// Return true if the file is little endian, false if it is big endian. + fn is_little_endian(&self) -> bool; + + /// Return true if the file can contain 64-bit addresses. + fn is_64(&self) -> bool; + + /// Get an iterator over the segments in the file. + fn segments(&'file self) -> Self::SegmentIterator; + + /// Get the entry point address of the binary + fn entry(&'file self) -> u64; + + /// Get the section named `section_name`, if such a section exists. + /// + /// If `section_name` starts with a '.' then it is treated as a system section name, + /// and is compared using the conventions specific to the object file format. 
This + /// includes: + /// - if ".text" is requested for a Mach-O object file, then the actual + /// section name that is searched for is "__text". + /// - if ".debug_info" is requested for an ELF object file, then + /// ".zdebug_info" may be returned (and similarly for other debug sections). + /// + /// For some object files, multiple segments may contain sections with the same + /// name. In this case, the first matching section will be used. + fn section_by_name(&'file self, section_name: &str) -> Option; + + /// Get the section at the given index. + /// + /// The meaning of the index depends on the object file. + /// + /// For some object files, this requires iterating through all sections. + fn section_by_index(&'file self, index: SectionIndex) -> Option; + + /// Get the contents of the section named `section_name`, if such + /// a section exists. + /// + /// The `section_name` is interpreted according to `Self::section_by_name`. + /// + /// This may decompress section data. + fn section_data_by_name(&'file self, section_name: &str) -> Option> { + self.section_by_name(section_name) + .map(|section| section.uncompressed_data()) + } + + /// Get an iterator over the sections in the file. + fn sections(&'file self) -> Self::SectionIterator; + + /// Get the debugging symbol at the given index. + /// + /// This is similar to `self.symbols().nth(index)`, except that + /// the index will take into account malformed or unsupported symbols. + fn symbol_by_index(&self, index: SymbolIndex) -> Option>; + + /// Get an iterator over the debugging symbols in the file. + /// + /// This may skip over symbols that are malformed or unsupported. + fn symbols(&'file self) -> Self::SymbolIterator; + + /// Get the data for the given symbol. 
+ fn symbol_data(&'file self, symbol: &Symbol<'data>) -> Option<&'data [u8]> { + if symbol.is_undefined() { + return None; + } + let address = symbol.address(); + let size = symbol.size(); + if let Some(index) = symbol.section_index() { + self.section_by_index(index) + .and_then(|section| section.data_range(address, size)) + } else { + self.segments() + .find_map(|segment| segment.data_range(address, size)) + } + } + + /// Get an iterator over the dynamic linking symbols in the file. + /// + /// This may skip over symbols that are malformed or unsupported. + fn dynamic_symbols(&'file self) -> Self::SymbolIterator; + + /// Construct a map from addresses to symbols. + fn symbol_map(&self) -> SymbolMap<'data>; + + /// Return true if the file contains debug information sections, false if not. + fn has_debug_symbols(&self) -> bool; + + /// The UUID from a Mach-O `LC_UUID` load command. + #[inline] + fn mach_uuid(&self) -> Option { + None + } + + /// The build ID from an ELF `NT_GNU_BUILD_ID` note. + #[inline] + fn build_id(&self) -> Option<&'data [u8]> { + None + } + + /// The filename and CRC from a `.gnu_debuglink` section. + #[inline] + fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { + None + } +} + +/// A loadable segment defined in an object file. +/// +/// For ELF, this is a program header with type `PT_LOAD`. +/// For Mach-O, this is a load command with type `LC_SEGMENT` or `LC_SEGMENT_64`. +pub trait ObjectSegment<'data> { + /// Returns the virtual address of the segment. + fn address(&self) -> u64; + + /// Returns the size of the segment in memory. + fn size(&self) -> u64; + + /// Returns the alignment of the segment in memory. + fn align(&self) -> u64; + + /// Returns a reference to the file contents of the segment. + /// The length of this data may be different from the size of the + /// segment in memory. + fn data(&self) -> &'data [u8]; + + /// Return the segment data in the given range. 
+ fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]>; + + /// Returns the name of the segment. + fn name(&self) -> Option<&str>; +} + +/// A section defined in an object file. +pub trait ObjectSection<'data> { + /// An iterator over the relocations for a section. + /// + /// The first field in the item tuple is the section offset + /// that the relocation applies to. + type RelocationIterator: Iterator; + + /// Returns the section index. + fn index(&self) -> SectionIndex; + + /// Returns the address of the section. + fn address(&self) -> u64; + + /// Returns the size of the section in memory. + fn size(&self) -> u64; + + /// Returns the alignment of the section in memory. + fn align(&self) -> u64; + + /// Returns the raw contents of the section. + /// The length of this data may be different from the size of the + /// section in memory. + /// + /// This does not do any decompression. + fn data(&self) -> Cow<'data, [u8]>; + + /// Return the raw contents of the section data in the given range. + /// + /// This does not do any decompression. + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]>; + + /// Returns the uncompressed contents of the section. + /// The length of this data may be different from the size of the + /// section in memory. + fn uncompressed_data(&self) -> Cow<'data, [u8]>; + + /// Returns the name of the section. + fn name(&self) -> Option<&str>; + + /// Returns the name of the segment for this section. + fn segment_name(&self) -> Option<&str>; + + /// Return the kind of this section. + fn kind(&self) -> SectionKind; + + /// Get the relocations for this section. 
+ fn relocations(&self) -> Self::RelocationIterator; +} diff --git a/third_party/rust/object/src/wasm.rs b/third_party/rust/object/src/read/wasm.rs similarity index 64% rename from third_party/rust/object/src/wasm.rs rename to third_party/rust/object/src/read/wasm.rs index ae96137f695d..99fe2f5a035c 100644 --- a/third_party/rust/object/src/wasm.rs +++ b/third_party/rust/object/src/read/wasm.rs @@ -1,11 +1,12 @@ -use alloc::vec::Vec; +use crate::alloc::vec::Vec; use parity_wasm::elements::{self, Deserialize}; -use std::borrow::{Cow, ToOwned}; -use std::slice; -use std::u64; +use std::borrow::Cow; +use std::{iter, slice}; +use target_lexicon::Architecture; -use { - Machine, Object, ObjectSection, ObjectSegment, SectionKind, Symbol, SymbolMap, +use crate::read::{ + Object, ObjectSection, ObjectSegment, Relocation, SectionIndex, SectionKind, Symbol, + SymbolIndex, SymbolMap, }; /// A WebAssembly object file. @@ -17,43 +18,12 @@ pub struct WasmFile { impl<'data> WasmFile { /// Parse the raw wasm data. pub fn parse(mut data: &'data [u8]) -> Result { - let module = elements::Module::deserialize(&mut data).map_err(|_| "failed to parse wasm")?; - Ok(WasmFile { - module, - }) + let module = + elements::Module::deserialize(&mut data).map_err(|_| "failed to parse wasm")?; + Ok(WasmFile { module }) } } -/// An iterator over the segments of an `WasmFile`. -#[derive(Debug)] -pub struct WasmSegmentIterator<'file> { - file: &'file WasmFile, -} - -/// A segment of an `WasmFile`. -#[derive(Debug)] -pub struct WasmSegment<'file> { - file: &'file WasmFile, -} - -/// An iterator over the sections of an `WasmFile`. -#[derive(Debug)] -pub struct WasmSectionIterator<'file> { - sections: slice::Iter<'file, elements::Section>, -} - -/// A section of an `WasmFile`. -#[derive(Debug)] -pub struct WasmSection<'file> { - section: &'file elements::Section, -} - -/// An iterator over the symbols of an `WasmFile`. 
-#[derive(Debug)] -pub struct WasmSymbolIterator<'file> { - file: &'file WasmFile, -} - fn serialize_to_cow<'a, S>(s: S) -> Option> where S: elements::Serialize, @@ -70,86 +40,56 @@ impl<'file> Object<'static, 'file> for WasmFile { type SectionIterator = WasmSectionIterator<'file>; type SymbolIterator = WasmSymbolIterator<'file>; - fn machine(&self) -> Machine { - Machine::Other + #[inline] + fn architecture(&self) -> Architecture { + Architecture::Wasm32 + } + + #[inline] + fn is_little_endian(&self) -> bool { + true + } + + #[inline] + fn is_64(&self) -> bool { + false } fn segments(&'file self) -> Self::SegmentIterator { - WasmSegmentIterator { - file: self, - } + WasmSegmentIterator { file: self } } fn entry(&'file self) -> u64 { - self.module.start_section().map_or(u64::MAX, |s| s as u64) + self.module + .start_section() + .map_or(u64::max_value(), u64::from) } - fn section_data_by_name(&self, section_name: &str) -> Option> { - match section_name { - // Known wasm section names. - "Type" => self.module - .type_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Import" => self.module - .import_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Function" => self.module - .function_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Table" => self.module - .table_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Memory" => self.module - .memory_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Global" => self.module - .global_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Export" => self.module - .export_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Start" => self.module - .start_section() - .and_then(|s| serialize_to_cow(elements::VarUint32::from(s))), - "Element" => self.module - .elements_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Code" => self.module - .code_section() - .and_then(|s| serialize_to_cow(s.clone())), - "Data" => self.module - .data_section() - .and_then(|s| 
serialize_to_cow(s.clone())), - // Custom sections. - _ => { - for s in self.module.sections() { - if let elements::Section::Custom(ref c) = *s { - if c.name() == section_name { - return Some(Cow::from(c.payload().to_owned())); - } - } - } - None - } - } + fn section_by_name(&'file self, section_name: &str) -> Option> { + self.sections() + .find(|section| section.name() == Some(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option> { + self.sections().find(|section| section.index() == index) } fn sections(&'file self) -> Self::SectionIterator { WasmSectionIterator { - sections: self.module.sections().iter(), + sections: self.module.sections().iter().enumerate(), } } + fn symbol_by_index(&self, _index: SymbolIndex) -> Option> { + unimplemented!() + } + fn symbols(&'file self) -> Self::SymbolIterator { - WasmSymbolIterator { - file: self, - } + WasmSymbolIterator { file: self } } fn dynamic_symbols(&'file self) -> Self::SymbolIterator { - WasmSymbolIterator { - file: self, - } + WasmSymbolIterator { file: self } } fn symbol_map(&self) -> SymbolMap<'static> { @@ -158,10 +98,6 @@ impl<'file> Object<'static, 'file> for WasmFile { } } - fn is_little_endian(&self) -> bool { - true - } - fn has_debug_symbols(&self) -> bool { // We ignore the "name" section, and use this to mean whether the wasm // has DWARF. @@ -172,6 +108,12 @@ impl<'file> Object<'static, 'file> for WasmFile { } } +/// An iterator over the segments of an `WasmFile`. +#[derive(Debug)] +pub struct WasmSegmentIterator<'file> { + file: &'file WasmFile, +} + impl<'file> Iterator for WasmSegmentIterator<'file> { type Item = WasmSegment<'file>; @@ -180,6 +122,12 @@ impl<'file> Iterator for WasmSegmentIterator<'file> { } } +/// A segment of an `WasmFile`. 
+#[derive(Debug)] +pub struct WasmSegment<'file> { + file: &'file WasmFile, +} + impl<'file> ObjectSegment<'static> for WasmSegment<'file> { #[inline] fn address(&self) -> u64 { @@ -191,27 +139,57 @@ impl<'file> ObjectSegment<'static> for WasmSegment<'file> { unreachable!() } + #[inline] + fn align(&self) -> u64 { + unreachable!() + } + fn data(&self) -> &'static [u8] { unreachable!() } + fn data_range(&self, _address: u64, _size: u64) -> Option<&'static [u8]> { + unreachable!() + } + #[inline] fn name(&self) -> Option<&str> { unreachable!() } } +/// An iterator over the sections of an `WasmFile`. +#[derive(Debug)] +pub struct WasmSectionIterator<'file> { + sections: iter::Enumerate>, +} + impl<'file> Iterator for WasmSectionIterator<'file> { type Item = WasmSection<'file>; fn next(&mut self) -> Option { - self.sections.next().map(|s| WasmSection { - section: s, + self.sections.next().map(|(index, section)| WasmSection { + index: SectionIndex(index), + section, }) } } +/// A section of an `WasmFile`. 
+#[derive(Debug)] +pub struct WasmSection<'file> { + index: SectionIndex, + section: &'file elements::Section, +} + impl<'file> ObjectSection<'static> for WasmSection<'file> { + type RelocationIterator = WasmRelocationIterator; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + #[inline] fn address(&self) -> u64 { 1 @@ -222,8 +200,30 @@ impl<'file> ObjectSection<'static> for WasmSection<'file> { serialize_to_cow(self.section.clone()).map_or(0, |b| b.len() as u64) } + #[inline] + fn align(&self) -> u64 { + 1 + } + fn data(&self) -> Cow<'static, [u8]> { - serialize_to_cow(self.section.clone()).unwrap_or(Cow::from(&[][..])) + match *self.section { + elements::Section::Custom(ref section) => Some(section.payload().to_vec().into()), + elements::Section::Start(section) => { + serialize_to_cow(elements::VarUint32::from(section)) + } + _ => serialize_to_cow(self.section.clone()), + } + .unwrap_or_else(|| Cow::from(&[][..])) + } + + fn data_range(&self, _address: u64, _size: u64) -> Option<&'static [u8]> { + unimplemented!() + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'static, [u8]> { + // TODO: does wasm support compression? 
+ self.data() } fn name(&self) -> Option<&str> { @@ -240,6 +240,7 @@ impl<'file> ObjectSection<'static> for WasmSection<'file> { elements::Section::Start(_) => Some("Start"), elements::Section::Element(_) => Some("Element"), elements::Section::Code(_) => Some("Code"), + elements::Section::DataCount(_) => Some("DataCount"), elements::Section::Data(_) => Some("Data"), elements::Section::Name(_) => Some("Name"), elements::Section::Reloc(_) => Some("Reloc"), @@ -265,17 +266,40 @@ impl<'file> ObjectSection<'static> for WasmSection<'file> { elements::Section::Start(_) => SectionKind::Other, elements::Section::Element(_) => SectionKind::Other, elements::Section::Code(_) => SectionKind::Text, + elements::Section::DataCount(_) => SectionKind::Other, elements::Section::Data(_) => SectionKind::Data, elements::Section::Name(_) => SectionKind::Other, elements::Section::Reloc(_) => SectionKind::Other, } } + + fn relocations(&self) -> WasmRelocationIterator { + WasmRelocationIterator + } +} + +/// An iterator over the symbols of an `WasmFile`. +#[derive(Debug)] +pub struct WasmSymbolIterator<'file> { + file: &'file WasmFile, } impl<'file> Iterator for WasmSymbolIterator<'file> { - type Item = Symbol<'static>; + type Item = (SymbolIndex, Symbol<'static>); fn next(&mut self) -> Option { unimplemented!() } } + +/// An iterator over the relocations in an `WasmSection`. +#[derive(Debug)] +pub struct WasmRelocationIterator; + +impl Iterator for WasmRelocationIterator { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + None + } +} diff --git a/third_party/rust/object/src/traits.rs b/third_party/rust/object/src/traits.rs deleted file mode 100644 index c37407b7d8b4..000000000000 --- a/third_party/rust/object/src/traits.rs +++ /dev/null @@ -1,124 +0,0 @@ -use alloc::borrow::Cow; -use {Uuid, Machine, SectionKind, Symbol, SymbolMap}; - -/// An object file. -pub trait Object<'data, 'file> { - /// A segment in the object file. 
- type Segment: ObjectSegment<'data>; - - /// An iterator over the segments in the object file. - type SegmentIterator: Iterator; - - /// A section in the object file. - type Section: ObjectSection<'data>; - - /// An iterator over the sections in the object file. - type SectionIterator: Iterator; - - /// An iterator over the symbols in the object file. - type SymbolIterator: Iterator>; - - /// Get the machine type of the file. - fn machine(&self) -> Machine; - - /// Get an iterator over the segments in the file. - fn segments(&'file self) -> Self::SegmentIterator; - - /// Get the entry point address of the binary - fn entry(&'file self) -> u64; - - /// Get the contents of the section named `section_name`, if such - /// a section exists. - /// - /// If `section_name` starts with a '.' then it is treated as a system section name, - /// and is compared using the conventions specific to the object file format. - /// For example, if ".text" is requested for a Mach-O object file, then the actual - /// section name that is searched for is "__text". - /// - /// For some object files, multiple segments may contain sections with the same - /// name. In this case, the first matching section will be used. - /// - /// This may decompress section data. - fn section_data_by_name(&self, section_name: &str) -> Option>; - - /// Get an iterator over the sections in the file. - fn sections(&'file self) -> Self::SectionIterator; - - /// Get an iterator over the debugging symbols in the file. - fn symbols(&'file self) -> Self::SymbolIterator; - - /// Get an iterator over the dynamic linking symbols in the file. - fn dynamic_symbols(&'file self) -> Self::SymbolIterator; - - /// Construct a map from addresses to symbols. - fn symbol_map(&self) -> SymbolMap<'data>; - - /// Return true if the file is little endian, false if it is big endian. - fn is_little_endian(&self) -> bool; - - /// Return true if the file contains debug information sections, false if not. 
- fn has_debug_symbols(&self) -> bool; - - /// The UUID from a Mach-O `LC_UUID` load command. - #[inline] - fn mach_uuid(&self) -> Option { - None - } - - /// The build ID from an ELF `NT_GNU_BUILD_ID` note. - #[inline] - fn build_id(&self) -> Option<&'data [u8]> { - None - } - - /// The filename and CRC from a `.gnu_debuglink` section. - #[inline] - fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { - None - } -} - -/// A loadable segment defined in an object file. -/// -/// For ELF, this is a program header with type `PT_LOAD`. -/// For Mach-O, this is a load command with type `LC_SEGMENT` or `LC_SEGMENT_64`. -pub trait ObjectSegment<'data> { - /// Returns the virtual address of the segment. - fn address(&self) -> u64; - - /// Returns the size of the segment in memory. - fn size(&self) -> u64; - - /// Returns a reference to the file contents of the segment. - /// The length of this data may be different from the size of the - /// segment in memory. - fn data(&self) -> &'data [u8]; - - /// Returns the name of the segment. - fn name(&self) -> Option<&str>; -} - -/// A section defined in an object file. -pub trait ObjectSection<'data> { - /// Returns the address of the section. - fn address(&self) -> u64; - - /// Returns the size of the section in memory. - fn size(&self) -> u64; - - /// Returns a reference to the raw contents of the section. - /// The length of this data may be different from the size of the - /// section in memory. - /// - /// This does not do any decompression. - fn data(&self) -> Cow<'data, [u8]>; - - /// Returns the name of the section. - fn name(&self) -> Option<&str>; - - /// Returns the name of the segment for this section. - fn segment_name(&self) -> Option<&str>; - - /// Return the kind of this section. 
- fn kind(&self) -> SectionKind; -} diff --git a/third_party/rust/object/src/write/coff.rs b/third_party/rust/object/src/write/coff.rs new file mode 100644 index 000000000000..0db43be237df --- /dev/null +++ b/third_party/rust/object/src/write/coff.rs @@ -0,0 +1,477 @@ +use crc32fast; +use scroll::ctx::SizeWith; +use scroll::IOwrite; +use std::iter; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod coff { + pub use goblin::pe::characteristic::*; + pub use goblin::pe::header::*; + pub use goblin::pe::relocation::*; + pub use goblin::pe::section_table::*; + pub use goblin::pe::symbol::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + offset: usize, + str_id: Option, + reloc_offset: usize, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option, + aux_count: u8, +} + +impl Object { + pub(crate) fn coff_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => (&[], &b".rdata"[..], SectionKind::ReadOnlyData), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + } + } + + pub(crate) fn coff_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec { + let mut name = section.to_vec(); + name.push(b'$'); + name.extend(value); + name + } + + pub(crate) fn coff_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { + if relocation.kind == RelocationKind::GotRelative { + // Use a stub symbol for the relocation instead. + // This isn't really a GOT, but it's a similar purpose. + // TODO: need to handle DLL imports differently? 
+ relocation.kind = RelocationKind::Relative; + relocation.symbol = self.coff_add_stub_symbol(relocation.symbol); + } else if relocation.kind == RelocationKind::PltRelative { + // Windows doesn't need a separate relocation type for + // references to functions in import libraries. + // For convenience, treat this the same as Relative. + relocation.kind = RelocationKind::Relative; + } + + let constant = match self.architecture { + Architecture::I386 => match relocation.kind { + RelocationKind::Relative => { + // IMAGE_REL_I386_REL32 + relocation.addend + 4 + } + _ => relocation.addend, + }, + Architecture::X86_64 => match relocation.kind { + RelocationKind::Relative => { + // IMAGE_REL_AMD64_REL32 through to IMAGE_REL_AMD64_REL32_5 (addends -4 to -9) + if relocation.addend <= -4 && relocation.addend >= -9 { + 0 + } else { + relocation.addend + 4 + } + } + _ => relocation.addend, + }, + _ => unimplemented!(), + }; + relocation.addend -= constant; + constant + } + + fn coff_add_stub_symbol(&mut self, symbol_id: SymbolId) -> SymbolId { + if let Some(stub_id) = self.stub_symbols.get(&symbol_id) { + return *stub_id; + } + let stub_size = self.architecture.pointer_width().unwrap().bytes(); + + let mut name = b".rdata$.refptr.".to_vec(); + name.extend(&self.symbols[symbol_id.0].name); + let section_id = self.add_section(Vec::new(), name, SectionKind::ReadOnlyData); + let section = self.section_mut(section_id); + section.set_data(vec![0; stub_size as usize], u64::from(stub_size)); + section.relocations = vec![Relocation { + offset: 0, + size: stub_size * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: symbol_id, + addend: 0, + }]; + + let mut name = b".refptr.".to_vec(); + name.extend(&self.symbol(symbol_id).name); + let stub_id = self.add_symbol(Symbol { + name, + value: 0, + size: u64::from(stub_size), + kind: SymbolKind::Data, + scope: SymbolScope::Compilation, + weak: false, + section: Some(section_id), + }); + self.stub_symbols.insert(symbol_id, 
stub_id); + + stub_id + } + + pub(crate) fn coff_write(&self) -> Result, String> { + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + let mut strtab = StringTable::default(); + + // COFF header. + let ctx = scroll::LE; + offset += coff::CoffHeader::size_with(&ctx); + + // Section headers. + offset += self.sections.len() * coff::SectionTable::size_with(&ctx); + + // Calculate size of section data and add section strings to strtab. + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + for (index, section) in self.sections.iter().enumerate() { + if section.name.len() > 8 { + section_offsets[index].str_id = Some(strtab.add(§ion.name)); + } + + let len = section.data.len(); + if len != 0 { + // TODO: not sure what alignment is required here, but this seems to match LLVM + offset = align(offset, 4); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + + // Calculate size of relocations. + let count = section.relocations.len(); + if count != 0 { + section_offsets[index].reloc_offset = offset; + offset += count * coff::Relocation::size_with(&ctx); + } + } + + // Calculate size of symbols and add symbol strings to strtab. + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut symtab_count = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + match symbol.kind { + SymbolKind::File => { + // Name goes in auxilary symbol records. + let aux_count = + (symbol.name.len() + coff::COFF_SYMBOL_SIZE - 1) / coff::COFF_SYMBOL_SIZE; + symbol_offsets[index].aux_count = aux_count as u8; + symtab_count += aux_count; + // Don't add name to strtab. 
+ continue; + } + SymbolKind::Section => { + symbol_offsets[index].aux_count = 1; + symtab_count += 1; + } + _ => {} + } + if symbol.name.len() > 8 { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + let symtab_offset = offset; + let symtab_len = symtab_count * coff::COFF_SYMBOL_SIZE; + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // First 4 bytes of strtab are the length. + strtab.write(4, &mut strtab_data); + let strtab_len = strtab_data.len() + 4; + offset += strtab_len; + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. + let header = coff::CoffHeader { + machine: match self.architecture { + Architecture::I386 => coff::COFF_MACHINE_X86, + Architecture::X86_64 => coff::COFF_MACHINE_X86_64, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }, + number_of_sections: self.sections.len() as u16, + time_date_stamp: 0, + pointer_to_symbol_table: symtab_offset as u32, + number_of_symbol_table: symtab_count as u32, + size_of_optional_header: 0, + characteristics: 0, + }; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write section headers. 
+ for (index, section) in self.sections.iter().enumerate() { + // TODO: IMAGE_SCN_LNK_COMDAT + let characteristics = match section.kind { + SectionKind::Text => { + coff::IMAGE_SCN_CNT_CODE + | coff::IMAGE_SCN_MEM_EXECUTE + | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Data => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::UninitializedData => { + coff::IMAGE_SCN_CNT_UNINITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::ReadOnlyData | SectionKind::ReadOnlyString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Debug | SectionKind::Other | SectionKind::OtherString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_DISCARDABLE + } + SectionKind::Linker => coff::IMAGE_SCN_LNK_INFO | coff::IMAGE_SCN_LNK_REMOVE, + SectionKind::Tls + | SectionKind::UninitializedTls + | SectionKind::TlsVariables + | SectionKind::Unknown + | SectionKind::Metadata => { + return Err(format!("unimplemented section {:?}", section.kind)) + } + }; + let align = match section.align { + 1 => coff::IMAGE_SCN_ALIGN_1BYTES, + 2 => coff::IMAGE_SCN_ALIGN_2BYTES, + 4 => coff::IMAGE_SCN_ALIGN_4BYTES, + 8 => coff::IMAGE_SCN_ALIGN_8BYTES, + 16 => coff::IMAGE_SCN_ALIGN_16BYTES, + 32 => coff::IMAGE_SCN_ALIGN_32BYTES, + 64 => coff::IMAGE_SCN_ALIGN_64BYTES, + 128 => coff::IMAGE_SCN_ALIGN_128BYTES, + 256 => coff::IMAGE_SCN_ALIGN_256BYTES, + 512 => coff::IMAGE_SCN_ALIGN_512BYTES, + 1024 => coff::IMAGE_SCN_ALIGN_1024BYTES, + 2048 => coff::IMAGE_SCN_ALIGN_2048BYTES, + 4096 => coff::IMAGE_SCN_ALIGN_4096BYTES, + 8192 => coff::IMAGE_SCN_ALIGN_8192BYTES, + _ => return Err(format!("unimplemented section align {}", section.align)), + }; + let mut coff_section = coff::SectionTable { + name: [0; 8], + real_name: None, + virtual_size: if section.data.is_empty() { + section.size as u32 + } else { + 0 + }, + virtual_address: 
0, + size_of_raw_data: section.data.len() as u32, + pointer_to_raw_data: if section.data.is_empty() { + 0 + } else { + section_offsets[index].offset as u32 + }, + pointer_to_relocations: section_offsets[index].reloc_offset as u32, + pointer_to_linenumbers: 0, + number_of_relocations: section.relocations.len() as u16, + number_of_linenumbers: 0, + characteristics: characteristics | align, + }; + if section.name.len() <= 8 { + coff_section.name[..section.name.len()].copy_from_slice(§ion.name); + } else { + let str_offset = strtab.get_offset(section_offsets[index].str_id.unwrap()); + coff_section.set_name_offset(str_offset).unwrap(); + } + buffer.iowrite_with(coff_section, ctx).unwrap(); + } + + // Write section data and relocations. + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, 4); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(§ion.data); + } + + if !section.relocations.is_empty() { + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + //assert!(reloc.implicit_addend); + let typ = match self.architecture { + Architecture::I386 => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 16, 0) => coff::IMAGE_REL_I386_DIR16, + (RelocationKind::Relative, 16, 0) => coff::IMAGE_REL_I386_REL16, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_I386_DIR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_I386_DIR32NB, + (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_I386_SECTION, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_I386_SECREL, + (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_I386_SECREL7, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_I386_REL32, + (RelocationKind::Other(x), _, _) => x as u16, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, 
reloc.size, reloc.addend) { + (RelocationKind::Absolute, 64, 0) => coff::IMAGE_REL_AMD64_ADDR64, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32NB, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_AMD64_REL32, + (RelocationKind::Relative, 32, -5) => coff::IMAGE_REL_AMD64_REL32_1, + (RelocationKind::Relative, 32, -6) => coff::IMAGE_REL_AMD64_REL32_2, + (RelocationKind::Relative, 32, -7) => coff::IMAGE_REL_AMD64_REL32_3, + (RelocationKind::Relative, 32, -8) => coff::IMAGE_REL_AMD64_REL32_4, + (RelocationKind::Relative, 32, -9) => coff::IMAGE_REL_AMD64_REL32_5, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_AMD64_SECREL, + (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_AMD64_SECREL7, + (RelocationKind::Other(x), _, _) => x as u16, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + buffer + .iowrite_with( + coff::Relocation { + virtual_address: reloc.offset as u32, + symbol_table_index: symbol_offsets[reloc.symbol.0].index as u32, + typ, + }, + ctx, + ) + .unwrap(); + } + } + } + + // Write symbols. + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + let mut name = &symbol.name[..]; + let mut section_number = symbol.section.map(|x| x.0 + 1).unwrap_or(0) as i16; + let typ = if symbol.kind == SymbolKind::Text { + coff::IMAGE_SYM_DTYPE_FUNCTION << coff::IMAGE_SYM_DTYPE_SHIFT + } else { + coff::IMAGE_SYM_TYPE_NULL + }; + let storage_class = match symbol.kind { + SymbolKind::File => { + // Name goes in auxilary symbol records. 
+ name = b".file"; + section_number = coff::IMAGE_SYM_DEBUG; + coff::IMAGE_SYM_CLASS_FILE + } + SymbolKind::Section => coff::IMAGE_SYM_CLASS_STATIC, + SymbolKind::Label => coff::IMAGE_SYM_CLASS_LABEL, + SymbolKind::Text | SymbolKind::Data => { + match symbol.scope { + _ if symbol.is_undefined() => coff::IMAGE_SYM_CLASS_EXTERNAL, + // TODO: does this need aux symbol records too? + _ if symbol.weak => coff::IMAGE_SYM_CLASS_WEAK_EXTERNAL, + SymbolScope::Unknown => { + return Err(format!("unimplemented symbol scope {:?}", symbol)) + } + SymbolScope::Compilation => coff::IMAGE_SYM_CLASS_STATIC, + SymbolScope::Linkage | SymbolScope::Dynamic => { + coff::IMAGE_SYM_CLASS_EXTERNAL + } + } + } + _ => return Err(format!("unimplemented symbol {:?}", symbol.kind)), + }; + let number_of_aux_symbols = symbol_offsets[index].aux_count; + let mut coff_symbol = coff::Symbol { + name: [0; 8], + value: symbol.value as u32, + section_number, + typ, + storage_class, + number_of_aux_symbols, + }; + if name.len() <= 8 { + coff_symbol.name[..name.len()].copy_from_slice(name); + } else { + let str_offset = strtab.get_offset(symbol_offsets[index].str_id.unwrap()); + coff_symbol.set_name_offset(str_offset as u32); + } + buffer.iowrite_with(coff_symbol, ctx).unwrap(); + + match symbol.kind { + SymbolKind::File => { + let aux_len = number_of_aux_symbols as usize * coff::COFF_SYMBOL_SIZE; + debug_assert!(aux_len >= symbol.name.len()); + buffer.extend(&symbol.name); + buffer.extend(iter::repeat(0).take(aux_len - symbol.name.len())); + } + SymbolKind::Section => { + debug_assert_eq!(number_of_aux_symbols, 1); + let section = &self.sections[symbol.section.unwrap().0]; + buffer + .iowrite_with( + coff::AuxSectionDefinition { + length: section.data.len() as u32, + number_of_relocations: section.relocations.len() as u16, + number_of_line_numbers: 0, + checksum: checksum(§ion.data), + number: section_number as u16, + // TODO: COMDAT + selection: 0, + unused: [0; 3], + }, + ctx, + ) + .unwrap(); + } + _ 
=> { + debug_assert_eq!(number_of_aux_symbols, 0); + } + } + } + + // Write strtab section. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.iowrite_with(strtab_len as u32, ctx).unwrap(); + buffer.extend(&strtab_data); + + Ok(buffer) + } +} + +// JamCRC +fn checksum(data: &[u8]) -> u32 { + let mut hasher = crc32fast::Hasher::new_with_initial(0xffff_ffff); + hasher.update(data); + !hasher.finalize() +} diff --git a/third_party/rust/object/src/write/elf.rs b/third_party/rust/object/src/write/elf.rs new file mode 100644 index 000000000000..3f8c015078bf --- /dev/null +++ b/third_party/rust/object/src/write/elf.rs @@ -0,0 +1,728 @@ +use scroll::ctx::SizeWith; +use scroll::IOwrite; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod elf { + pub use goblin::elf::header::*; + pub use goblin::elf::program_header::*; + pub use goblin::elf::reloc::*; + pub use goblin::elf::section_header::*; + pub use goblin::elf::sym::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + index: usize, + offset: usize, + str_id: Option, + reloc_index: usize, + reloc_offset: usize, + reloc_len: usize, + reloc_str_id: Option, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option, +} + +impl Object { + pub(crate) fn elf_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => (&[], &b".rodata"[..], SectionKind::ReadOnlyData), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + } + } + + pub(crate) fn elf_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec { + let mut name = 
section.to_vec(); + name.push(b'.'); + name.extend(value); + name + } + + fn elf_has_relocation_addend(&self) -> Result { + Ok(match self.architecture { + Architecture::Arm => false, + Architecture::Aarch64 => false, + Architecture::I386 => false, + Architecture::X86_64 => true, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }) + } + + pub(crate) fn elf_fixup_relocation( + &mut self, + mut relocation: &mut Relocation, + ) -> Result { + // Return true if we should use a section symbol to avoid preemption. + fn want_section_symbol(relocation: &Relocation, symbol: &Symbol) -> bool { + if symbol.scope != SymbolScope::Dynamic { + // Only dynamic symbols can be preemptible. + return false; + } + match symbol.kind { + SymbolKind::Text | SymbolKind::Data => {} + _ => return false, + } + match relocation.kind { + // Anything using GOT or PLT is preemptible. + // We also require that `Other` relocations must already be correct. + RelocationKind::Got + | RelocationKind::GotRelative + | RelocationKind::GotBaseRelative + | RelocationKind::PltRelative + | RelocationKind::Other(_) => return false, + // Absolute relocations are preemptible for non-local data. + // TODO: not sure if this rule is exactly correct + // This rule was added to handle global data references in debuginfo. + // Maybe this should be a new relocation kind so that the caller can decide. + RelocationKind::Absolute => { + if symbol.kind == SymbolKind::Data { + return false; + } + } + _ => {} + } + true + } + + // Use section symbols for relocations where required to avoid preemption. 
+ // Otherwise, the linker will fail with: + // relocation R_X86_64_PC32 against symbol `SomeSymbolName' can not be used when + // making a shared object; recompile with -fPIC + let symbol = &self.symbols[relocation.symbol.0]; + if want_section_symbol(relocation, symbol) { + if let Some(section) = symbol.section { + relocation.addend += symbol.value as i64; + relocation.symbol = self.section_symbol(section); + } + } + + // Determine whether the addend is stored in the relocation or the data. + if self.elf_has_relocation_addend()? { + Ok(0) + } else { + let constant = relocation.addend; + relocation.addend = 0; + Ok(constant) + } + } + + pub(crate) fn elf_write(&self) -> Result, String> { + let (container, pointer_align) = match self.architecture.pointer_width().unwrap() { + PointerWidth::U16 | PointerWidth::U32 => (goblin::container::Container::Little, 4), + PointerWidth::U64 => (goblin::container::Container::Big, 8), + }; + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => goblin::container::Endian::Little, + Endianness::Big => goblin::container::Endian::Big, + }; + let ctx = goblin::container::Ctx::new(container, endian); + let is_rela = self.elf_has_relocation_addend()?; + let reloc_ctx = (is_rela, ctx); + + // Calculate offsets of everything. + let mut offset = 0; + + // ELF header. + let e_ehsize = elf::Header::size_with(&ctx); + offset += e_ehsize; + + // Create reloc section header names. + let reloc_names: Vec<_> = self + .sections + .iter() + .map(|section| { + let mut reloc_name = Vec::new(); + if !section.relocations.is_empty() { + reloc_name.extend_from_slice(if is_rela { + &b".rela"[..] + } else { + &b".rel"[..] + }); + reloc_name.extend_from_slice(§ion.name); + } + reloc_name + }) + .collect(); + + // Calculate size of section data. + let mut shstrtab = StringTable::default(); + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + // Null section. 
+ let mut e_shnum = 1; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].str_id = Some(shstrtab.add(§ion.name)); + section_offsets[index].index = e_shnum; + e_shnum += 1; + + let len = section.data.len(); + if len != 0 { + offset = align(offset, section.align as usize); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + + if !section.relocations.is_empty() { + section_offsets[index].reloc_str_id = Some(shstrtab.add(&reloc_names[index])); + section_offsets[index].reloc_index = e_shnum; + e_shnum += 1; + } + } + + // Calculate index of symbols and add symbol strings to strtab. + let mut strtab = StringTable::default(); + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + // Null symbol. + let mut symtab_count = 1; + // Local symbols must come before global. + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + } + } + let symtab_count_local = symtab_count; + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.kind != SymbolKind::Section { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + let symtab_str_id = shstrtab.add(&b".symtab"[..]); + offset = align(offset, pointer_align); + let symtab_offset = offset; + let symtab_len = symtab_count * elf::Sym::size_with(&ctx); + offset += symtab_len; + let symtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of symtab_shndx. 
+ let mut need_symtab_shndx = false; + for symbol in &self.symbols { + let index = symbol + .section + .map(|s| section_offsets[s.0].index) + .unwrap_or(0); + if index >= elf::SHN_LORESERVE as usize { + need_symtab_shndx = true; + break; + } + } + let symtab_shndx_offset = offset; + let mut symtab_shndx_str_id = None; + let mut symtab_shndx_len = 0; + if need_symtab_shndx { + symtab_shndx_str_id = Some(shstrtab.add(&b".symtab_shndx"[..])); + symtab_shndx_len = symtab_count * 4; + offset += symtab_shndx_len; + e_shnum += 1; + } + + // Calculate size of strtab. + let strtab_str_id = shstrtab.add(&b".strtab"[..]); + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // Null name. + strtab_data.push(0); + strtab.write(1, &mut strtab_data); + offset += strtab_data.len(); + let strtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of relocations. + for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + offset = align(offset, pointer_align); + section_offsets[index].reloc_offset = offset; + let len = count * elf::Reloc::size_with(&reloc_ctx); + section_offsets[index].reloc_len = len; + offset += len; + } + } + + // Calculate size of shstrtab. + let shstrtab_str_id = shstrtab.add(&b".shstrtab"[..]); + let shstrtab_offset = offset; + let mut shstrtab_data = Vec::new(); + // Null section name. + shstrtab_data.push(0); + shstrtab.write(1, &mut shstrtab_data); + offset += shstrtab_data.len(); + let shstrtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of section headers. + offset = align(offset, pointer_align); + let e_shoff = offset; + let e_shentsize = elf::SectionHeader::size_with(&ctx); + offset += e_shnum * e_shentsize; + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. 
+ let e_machine = match self.architecture { + Architecture::Arm => elf::EM_ARM, + Architecture::Aarch64 => elf::EM_AARCH64, + Architecture::I386 => elf::EM_386, + Architecture::X86_64 => elf::EM_X86_64, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let mut header = elf::Header { + e_ident: [0; 16], + e_type: elf::ET_REL, + e_machine, + e_version: elf::EV_CURRENT.into(), + e_entry: 0, + e_phoff: 0, + e_shoff: e_shoff as u64, + e_flags: 0, + e_ehsize: e_ehsize as u16, + e_phentsize: 0, + e_phnum: 0, + e_shentsize: e_shentsize as u16, + e_shnum: if e_shnum >= elf::SHN_LORESERVE as usize { + 0 + } else { + e_shnum as u16 + }, + e_shstrndx: if shstrtab_index >= elf::SHN_LORESERVE as usize { + elf::SHN_XINDEX as u16 + } else { + shstrtab_index as u16 + }, + }; + header.e_ident[0..4].copy_from_slice(elf::ELFMAG); + header.e_ident[elf::EI_CLASS] = if container.is_big() { + elf::ELFCLASS64 + } else { + elf::ELFCLASS32 + }; + header.e_ident[elf::EI_DATA] = if endian.is_little() { + elf::ELFDATA2LSB + } else { + elf::ELFDATA2MSB + }; + header.e_ident[elf::EI_VERSION] = elf::EV_CURRENT; + header.e_ident[elf::EI_OSABI] = elf::ELFOSABI_NONE; + header.e_ident[elf::EI_ABIVERSION] = 0; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write section data. + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, section.align as usize); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(§ion.data); + } + } + + // Write symbols. 
+ write_align(&mut buffer, pointer_align); + debug_assert_eq!(symtab_offset, buffer.len()); + buffer + .iowrite_with( + elf::Sym { + st_name: 0, + st_info: 0, + st_other: 0, + st_shndx: 0, + st_value: 0, + st_size: 0, + }, + ctx, + ) + .unwrap(); + let mut symtab_shndx = Vec::new(); + if need_symtab_shndx { + symtab_shndx.iowrite_with(0, ctx.le).unwrap(); + } + let mut write_symbol = |index: usize, symbol: &Symbol| { + let st_type = match symbol.kind { + SymbolKind::Unknown | SymbolKind::Null => elf::STT_NOTYPE, + SymbolKind::Text => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + elf::STT_FUNC + } + } + SymbolKind::Data => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + elf::STT_OBJECT + } + } + SymbolKind::Section => elf::STT_SECTION, + SymbolKind::File => elf::STT_FILE, + SymbolKind::Common => elf::STT_COMMON, + SymbolKind::Tls => elf::STT_TLS, + SymbolKind::Label => elf::STT_NOTYPE, + }; + let st_bind = if symbol.is_undefined() { + elf::STB_GLOBAL + } else if symbol.is_local() { + elf::STB_LOCAL + } else if symbol.weak { + elf::STB_WEAK + } else { + elf::STB_GLOBAL + }; + let st_other = if symbol.scope == SymbolScope::Linkage { + elf::STV_HIDDEN + } else { + elf::STV_DEFAULT + }; + let st_shndx = match symbol.kind { + SymbolKind::File => { + if need_symtab_shndx { + symtab_shndx.iowrite_with(0, ctx.le).unwrap(); + } + elf::SHN_ABS as usize + } + _ => { + let index = symbol + .section + .map(|s| section_offsets[s.0].index) + .unwrap_or(elf::SHN_UNDEF as usize); + if need_symtab_shndx { + symtab_shndx.iowrite_with(index as u32, ctx.le).unwrap(); + } + if index >= elf::SHN_LORESERVE as usize { + elf::SHN_XINDEX as usize + } else { + index + } + } + }; + let st_name = symbol_offsets[index] + .str_id + .map(|id| strtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::Sym { + st_name, + st_info: (st_bind << 4) + st_type, + st_other, + st_shndx, + st_value: symbol.value, + st_size: symbol.size, + }, + ctx, + ) + .unwrap(); 
+ }; + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + write_symbol(index, symbol); + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + write_symbol(index, symbol); + } + } + if need_symtab_shndx { + debug_assert_eq!(symtab_shndx_offset, buffer.len()); + debug_assert_eq!(symtab_shndx_len, symtab_shndx.len()); + buffer.extend(&symtab_shndx); + } + + // Write strtab section. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.extend(&strtab_data); + + // Write relocations. + for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(&mut buffer, pointer_align); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + let r_type = match self.architecture { + Architecture::I386 => match (reloc.kind, reloc.size) { + (RelocationKind::Absolute, 32) => elf::R_386_32, + (RelocationKind::Relative, 32) => elf::R_386_PC32, + (RelocationKind::Got, 32) => elf::R_386_GOT32, + (RelocationKind::PltRelative, 32) => elf::R_386_PLT32, + (RelocationKind::GotBaseOffset, 32) => elf::R_386_GOTOFF, + (RelocationKind::GotBaseRelative, 32) => elf::R_386_GOTPC, + (RelocationKind::Absolute, 16) => elf::R_386_16, + (RelocationKind::Relative, 16) => elf::R_386_PC16, + (RelocationKind::Absolute, 8) => elf::R_386_8, + (RelocationKind::Relative, 8) => elf::R_386_PC8, + (RelocationKind::Other(x), _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { + elf::R_X86_64_64 + } + (RelocationKind::Relative, _, 32) => elf::R_X86_64_PC32, + (RelocationKind::Got, _, 32) => elf::R_X86_64_GOT32, + (RelocationKind::PltRelative, _, 32) => elf::R_X86_64_PLT32, + (RelocationKind::GotRelative, _, 32) => elf::R_X86_64_GOTPCREL, + (RelocationKind::Absolute, 
RelocationEncoding::Generic, 32) => { + elf::R_X86_64_32 + } + (RelocationKind::Absolute, RelocationEncoding::X86Signed, 32) => { + elf::R_X86_64_32S + } + (RelocationKind::Absolute, _, 16) => elf::R_X86_64_16, + (RelocationKind::Relative, _, 16) => elf::R_X86_64_PC16, + (RelocationKind::Absolute, _, 8) => elf::R_X86_64_8, + (RelocationKind::Relative, _, 8) => elf::R_X86_64_PC8, + (RelocationKind::Other(x), _, _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let r_sym = symbol_offsets[reloc.symbol.0].index; + buffer + .iowrite_with( + elf::Reloc { + r_offset: reloc.offset, + r_addend: Some(reloc.addend), + r_sym, + r_type, + }, + reloc_ctx, + ) + .unwrap(); + } + } + } + + // Write shstrtab section. + debug_assert_eq!(shstrtab_offset, buffer.len()); + buffer.extend(&shstrtab_data); + + // Write section headers. + write_align(&mut buffer, pointer_align); + debug_assert_eq!(e_shoff, buffer.len()); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: 0, + sh_type: 0, + sh_flags: 0, + sh_addr: 0, + sh_offset: 0, + sh_size: if e_shnum >= elf::SHN_LORESERVE as usize { + e_shnum as u64 + } else { + 0 + }, + sh_link: if shstrtab_index >= elf::SHN_LORESERVE as usize { + shstrtab_index as u32 + } else { + 0 + }, + // TODO: e_phnum overflow + sh_info: 0, + sh_addralign: 0, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + for (index, section) in self.sections.iter().enumerate() { + let sh_type = match section.kind { + SectionKind::UninitializedData | SectionKind::UninitializedTls => elf::SHT_NOBITS, + _ => elf::SHT_PROGBITS, + }; + let sh_flags = match section.kind { + SectionKind::Text => elf::SHF_ALLOC | elf::SHF_EXECINSTR, + SectionKind::Data => elf::SHF_ALLOC | elf::SHF_WRITE, + SectionKind::Tls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::UninitializedData => elf::SHF_ALLOC | elf::SHF_WRITE, + 
SectionKind::UninitializedTls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::ReadOnlyData => elf::SHF_ALLOC, + SectionKind::ReadOnlyString => elf::SHF_ALLOC | elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::OtherString => elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::Other + | SectionKind::Debug + | SectionKind::Unknown + | SectionKind::Metadata + | SectionKind::Linker => 0, + SectionKind::TlsVariables => { + return Err(format!("unimplemented section {:?}", section.kind)) + } + }; + // TODO: not sure if this is correct, maybe user should determine this + let sh_entsize = match section.kind { + SectionKind::ReadOnlyString | SectionKind::OtherString => 1, + _ => 0, + }; + let sh_name = section_offsets[index] + .str_id + .map(|id| shstrtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name, + sh_type, + sh_flags: sh_flags.into(), + sh_addr: 0, + sh_offset: section_offsets[index].offset as u64, + sh_size: section.size, + sh_link: 0, + sh_info: 0, + sh_addralign: section.align, + sh_entsize, + }, + ctx, + ) + .unwrap(); + + if !section.relocations.is_empty() { + let sh_name = section_offsets[index] + .reloc_str_id + .map(|id| shstrtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name, + sh_type: if is_rela { elf::SHT_RELA } else { elf::SHT_REL }, + sh_flags: elf::SHF_INFO_LINK.into(), + sh_addr: 0, + sh_offset: section_offsets[index].reloc_offset as u64, + sh_size: section_offsets[index].reloc_len as u64, + sh_link: symtab_index as u32, + sh_info: section_offsets[index].index as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Reloc::size_with(&reloc_ctx) as u64, + }, + ctx, + ) + .unwrap(); + } + } + + // Write symtab section header. 
+ buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(symtab_str_id), + sh_type: elf::SHT_SYMTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: symtab_offset as u64, + sh_size: symtab_len as u64, + sh_link: strtab_index as u32, + sh_info: symtab_count_local as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Sym::size_with(&ctx) as u64, + }, + ctx, + ) + .unwrap(); + + // Write symtab_shndx section header. + if need_symtab_shndx { + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(symtab_shndx_str_id.unwrap()), + sh_type: elf::SHT_SYMTAB_SHNDX, + sh_flags: 0, + sh_addr: 0, + sh_offset: symtab_shndx_offset as u64, + sh_size: symtab_shndx_len as u64, + sh_link: strtab_index as u32, + sh_info: symtab_count_local as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Sym::size_with(&ctx) as u64, + }, + ctx, + ) + .unwrap(); + } + + // Write strtab section header. + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(strtab_str_id), + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: strtab_offset as u64, + sh_size: strtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + + // Write shstrtab section header. 
+ buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(shstrtab_str_id), + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: shstrtab_offset as u64, + sh_size: shstrtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + + Ok(buffer) + } +} diff --git a/third_party/rust/object/src/write/macho.rs b/third_party/rust/object/src/write/macho.rs new file mode 100644 index 000000000000..dbbe5df49562 --- /dev/null +++ b/third_party/rust/object/src/write/macho.rs @@ -0,0 +1,449 @@ +use scroll::ctx::SizeWith; +use scroll::{IOwrite, Pwrite}; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod mach { + pub use goblin::mach::constants::cputype::*; + pub use goblin::mach::constants::*; + pub use goblin::mach::header::*; + pub use goblin::mach::load_command::*; + pub use goblin::mach::relocation::*; + pub use goblin::mach::segment::*; + pub use goblin::mach::symbols::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + index: usize, + offset: usize, + address: u64, + reloc_offset: usize, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option, +} + +impl Object { + pub(crate) fn macho_segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match segment { + StandardSegment::Text => &b"__TEXT"[..], + StandardSegment::Data => &b"__DATA"[..], + StandardSegment::Debug => &b"__DWARF"[..], + } + } + + pub(crate) fn macho_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&b"__TEXT"[..], &b"__text"[..], SectionKind::Text), + StandardSection::Data => (&b"__DATA"[..], &b"__data"[..], SectionKind::Data), + StandardSection::ReadOnlyData => { + (&b"__TEXT"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + 
StandardSection::ReadOnlyDataWithRel => { + (&b"__DATA"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + StandardSection::ReadOnlyString => ( + &b"__TEXT"[..], + &b"__cstring"[..], + SectionKind::ReadOnlyString, + ), + StandardSection::UninitializedData => ( + &b"__DATA"[..], + &b"__bss"[..], + SectionKind::UninitializedData, + ), + } + } + + pub(crate) fn macho_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { + let constant = match relocation.kind { + RelocationKind::Relative + | RelocationKind::GotRelative + | RelocationKind::PltRelative => relocation.addend + 4, + _ => relocation.addend, + }; + relocation.addend -= constant; + constant + } + + pub(crate) fn macho_write(&self) -> Result, String> { + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => goblin::container::Endian::Little, + Endianness::Big => goblin::container::Endian::Big, + }; + let (container, pointer_align) = match self.architecture.pointer_width().unwrap() { + PointerWidth::U16 | PointerWidth::U32 => (goblin::container::Container::Little, 4), + PointerWidth::U64 => (goblin::container::Container::Big, 8), + }; + let ctx = goblin::container::Ctx::new(container, endian); + + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + + // Calculate size of Mach-O header. + offset += mach::Header::size_with(&ctx); + + // Calculate size of commands. + let mut ncmds = 0; + let command_offset = offset; + + // Calculate size of segment command and section headers. + let segment_command_offset = offset; + let segment_command_len = + mach::Segment::size_with(&ctx) + self.sections.len() * mach::Section::size_with(&ctx); + offset += segment_command_len; + ncmds += 1; + + // Calculate size of symtab command. 
+ let symtab_command_offset = offset; + let symtab_command_len = mach::SymtabCommand::size_with(&ctx.le); + offset += symtab_command_len; + ncmds += 1; + + let sizeofcmds = offset - command_offset; + + // Calculate size of section data. + let segment_data_offset = offset; + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + let mut address = 0; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].index = 1 + index; + let len = section.data.len(); + if len != 0 { + offset = align(offset, section.align as usize); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + address = align_u64(address, section.align); + section_offsets[index].address = address; + address += section.size; + } + let segment_data_size = offset - segment_data_offset; + + // Count symbols and add symbol strings to strtab. + let mut strtab = StringTable::default(); + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut nsyms = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_undefined() { + match symbol.kind { + SymbolKind::Text | SymbolKind::Data => {} + SymbolKind::File | SymbolKind::Section => continue, + _ => return Err(format!("unimplemented symbol {:?}", symbol)), + } + } + symbol_offsets[index].index = nsyms; + nsyms += 1; + if !symbol.name.is_empty() { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + offset = align(offset, pointer_align); + let symtab_offset = offset; + let symtab_len = nsyms * mach::Nlist::size_with(&ctx); + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // Null name. + strtab_data.push(0); + strtab.write(1, &mut strtab_data); + offset += strtab_data.len(); + + // Calculate size of relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + offset = align(offset, 4); + section_offsets[index].reloc_offset = offset; + let len = count * mach::RelocationInfo::size_with(&ctx.le); + offset += len; + } + } + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. + let (cputype, cpusubtype) = match self.architecture { + Architecture::Arm => (mach::CPU_TYPE_ARM, mach::CPU_SUBTYPE_ARM_ALL), + Architecture::Aarch64 => (mach::CPU_TYPE_ARM64, mach::CPU_SUBTYPE_ARM64_ALL), + Architecture::I386 => (mach::CPU_TYPE_I386, mach::CPU_SUBTYPE_I386_ALL), + Architecture::X86_64 => (mach::CPU_TYPE_X86_64, mach::CPU_SUBTYPE_X86_64_ALL), + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + + let header = mach::Header { + magic: if ctx.is_big() { + mach::MH_MAGIC_64 + } else { + mach::MH_MAGIC + }, + cputype, + cpusubtype, + filetype: mach::MH_OBJECT, + ncmds, + sizeofcmds: sizeofcmds as u32, + flags: if self.subsection_via_symbols { + mach::MH_SUBSECTIONS_VIA_SYMBOLS + } else { + 0 + }, + reserved: 0, + }; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write segment command. 
+ debug_assert_eq!(segment_command_offset, buffer.len()); + let mut segment_command = mach::Segment::new(ctx, &[]); + segment_command.cmd = if ctx.is_big() { + mach::LC_SEGMENT_64 + } else { + mach::LC_SEGMENT + }; + segment_command.cmdsize = segment_command_len as u32; + segment_command.segname = [0; 16]; + segment_command.vmaddr = 0; + segment_command.vmsize = address; + segment_command.fileoff = segment_data_offset as u64; + segment_command.filesize = segment_data_size as u64; + segment_command.maxprot = mach::VM_PROT_READ | mach::VM_PROT_WRITE | mach::VM_PROT_EXECUTE; + segment_command.initprot = mach::VM_PROT_READ | mach::VM_PROT_WRITE | mach::VM_PROT_EXECUTE; + segment_command.nsects = self.sections.len() as u32; + segment_command.flags = 0; + buffer.iowrite_with(segment_command, ctx).unwrap(); + + // Write section headers. + for (index, section) in self.sections.iter().enumerate() { + let mut sectname = [0; 16]; + sectname.pwrite(&*section.name, 0).unwrap(); + let mut segname = [0; 16]; + segname.pwrite(&*section.segment, 0).unwrap(); + let flags = match section.kind { + SectionKind::Text => { + mach::S_ATTR_PURE_INSTRUCTIONS | mach::S_ATTR_SOME_INSTRUCTIONS + } + SectionKind::Data => 0, + SectionKind::ReadOnlyData => 0, + SectionKind::ReadOnlyString => mach::S_CSTRING_LITERALS, + SectionKind::UninitializedData => mach::S_ZEROFILL, + SectionKind::Tls => mach::S_THREAD_LOCAL_REGULAR, + SectionKind::UninitializedTls => mach::S_THREAD_LOCAL_ZEROFILL, + SectionKind::TlsVariables => mach::S_THREAD_LOCAL_VARIABLES, + SectionKind::Debug => mach::S_ATTR_DEBUG, + SectionKind::OtherString => mach::S_CSTRING_LITERALS, + SectionKind::Other + | SectionKind::Unknown + | SectionKind::Linker + | SectionKind::Metadata => 0, + }; + buffer + .iowrite_with( + mach::Section { + sectname, + segname, + addr: section_offsets[index].address, + size: section.size, + offset: section_offsets[index].offset as u32, + align: section.align.trailing_zeros(), + reloff: 
section_offsets[index].reloc_offset as u32, + nreloc: section.relocations.len() as u32, + flags, + }, + ctx, + ) + .unwrap(); + } + + // Write symtab command. + debug_assert_eq!(symtab_command_offset, buffer.len()); + buffer + .iowrite_with( + mach::SymtabCommand { + cmd: mach::LC_SYMTAB, + cmdsize: symtab_command_len as u32, + symoff: symtab_offset as u32, + nsyms: nsyms as u32, + stroff: strtab_offset as u32, + strsize: strtab_data.len() as u32, + }, + ctx.le, + ) + .unwrap(); + + // Write section data. + debug_assert_eq!(segment_data_offset, buffer.len()); + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, section.align as usize); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(§ion.data); + } + } + + // Write symtab. + write_align(&mut buffer, pointer_align); + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_undefined() { + match symbol.kind { + SymbolKind::Text | SymbolKind::Data => {} + SymbolKind::File | SymbolKind::Section => continue, + _ => return Err(format!("unimplemented symbol {:?}", symbol)), + } + } + // TODO: N_STAB + // TODO: N_ABS + let mut n_type = if symbol.is_undefined() { + mach::N_UNDF | mach::N_EXT + } else { + mach::N_SECT + }; + match symbol.scope { + SymbolScope::Unknown | SymbolScope::Compilation => {} + SymbolScope::Linkage => { + n_type |= mach::N_EXT | mach::N_PEXT; + } + SymbolScope::Dynamic => { + n_type |= mach::N_EXT; + } + } + + let mut n_desc = 0; + if symbol.weak { + if symbol.is_undefined() { + n_desc |= mach::N_WEAK_REF; + } else { + n_desc |= mach::N_WEAK_DEF; + } + } + + let n_value = match symbol.section { + Some(section) => section_offsets[section.0].address + symbol.value, + None => symbol.value, + }; + + let n_strx = symbol_offsets[index] + .str_id + .map(|id| strtab.get_offset(id)) + .unwrap_or(0); + + buffer + .iowrite_with( + 
mach::Nlist { + n_strx, + n_type, + n_sect: symbol.section.map(|x| x.0 + 1).unwrap_or(0), + n_desc, + n_value, + }, + ctx, + ) + .unwrap(); + } + + // Write strtab. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.extend(&strtab_data); + + // Write relocations. + for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(&mut buffer, 4); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + let r_extern; + let r_symbolnum; + let symbol = &self.symbols[reloc.symbol.0]; + if symbol.kind == SymbolKind::Section { + r_symbolnum = section_offsets[symbol.section.unwrap().0].index as u32; + r_extern = 0; + } else { + r_symbolnum = symbol_offsets[reloc.symbol.0].index as u32; + r_extern = 1; + } + let r_length = match reloc.size { + 8 => 0, + 16 => 1, + 32 => 2, + 64 => 3, + _ => return Err(format!("unimplemented reloc size {:?}", reloc)), + }; + let (r_pcrel, r_type) = match self.architecture { + Architecture::I386 => match reloc.kind { + RelocationKind::Absolute => (0, mach::GENERIC_RELOC_VANILLA), + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.addend) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 0) => { + (0, mach::X86_64_RELOC_UNSIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86RipRelative, -4) => { + (1, mach::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86Branch, -4) => { + (1, mach::X86_64_RELOC_BRANCH) + } + (RelocationKind::PltRelative, RelocationEncoding::X86Branch, -4) => { + (1, mach::X86_64_RELOC_BRANCH) + } + ( + RelocationKind::GotRelative, + RelocationEncoding::X86RipRelativeMovq, + -4, + ) => (1, mach::X86_64_RELOC_GOT_LOAD), + (RelocationKind::GotRelative, RelocationEncoding::Generic, -4) => { + (1, mach::X86_64_RELOC_GOT) + } + _ => return Err(format!("unimplemented relocation {:?}", reloc)), 
+ }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let r_info = r_symbolnum + | r_pcrel << 24 + | r_length << 25 + | r_extern << 27 + | u32::from(r_type) << 28; + buffer + .iowrite_with( + mach::RelocationInfo { + r_address: reloc.offset as i32, + r_info, + }, + ctx.le, + ) + .unwrap(); + } + } + } + + Ok(buffer) + } +} diff --git a/third_party/rust/object/src/write/mod.rs b/third_party/rust/object/src/write/mod.rs new file mode 100644 index 000000000000..ce12ee6a59b4 --- /dev/null +++ b/third_party/rust/object/src/write/mod.rs @@ -0,0 +1,532 @@ +//! Interface for writing object files. + +#![allow(clippy::collapsible_if)] +#![allow(clippy::cognitive_complexity)] +#![allow(clippy::module_inception)] + +use scroll::Pwrite; +use std::collections::HashMap; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::target_lexicon::{Architecture, BinaryFormat, Endianness, PointerWidth}; +use crate::{RelocationEncoding, RelocationKind, SectionKind, SymbolKind, SymbolScope}; + +mod coff; +mod elf; +mod macho; +mod string; +mod util; + +/// A writable object file. +#[derive(Debug)] +pub struct Object { + format: BinaryFormat, + architecture: Architecture, + sections: Vec
, + standard_sections: HashMap, + symbols: Vec, + symbol_map: HashMap, SymbolId>, + stub_symbols: HashMap, + subsection_via_symbols: bool, +} + +impl Object { + /// Create an empty object file. + pub fn new(format: BinaryFormat, architecture: Architecture) -> Object { + Object { + format, + architecture, + sections: Vec::new(), + standard_sections: HashMap::new(), + symbols: Vec::new(), + symbol_map: HashMap::new(), + stub_symbols: HashMap::new(), + subsection_via_symbols: false, + } + } + + /// Return the file format. + #[inline] + pub fn format(&self) -> BinaryFormat { + self.format + } + + /// Return the architecture. + #[inline] + pub fn architecture(&self) -> Architecture { + self.architecture + } + + /// Return the name for a standard segment. + /// + /// This will vary based on the file format. + pub fn segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match self.format { + BinaryFormat::Elf | BinaryFormat::Coff => &[], + BinaryFormat::Macho => self.macho_segment_name(segment), + _ => unimplemented!(), + } + } + + /// Get the section with the given `SectionId`. + #[inline] + pub fn section(&self, section: SectionId) -> &Section { + &self.sections[section.0] + } + + /// Mutably get the section with the given `SectionId`. + #[inline] + pub fn section_mut(&mut self, section: SectionId) -> &mut Section { + &mut self.sections[section.0] + } + + /// Append data to an existing section. Returns the section offset of the data. + pub fn append_section_data(&mut self, section: SectionId, data: &[u8], align: u64) -> u64 { + self.sections[section.0].append_data(data, align) + } + + /// Append zero-initialized data to an existing section. Returns the section offset of the data. + pub fn append_section_bss(&mut self, section: SectionId, size: u64, align: u64) -> u64 { + self.sections[section.0].append_bss(size, align) + } + + /// Return the `SectionId` of a standard section. + /// + /// If the section doesn't already exist then it is created. 
+ pub fn section_id(&mut self, section: StandardSection) -> SectionId { + self.standard_sections + .get(§ion) + .cloned() + .unwrap_or_else(|| { + let (segment, name, kind) = self.section_info(section); + self.add_section(segment.to_vec(), name.to_vec(), kind) + }) + } + + /// Add a new section and return its `SectionId`. + /// + /// This also creates a section symbol. + pub fn add_section(&mut self, segment: Vec, name: Vec, kind: SectionKind) -> SectionId { + let id = SectionId(self.sections.len()); + self.sections.push(Section { + segment, + name, + kind, + size: 0, + align: 1, + data: Vec::new(), + relocations: Vec::new(), + symbol: None, + }); + + // Add to self.standard_sections if required. This may match multiple standard sections. + let section = &self.sections[id.0]; + for standard_section in StandardSection::all() { + if !self.standard_sections.contains_key(standard_section) { + let (segment, name, kind) = self.section_info(*standard_section); + if segment == &*section.segment && name == &*section.name && kind == section.kind { + self.standard_sections.insert(*standard_section, id); + } + } + } + + id + } + + fn section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match self.format { + BinaryFormat::Elf => self.elf_section_info(section), + BinaryFormat::Coff => self.coff_section_info(section), + BinaryFormat::Macho => self.macho_section_info(section), + _ => unimplemented!(), + } + } + + /// Add a subsection. Returns the `SectionId` and section offset of the data. 
+ pub fn add_subsection( + &mut self, + section: StandardSection, + name: &[u8], + data: &[u8], + align: u64, + ) -> (SectionId, u64) { + let section_id = if self.has_subsection_via_symbols() { + self.subsection_via_symbols = true; + self.section_id(section) + } else { + let (segment, name, kind) = self.subsection_info(section, name); + self.add_section(segment.to_vec(), name, kind) + }; + let offset = self.append_section_data(section_id, data, align); + (section_id, offset) + } + + fn has_subsection_via_symbols(&self) -> bool { + match self.format { + BinaryFormat::Elf | BinaryFormat::Coff => false, + BinaryFormat::Macho => true, + _ => unimplemented!(), + } + } + + fn subsection_info( + &self, + section: StandardSection, + value: &[u8], + ) -> (&'static [u8], Vec, SectionKind) { + let (segment, section, kind) = self.section_info(section); + let name = self.subsection_name(section, value); + (segment, name, kind) + } + + fn subsection_name(&self, section: &[u8], value: &[u8]) -> Vec { + debug_assert!(!self.has_subsection_via_symbols()); + match self.format { + BinaryFormat::Elf => self.elf_subsection_name(section, value), + BinaryFormat::Coff => self.coff_subsection_name(section, value), + _ => unimplemented!(), + } + } + + /// Get the `SymbolId` of the symbol with the given name. + pub fn symbol_id(&self, name: &[u8]) -> Option { + self.symbol_map.get(name).cloned() + } + + /// Get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol(&self, symbol: SymbolId) -> &Symbol { + &self.symbols[symbol.0] + } + + /// Mutably get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol_mut(&mut self, symbol: SymbolId) -> &mut Symbol { + &mut self.symbols[symbol.0] + } + + /// Add a new symbol and return its `SymbolId`. + pub fn add_symbol(&mut self, symbol: Symbol) -> SymbolId { + // Defined symbols must have a scope. 
+ debug_assert!(symbol.is_undefined() || symbol.scope != SymbolScope::Unknown); + if symbol.kind == SymbolKind::Section { + return self.section_symbol(symbol.section.unwrap()); + } + let symbol_id = SymbolId(self.symbols.len()); + if !symbol.name.is_empty() { + self.symbol_map.insert(symbol.name.clone(), symbol_id); + } + self.symbols.push(symbol); + symbol_id + } + + /// Add a new file symbol and return its `SymbolId`. + pub fn add_file_symbol(&mut self, name: Vec) -> SymbolId { + self.add_symbol(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::File, + scope: SymbolScope::Compilation, + weak: false, + section: None, + }) + } + + /// Get the symbol for a section. + pub fn section_symbol(&mut self, section_id: SectionId) -> SymbolId { + let section = &mut self.sections[section_id.0]; + if let Some(symbol) = section.symbol { + return symbol; + } + let name = if self.format == BinaryFormat::Coff { + section.name.clone() + } else { + Vec::new() + }; + let symbol_id = SymbolId(self.symbols.len()); + self.symbols.push(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::Section, + scope: SymbolScope::Compilation, + weak: false, + section: Some(section_id), + }); + section.symbol = Some(symbol_id); + symbol_id + } + + /// Append data to an existing section, and update a symbol to refer to it. + /// + /// Returns the section offset of the data. + pub fn add_symbol_data( + &mut self, + symbol: SymbolId, + section: SectionId, + data: &[u8], + align: u64, + ) -> u64 { + let offset = self.append_section_data(section, data, align); + let symbol = self.symbol_mut(symbol); + symbol.value = offset; + symbol.size = data.len() as u64; + symbol.section = Some(section); + offset + } + + /// Add a relocation to a section. + /// + /// Relocations must only be added after the referenced symbols have been added + /// and defined (if applicable). 
+ pub fn add_relocation( + &mut self, + section: SectionId, + mut relocation: Relocation, + ) -> Result<(), String> { + let addend = match self.format { + BinaryFormat::Elf => self.elf_fixup_relocation(&mut relocation)?, + BinaryFormat::Coff => self.coff_fixup_relocation(&mut relocation), + BinaryFormat::Macho => self.macho_fixup_relocation(&mut relocation), + _ => unimplemented!(), + }; + if addend != 0 { + self.write_relocation_addend(section, &relocation, addend)?; + } + self.sections[section.0].relocations.push(relocation); + Ok(()) + } + + fn write_relocation_addend( + &mut self, + section: SectionId, + relocation: &Relocation, + addend: i64, + ) -> Result<(), String> { + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => scroll::LE, + Endianness::Big => scroll::BE, + }; + + let data = &mut self.sections[section.0].data; + if relocation.offset + (u64::from(relocation.size) + 7) / 8 > data.len() as u64 { + return Err(format!( + "invalid relocation offset {}+{} (max {})", + relocation.offset, + relocation.size, + data.len() + )); + } + match relocation.size { + 32 => { + data.pwrite_with(addend as i32, relocation.offset as usize, endian) + .unwrap(); + } + 64 => { + data.pwrite_with(addend, relocation.offset as usize, endian) + .unwrap(); + } + _ => return Err(format!("unimplemented relocation addend {:?}", relocation)), + } + Ok(()) + } + + /// Write the object to a `Vec`. + pub fn write(&self) -> Result, String> { + match self.format { + BinaryFormat::Elf => self.elf_write(), + BinaryFormat::Coff => self.coff_write(), + BinaryFormat::Macho => self.macho_write(), + _ => unimplemented!(), + } + } +} + +/// A standard segment kind. +#[allow(missing_docs)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum StandardSegment { + Text, + Data, + Debug, +} + +/// A standard section kind. 
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum StandardSection {
    Text,
    Data,
    ReadOnlyData,
    ReadOnlyDataWithRel,
    ReadOnlyString,
    UninitializedData,
}

impl StandardSection {
    /// Return the section kind of a standard section.
    pub fn kind(self) -> SectionKind {
        match self {
            StandardSection::Text => SectionKind::Text,
            StandardSection::Data => SectionKind::Data,
            StandardSection::ReadOnlyData | StandardSection::ReadOnlyDataWithRel => {
                SectionKind::ReadOnlyData
            }
            StandardSection::ReadOnlyString => SectionKind::ReadOnlyString,
            StandardSection::UninitializedData => SectionKind::UninitializedData,
        }
    }

    /// Return every standard section, in declaration order.
    fn all() -> &'static [StandardSection] {
        &[
            StandardSection::Text,
            StandardSection::Data,
            StandardSection::ReadOnlyData,
            StandardSection::ReadOnlyDataWithRel,
            StandardSection::ReadOnlyString,
            StandardSection::UninitializedData,
        ]
    }
}

/// An identifier used to reference a section.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SectionId(usize);

/// A section in an object file.
#[derive(Debug)]
pub struct Section {
    segment: Vec<u8>,
    name: Vec<u8>,
    kind: SectionKind,
    size: u64,
    align: u64,
    data: Vec<u8>,
    relocations: Vec<Relocation>,
    symbol: Option<SymbolId>,
}

impl Section {
    /// Return true if this section contains uninitialized data.
    #[inline]
    pub fn is_bss(&self) -> bool {
        self.kind == SectionKind::UninitializedData || self.kind == SectionKind::UninitializedTls
    }

    /// Set the data for a section.
    ///
    /// Must not be called for sections that already have data, or that contain uninitialized data.
    /// `align` must be a power of two.
    pub fn set_data(&mut self, data: Vec<u8>, align: u64) {
        debug_assert!(!self.is_bss());
        debug_assert!(align.is_power_of_two());
        debug_assert!(self.data.is_empty());
        self.size = data.len() as u64;
        self.data = data;
        self.align = align;
    }

    /// Append data to a section.
    ///
    /// The existing data is zero-padded up to `align` first, and the section
    /// alignment is raised to `align` if it was smaller. Returns the section
    /// offset at which `data` was placed.
    ///
    /// Must not be called for sections that contain uninitialized data.
    /// `align` must be a power of two.
    pub fn append_data(&mut self, data: &[u8], align: u64) -> u64 {
        debug_assert!(!self.is_bss());
        debug_assert!(align.is_power_of_two());
        if self.align < align {
            self.align = align;
        }
        let align = align as usize;
        let mut offset = self.data.len();
        if offset & (align - 1) != 0 {
            offset += align - (offset & (align - 1));
            self.data.resize(offset, 0);
        }
        self.data.extend(data);
        self.size = self.data.len() as u64;
        offset as u64
    }

    /// Append uninitialized data to a section.
    ///
    /// Only the section size is increased; no bytes are stored. Returns the
    /// section offset at which the new region starts.
    ///
    /// Must not be called for sections that contain initialized data.
    /// `align` must be a power of two.
    pub fn append_bss(&mut self, size: u64, align: u64) -> u64 {
        debug_assert!(self.is_bss());
        debug_assert!(align.is_power_of_two());
        if self.align < align {
            self.align = align;
        }
        let mut offset = self.size;
        if offset & (align - 1) != 0 {
            offset += align - (offset & (align - 1));
            self.size = offset;
        }
        self.size += size;
        offset
    }
}

/// An identifier used to reference a symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SymbolId(usize);

/// A symbol in an object file.
#[derive(Debug)]
pub struct Symbol {
    /// The name of the symbol.
    pub name: Vec<u8>,
    /// The value of the symbol.
    ///
    /// If the symbol is defined in a section, then this is the section offset of the symbol.
    pub value: u64,
    /// The size of the symbol.
    pub size: u64,
    /// The kind of the symbol.
    pub kind: SymbolKind,
    /// The scope of the symbol.
    pub scope: SymbolScope,
    /// Whether the symbol has weak binding.
    pub weak: bool,
    /// The section containing the symbol.
    ///
    /// Set to `None` for undefined symbols.
    pub section: Option<SectionId>,
}

impl Symbol {
    /// Return true if the symbol is undefined.
    #[inline]
    pub fn is_undefined(&self) -> bool {
        self.section.is_none()
    }

    /// Return true if the symbol scope is local.
+ #[inline] + pub fn is_local(&self) -> bool { + self.scope == SymbolScope::Compilation + } +} + +/// A relocation in an object file. +#[derive(Debug)] +pub struct Relocation { + /// The section offset of the place of the relocation. + pub offset: u64, + /// The size in bits of the place of relocation. + pub size: u8, + /// The operation used to calculate the result of the relocation. + pub kind: RelocationKind, + /// Information about how the result of the relocation operation is encoded in the place. + pub encoding: RelocationEncoding, + /// The symbol referred to by the relocation. + /// + /// This may be a section symbol. + pub symbol: SymbolId, + /// The addend to use in the relocation calculation. + /// + /// This may be in addition to an implicit addend stored at the place of the relocation. + pub addend: i64, +} diff --git a/third_party/rust/object/src/write/string.rs b/third_party/rust/object/src/write/string.rs new file mode 100644 index 000000000000..6a7ac31fe603 --- /dev/null +++ b/third_party/rust/object/src/write/string.rs @@ -0,0 +1,140 @@ +use indexmap::IndexSet; + +use crate::alloc::vec::Vec; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct StringId(usize); + +#[derive(Debug, Default)] +pub(crate) struct StringTable<'a> { + strings: IndexSet<&'a [u8]>, + offsets: Vec, +} + +impl<'a> StringTable<'a> { + /// Add a string to the string table. + /// + /// Panics if the string table has already been written, or + /// if the string contains a null byte. + pub fn add(&mut self, string: &'a [u8]) -> StringId { + assert!(self.offsets.is_empty()); + assert!(!string.contains(&0)); + let id = self.strings.insert_full(string).0; + StringId(id) + } + + /// Return the offset of the given string. + /// + /// Panics if the string table has not been written, or + /// if the string is not in the string table. 
+ pub fn get_offset(&self, id: StringId) -> usize { + self.offsets[id.0] + } + + /// Append the string table to the given `Vec`, and + /// calculate the list of string offsets. + /// + /// `base` is the initial string table offset. For example, + /// this should be 1 for ELF, to account for the initial + /// null byte (which must have been written by the caller). + pub fn write(&mut self, base: usize, w: &mut Vec) { + assert!(self.offsets.is_empty()); + + let mut ids: Vec<_> = (0..self.strings.len()).collect(); + sort(&mut ids, 1, &self.strings); + + self.offsets = vec![0; ids.len()]; + let mut offset = base; + let mut previous = &[][..]; + for id in ids { + let string = self.strings.get_index(id).unwrap(); + if previous.ends_with(string) { + self.offsets[id] = offset - string.len() - 1; + } else { + self.offsets[id] = offset; + w.extend_from_slice(string); + w.push(0); + offset += string.len() + 1; + previous = string; + } + } + } +} + +// Multi-key quicksort. +// +// Ordering is such that if a string is a suffix of at least one other string, +// then it is placed immediately after one of those strings. That is: +// - comparison starts at the end of the string +// - shorter strings come later +// +// Based on the implementation in LLVM. 
+fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) { + loop { + if ids.len() <= 1 { + return; + } + + let pivot = byte(ids[0], pos, strings); + let mut lower = 0; + let mut upper = ids.len(); + let mut i = 1; + while i < upper { + let b = byte(ids[i], pos, strings); + if b > pivot { + ids.swap(lower, i); + lower += 1; + i += 1; + } else if b < pivot { + upper -= 1; + ids.swap(upper, i); + } else { + i += 1; + } + } + + sort(&mut ids[..lower], pos, strings); + sort(&mut ids[upper..], pos, strings); + + if pivot == 0 { + return; + } + ids = &mut ids[lower..upper]; + pos += 1; + } +} + +fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 { + let string = strings.get_index(id).unwrap(); + let len = string.len(); + if len >= pos { + string[len - pos] + } else { + // We know the strings don't contain null bytes. + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn string_table() { + let mut table = StringTable::default(); + let id0 = table.add(b""); + let id1 = table.add(b"foo"); + let id2 = table.add(b"bar"); + let id3 = table.add(b"foobar"); + + let mut data = Vec::new(); + data.push(0); + table.write(1, &mut data); + assert_eq!(data, b"\0foobar\0foo\0"); + + assert_eq!(table.get_offset(id0), 11); + assert_eq!(table.get_offset(id1), 8); + assert_eq!(table.get_offset(id2), 4); + assert_eq!(table.get_offset(id3), 1); + } +} diff --git a/third_party/rust/object/src/write/util.rs b/third_party/rust/object/src/write/util.rs new file mode 100644 index 000000000000..0ac65424af11 --- /dev/null +++ b/third_party/rust/object/src/write/util.rs @@ -0,0 +1,14 @@ +use crate::alloc::vec::Vec; + +pub(crate) fn align(offset: usize, size: usize) -> usize { + (offset + (size - 1)) & !(size - 1) +} + +pub(crate) fn align_u64(offset: u64, size: u64) -> u64 { + (offset + (size - 1)) & !(size - 1) +} + +pub(crate) fn write_align(buffer: &mut Vec, size: usize) { + let offset = align(buffer.len(), size); + buffer.resize(offset, 0); +} 
// diff --git a/third_party/rust/object/tests/round_trip.rs b/third_party/rust/object/tests/round_trip.rs
// new file mode 100644
// index 000000000000..d15362ba4790
// --- /dev/null
// +++ b/third_party/rust/object/tests/round_trip.rs
// @@ -0,0 +1,246 @@
#![cfg(all(feature = "read", feature = "write"))]

use object::read::{Object, ObjectSection};
use object::{read, write};
use object::{RelocationEncoding, RelocationKind, SectionKind, SymbolKind, SymbolScope};
use target_lexicon::{Architecture, BinaryFormat};

/// Write a COFF object with one text section, one symbol and one relocation,
/// then parse it back and check that everything survived the round trip.
#[test]
fn coff_x86_64() {
    let mut object = write::Object::new(BinaryFormat::Coff, Architecture::X86_64);

    let text = object.section_id(write::StandardSection::Text);
    object.append_section_data(text, &[1; 30], 4);

    // The first 30 bytes are padded to the 4-byte alignment, so this lands at 32.
    let func1_offset = object.append_section_data(text, &[1; 30], 4);
    assert_eq!(func1_offset, 32);
    let func1_symbol = object.add_symbol(write::Symbol {
        name: b"func1".to_vec(),
        value: func1_offset,
        size: 32,
        kind: SymbolKind::Text,
        scope: SymbolScope::Linkage,
        weak: false,
        section: Some(text),
    });
    object
        .add_relocation(
            text,
            write::Relocation {
                offset: 8,
                size: 64,
                kind: RelocationKind::Absolute,
                encoding: RelocationEncoding::Generic,
                symbol: func1_symbol,
                addend: 0,
            },
        )
        .unwrap();

    // From here on `object`, `text` and `func1_symbol` refer to the parsed file.
    let bytes = object.write().unwrap();
    let object = read::File::parse(&bytes).unwrap();
    assert_eq!(object.format(), BinaryFormat::Coff);
    assert_eq!(object.architecture(), Architecture::X86_64);

    let mut sections = object.sections();

    let text = sections.next().unwrap();
    println!("{:?}", text);
    let text_index = text.index();
    assert_eq!(text.name(), Some(".text"));
    assert_eq!(text.kind(), SectionKind::Text);
    assert_eq!(text.address(), 0);
    assert_eq!(text.size(), 62);

    let mut symbols = object.symbols();

    let (func1_symbol, symbol) = symbols.next().unwrap();
    println!("{:?}", symbol);
    assert_eq!(symbol.name(), Some("func1"));
    assert_eq!(symbol.address(), func1_offset);
    assert_eq!(symbol.kind(), SymbolKind::Text);
    assert_eq!(symbol.section_index(), Some(text_index));
    assert_eq!(symbol.scope(), SymbolScope::Linkage);
    assert!(!symbol.is_weak());
    assert!(!symbol.is_undefined());

    let mut relocations = text.relocations();

    let (offset, relocation) = relocations.next().unwrap();
    println!("{:?}", relocation);
    assert_eq!(offset, 8);
    assert_eq!(relocation.kind(), RelocationKind::Absolute);
    assert_eq!(relocation.encoding(), RelocationEncoding::Generic);
    assert_eq!(relocation.size(), 64);
    assert_eq!(
        relocation.target(),
        read::RelocationTarget::Symbol(func1_symbol)
    );
    assert_eq!(relocation.addend(), 0);
}

/// Write an ELF object with one text section, one symbol and one relocation,
/// then parse it back and check that everything survived the round trip.
///
/// The parsed file is expected to start with an empty-named metadata section
/// and a null symbol ahead of the entries that were written.
#[test]
fn elf_x86_64() {
    let mut object = write::Object::new(BinaryFormat::Elf, Architecture::X86_64);

    let text = object.section_id(write::StandardSection::Text);
    object.append_section_data(text, &[1; 30], 4);

    // The first 30 bytes are padded to the 4-byte alignment, so this lands at 32.
    let func1_offset = object.append_section_data(text, &[1; 30], 4);
    assert_eq!(func1_offset, 32);
    let func1_symbol = object.add_symbol(write::Symbol {
        name: b"func1".to_vec(),
        value: func1_offset,
        size: 32,
        kind: SymbolKind::Text,
        scope: SymbolScope::Linkage,
        weak: false,
        section: Some(text),
    });
    object
        .add_relocation(
            text,
            write::Relocation {
                offset: 8,
                size: 64,
                kind: RelocationKind::Absolute,
                encoding: RelocationEncoding::Generic,
                symbol: func1_symbol,
                addend: 0,
            },
        )
        .unwrap();

    // From here on `object`, `text` and `func1_symbol` refer to the parsed file.
    let bytes = object.write().unwrap();
    let object = read::File::parse(&bytes).unwrap();
    assert_eq!(object.format(), BinaryFormat::Elf);
    assert_eq!(object.architecture(), Architecture::X86_64);

    let mut sections = object.sections();

    let section = sections.next().unwrap();
    // Print the section that was just read, not the write-side `text` id.
    println!("{:?}", section);
    assert_eq!(section.name(), Some(""));
    assert_eq!(section.kind(), SectionKind::Metadata);
    assert_eq!(section.address(), 0);
    assert_eq!(section.size(), 0);

    let text = sections.next().unwrap();
    println!("{:?}", text);
    let text_index = text.index();
    assert_eq!(text.name(), Some(".text"));
    assert_eq!(text.kind(), SectionKind::Text);
    assert_eq!(text.address(), 0);
    assert_eq!(text.size(), 62);

    let mut symbols = object.symbols();

    let (_, symbol) = symbols.next().unwrap();
    println!("{:?}", symbol);
    assert_eq!(symbol.name(), Some(""));
    assert_eq!(symbol.address(), 0);
    assert_eq!(symbol.kind(), SymbolKind::Null);
    assert_eq!(symbol.section_index(), None);
    assert_eq!(symbol.scope(), SymbolScope::Unknown);
    assert!(!symbol.is_weak());
    assert!(symbol.is_undefined());

    let (func1_symbol, symbol) = symbols.next().unwrap();
    println!("{:?}", symbol);
    assert_eq!(symbol.name(), Some("func1"));
    assert_eq!(symbol.address(), func1_offset);
    assert_eq!(symbol.kind(), SymbolKind::Text);
    assert_eq!(symbol.section_index(), Some(text_index));
    assert_eq!(symbol.scope(), SymbolScope::Linkage);
    assert!(!symbol.is_weak());
    assert!(!symbol.is_undefined());

    let mut relocations = text.relocations();

    let (offset, relocation) = relocations.next().unwrap();
    println!("{:?}", relocation);
    assert_eq!(offset, 8);
    assert_eq!(relocation.kind(), RelocationKind::Absolute);
    assert_eq!(relocation.encoding(), RelocationEncoding::Generic);
    assert_eq!(relocation.size(), 64);
    assert_eq!(
        relocation.target(),
        read::RelocationTarget::Symbol(func1_symbol)
    );
    assert_eq!(relocation.addend(), 0);
}

/// Write a Mach-O object with one text section, one symbol and one relocation,
/// then parse it back and check that everything survived the round trip.
#[test]
fn macho_x86_64() {
    let mut object = write::Object::new(BinaryFormat::Macho, Architecture::X86_64);

    let text = object.section_id(write::StandardSection::Text);
    object.append_section_data(text, &[1; 30], 4);

    // The first 30 bytes are padded to the 4-byte alignment, so this lands at 32.
    let func1_offset = object.append_section_data(text, &[1; 30], 4);
    assert_eq!(func1_offset, 32);
    let func1_symbol = object.add_symbol(write::Symbol {
        name: b"func1".to_vec(),
        value: func1_offset,
        size: 32,
        kind: SymbolKind::Text,
        scope: SymbolScope::Linkage,
        weak: false,
        section: Some(text),
    });
    object
        .add_relocation(
            text,
            write::Relocation {
                offset: 8,
                size: 64,
                kind: RelocationKind::Absolute,
                encoding: RelocationEncoding::Generic,
                symbol: func1_symbol,
                addend: 0,
            },
        )
        .unwrap();

    // From here on `object`, `text` and `func1_symbol` refer to the parsed file.
    let bytes = object.write().unwrap();
    let object = read::File::parse(&bytes).unwrap();
    assert_eq!(object.format(), BinaryFormat::Macho);
    assert_eq!(object.architecture(), Architecture::X86_64);

    let mut sections = object.sections();

    let text = sections.next().unwrap();
    println!("{:?}", text);
    let text_index = text.index();
    assert_eq!(text.name(), Some("__text"));
    assert_eq!(text.segment_name(), Some("__TEXT"));
    assert_eq!(text.kind(), SectionKind::Text);
    assert_eq!(text.address(), 0);
    assert_eq!(text.size(), 62);

    let mut symbols = object.symbols();

    let (func1_symbol, symbol) = symbols.next().unwrap();
    println!("{:?}", symbol);
    assert_eq!(symbol.name(), Some("func1"));
    assert_eq!(symbol.address(), func1_offset);
    assert_eq!(symbol.kind(), SymbolKind::Text);
    assert_eq!(symbol.section_index(), Some(text_index));
    assert_eq!(symbol.scope(), SymbolScope::Linkage);
    assert!(!symbol.is_weak());
    assert!(!symbol.is_undefined());

    let mut relocations = text.relocations();

    let (offset, relocation) = relocations.next().unwrap();
    println!("{:?}", relocation);
    assert_eq!(offset, 8);
    assert_eq!(relocation.kind(), RelocationKind::Absolute);
    assert_eq!(relocation.encoding(), RelocationEncoding::Generic);
    assert_eq!(relocation.size(), 64);
    assert_eq!(
        relocation.target(),
        read::RelocationTarget::Symbol(func1_symbol)
    );
    assert_eq!(relocation.addend(), 0);
}