Bug 1878375 - Synchronize vendored Rust libraries with mozilla-central. r=mkmelin
mozilla-central: c845f32071817c81069390d3ba4f06b0ee270c20 comm-central: 47264167d5a5bcbee95f2f854aeb5f7c355361af Differential Revision: https://phabricator.services.mozilla.com/D229450 --HG-- extra : amend_source : 8d7e9ffbf2d4b3f4295a8e85f321ad5309628d35
This commit is contained in:
Родитель
983bf5e47c
Коммит
71c23c3b82
|
@ -1336,9 +1336,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.6"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
|
@ -4836,9 +4836,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.6"
|
||||
version = "0.10.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
|
||||
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"mc_workspace_toml": "a60cafa6802cfd0786deb79522c80105ee41671e7c00a3870db4e3ae77d866233bd96e36d000234f719a05edc31d10947590c8f363b715e3d0a60f2a3ea42b2e", "mc_gkrust_toml": "f7fa79da556c7387a40a59d5658dac7883af9bf3ab97a2efb3888738efa8f68c824af55252719afa69213ba3b482ead07229a32bd9c41212f5cc058c6a93e246", "mc_hack_toml": "94703992b6fbcc5190ed1044f8390b072a7897a2c297361ad9e721bbaaa336689a89174cf7d339339ef885331c0c358fd363c4b14fb03e25a78875b66d3d575e", "mc_cargo_lock": "21960c4bb63dafb7d31bef46652353562a118bf4d8a605c43911a1953155d614b47a92fedc5d52c3e76259fd8250bfcf0e824ab17643b8fd2733f6ee2b7e6cae"}
|
||||
{"mc_workspace_toml": "a60cafa6802cfd0786deb79522c80105ee41671e7c00a3870db4e3ae77d866233bd96e36d000234f719a05edc31d10947590c8f363b715e3d0a60f2a3ea42b2e", "mc_gkrust_toml": "f7fa79da556c7387a40a59d5658dac7883af9bf3ab97a2efb3888738efa8f68c824af55252719afa69213ba3b482ead07229a32bd9c41212f5cc058c6a93e246", "mc_hack_toml": "94703992b6fbcc5190ed1044f8390b072a7897a2c297361ad9e721bbaaa336689a89174cf7d339339ef885331c0c358fd363c4b14fb03e25a78875b66d3d575e", "mc_cargo_lock": "2aa755044a6883c0c96b71d30c360ec598b92efc48fa6369b48c7ad9d6c6d4bd96ea2d92e93db40d0a9477c822dae1d1ad731881fab6021ce64250cda41bc553"}
|
|
@ -1 +1 @@
|
|||
{"files":{"CHANGELOG.md":"cba0482b4328c05f545e94d6fea5d068b8c2e8c27abec3851b8fb567c6a0f562","Cargo.toml":"be0df25f7235deb18a52323de163e63bd5aefe4ad91ed276022d4757ccddeece","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"9e0dfd2dd4173a530e238cb6adb37aa78c34c6bc7444e0e10c1ab5d8881f63ba","README.md":"edf9f16c57466b06d201b8646182b7332324c7aba28f832dde7f57d03249637d","src/core_api.rs":"b52728aba8a84f980f3f9cc8a94a64d3a97f1eb5f4db144904822c2f8eefb1f8","src/core_api/ct_variable.rs":"703bd62fb693a437e319d1192988bd674f9127a6b76f73b4c58c71afc79bc013","src/core_api/rt_variable.rs":"b57f89bf3991a313e2ddde09c701375e23539e7df74d685a161707ba1fbc99e4","src/core_api/wrapper.rs":"033777bed7d140b158e15d50fda8a6e06557ce89bd0738fcca692be2c39e8b8a","src/core_api/xof_reader.rs":"f33ca7b2c17eb99d84ea460d5567af68690e4fa6c2d94069a5d6748f8c8620eb","src/dev.rs":"cbaeab07489efcadec917d7b7bcf2fdade79e78a4839ab3c3d8ad442f8f82833","src/dev/fixed.rs":"1cbabc651645c1e781d31825791132b4e3741f426e99d7e40988e2a5ee49bddd","src/dev/mac.rs":"e8837d3b99dc8b6ddb398e7fad5731c2ed36931f851ed625d3ae59fb31244165","src/dev/rng.rs":"156f42e9eb8fb2083cd12dc4a9bff9d57a321d33367efe6cd42cdc02c17ed2dc","src/dev/variable.rs":"51939602b43f5a813fc725bc603a34246bbf76facaa7930cb7bf78c283ec94a7","src/dev/xof.rs":"b3971175e50f615247e4158cba87d77c369461eda22751d888725cec45b61985","src/digest.rs":"8beab74640774c9f6811daa6dac9b5a8867f5beeb0b552a9b5ddbc5cfc196ed0","src/lib.rs":"5128199102bf0f7638fba0bbcf42b23822e31065841fb0c4304b64f681fde961","src/mac.rs":"6303caa2c5b76513346c082dd600e007354179ad440fc83dad3d7f4240281803"},"package":"8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f"}
|
||||
{"files":{"CHANGELOG.md":"e89e1b904ddcb022d2413bf6eb0cc14418c0177f669c4da5520c2566ebb3800c","Cargo.toml":"f0cc2fc18c6e7f3533658b9e59d71b55e641e7d8226e5d1b915a5e7c0a1d3f36","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"9e0dfd2dd4173a530e238cb6adb37aa78c34c6bc7444e0e10c1ab5d8881f63ba","README.md":"edf9f16c57466b06d201b8646182b7332324c7aba28f832dde7f57d03249637d","src/core_api.rs":"b52728aba8a84f980f3f9cc8a94a64d3a97f1eb5f4db144904822c2f8eefb1f8","src/core_api/ct_variable.rs":"703bd62fb693a437e319d1192988bd674f9127a6b76f73b4c58c71afc79bc013","src/core_api/rt_variable.rs":"b57f89bf3991a313e2ddde09c701375e23539e7df74d685a161707ba1fbc99e4","src/core_api/wrapper.rs":"033777bed7d140b158e15d50fda8a6e06557ce89bd0738fcca692be2c39e8b8a","src/core_api/xof_reader.rs":"f33ca7b2c17eb99d84ea460d5567af68690e4fa6c2d94069a5d6748f8c8620eb","src/dev.rs":"cbaeab07489efcadec917d7b7bcf2fdade79e78a4839ab3c3d8ad442f8f82833","src/dev/fixed.rs":"1cbabc651645c1e781d31825791132b4e3741f426e99d7e40988e2a5ee49bddd","src/dev/mac.rs":"e8837d3b99dc8b6ddb398e7fad5731c2ed36931f851ed625d3ae59fb31244165","src/dev/rng.rs":"156f42e9eb8fb2083cd12dc4a9bff9d57a321d33367efe6cd42cdc02c17ed2dc","src/dev/variable.rs":"51939602b43f5a813fc725bc603a34246bbf76facaa7930cb7bf78c283ec94a7","src/dev/xof.rs":"b3971175e50f615247e4158cba87d77c369461eda22751d888725cec45b61985","src/digest.rs":"8beab74640774c9f6811daa6dac9b5a8867f5beeb0b552a9b5ddbc5cfc196ed0","src/lib.rs":"5128199102bf0f7638fba0bbcf42b23822e31065841fb0c4304b64f681fde961","src/mac.rs":"6303caa2c5b76513346c082dd600e007354179ad440fc83dad3d7f4240281803"},"package":"9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"}
|
|
@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
|
|||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## 0.10.7 (2023-05-19)
|
||||
### Changed
|
||||
- Loosen `subtle` version requirement ([#1260])
|
||||
|
||||
[#1260]: https://github.com/RustCrypto/traits/pull/1260
|
||||
|
||||
## 0.10.6 (2022-11-17)
|
||||
### Added
|
||||
- `Mac::verify_reset` and `Mac::verify_slice_reset` methods ([#1154])
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
[package]
|
||||
edition = "2018"
|
||||
name = "digest"
|
||||
version = "0.10.6"
|
||||
version = "0.10.7"
|
||||
authors = ["RustCrypto Developers"]
|
||||
description = "Traits for cryptographic hash functions and message authentication codes"
|
||||
documentation = "https://docs.rs/digest"
|
||||
|
@ -52,7 +52,7 @@ optional = true
|
|||
version = "0.1.3"
|
||||
|
||||
[dependencies.subtle]
|
||||
version = "=2.4"
|
||||
version = "2.4"
|
||||
optional = true
|
||||
default-features = false
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"files":{"CHANGELOG.md":"604cc546b683e035e1f759479e41401c9035e7da6e07808f2cbd7702a07a5d26","Cargo.toml":"a6172879ad5aa1b7a6593e57db16c3d99c00563076239bf094b0d0f5ed6b30f8","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"b4eb00df6e2a4d22518fcaa6a2b4646f249b3a3c9814509b22bd2091f1392ff1","README.md":"b7af562922e4a631657acf264772d2af2b72a08d9bbc5fbcf56d9324f9027708","benches/mod.rs":"c32d9f91a541821ea988c14eee710963e623ef1edf69b02b41a29bc44e04ba95","src/consts.rs":"2f820349fa7cbf9fecc1d4aabbd1a721bb1badc3f32ef9e903826960b6f42523","src/core_api.rs":"73b160d98bfa6737688875ad73da5e3c2c93582604dc313d208200e12fdab676","src/lib.rs":"a286546dab99a51bdb3a5dc4edbd08fb9a57028cb422151f3a97441d113d7425","src/sha256.rs":"cfc2b62a412112e471781a770793f0ba0466594b2e37001334562f3d95f340ce","src/sha256/aarch64.rs":"02dbac483409a853126fec642f964a464e4372f53da2fa4120b29bed204f72b7","src/sha256/soft.rs":"98e765a8e8dfa0af31f2b76570f212e6b3099522bf300e1554cbbd9fd5d02960","src/sha256/x86.rs":"70f1597f2029522b35bfd026df0a8908f086523ab2a80ba3ef35e6231b56353c","src/sha512.rs":"92c4210a627b78505a195722b2f24bac5e6cfdece6292bf184ba8d42e7e2c35f","src/sha512/soft.rs":"0183ad89418b886859d2afa9bf061bc92759ae337c1d26147b4300042e63ef42","src/sha512/x86.rs":"c7dd8bdf3212e1e8c4cc9cc6b380dc0468f79dcfd0f61a445d0d38cead45a03a","tests/data/sha224.blb":"59b185972521af418fd49a079de3d5f5bed74cd76d80473da51cab3faee6c7d0","tests/data/sha256.blb":"bb096934bb7e43e41ce143d211397afca6fcdfe243a39811688ea31aae6f800a","tests/data/sha384.blb":"e8fe66c07ba336fae2c0aa4c87cb768f41bd4ed318ee1a36fbde0a68581946ec","tests/data/sha512.blb":"1cc0e86571f2f4e3bc81438ce7b6c25c118d2d7437355240113f59cbb782c8d6","tests/data/sha512_224.blb":"b02dd46741db1034112e0888d0cdb233a21b9a82c319456f806bbaae49acf440","tests/data/sha512_256.blb":"95195b758e362d92ff0cebebac4cca696512ea5811b635243bc70e29164e5786","tests/mod.rs":"61be596fd9b45a8db345950ff2ed6f87eaf4d239ac156885f36e819da0597644"},"package":"82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"}
|
||||
{"files":{"CHANGELOG.md":"b7b0a14409ac2880f86fe50d9584acc81f2346ebcb4e46a9e2235b54ac5b02ef","Cargo.toml":"5fdf94b86fc47d105d2f2cc55c6346d15e7f3d2d7ea92031b1ce2d24276e7778","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"b4eb00df6e2a4d22518fcaa6a2b4646f249b3a3c9814509b22bd2091f1392ff1","README.md":"b7af562922e4a631657acf264772d2af2b72a08d9bbc5fbcf56d9324f9027708","benches/mod.rs":"c32d9f91a541821ea988c14eee710963e623ef1edf69b02b41a29bc44e04ba95","src/consts.rs":"2f820349fa7cbf9fecc1d4aabbd1a721bb1badc3f32ef9e903826960b6f42523","src/core_api.rs":"73b160d98bfa6737688875ad73da5e3c2c93582604dc313d208200e12fdab676","src/lib.rs":"9d0ec0ba86a801bd9b2024f0b84ee322a26c7376a623dd61210e0eb9d6355aa1","src/sha256.rs":"78e84eea5d517554aa5a10860bf2ce5013ca26d529e78643cd59062546e0746f","src/sha256/aarch64.rs":"18121a25867a575fec8ef64da763693ece4e3e3e84da095254b8471234c6f1f8","src/sha256/loongarch64_asm.rs":"79e2d5e3c039581e2319f8789de9ed13a8dd819ebffd13532dbd83448c7ad662","src/sha256/soft.rs":"98e765a8e8dfa0af31f2b76570f212e6b3099522bf300e1554cbbd9fd5d02960","src/sha256/x86.rs":"70f1597f2029522b35bfd026df0a8908f086523ab2a80ba3ef35e6231b56353c","src/sha512.rs":"1b19c23c63e9cfca8b42fd9e108a8570dd03e22a37d4d6f499f2fa5e566cb2de","src/sha512/aarch64.rs":"2ed929329a0fa66180e4726d028713a49f99cc223e635078fc1f3252a44981e0","src/sha512/loongarch64_asm.rs":"58a7b54d95a0e037ba80570d96ffe0dd7c0014c7fcb45b90725e522cc4992d8a","src/sha512/soft.rs":"0183ad89418b886859d2afa9bf061bc92759ae337c1d26147b4300042e63ef42","src/sha512/x86.rs":"c7dd8bdf3212e1e8c4cc9cc6b380dc0468f79dcfd0f61a445d0d38cead45a03a","tests/data/sha224.blb":"59b185972521af418fd49a079de3d5f5bed74cd76d80473da51cab3faee6c7d0","tests/data/sha256.blb":"bb096934bb7e43e41ce143d211397afca6fcdfe243a39811688ea31aae6f800a","tests/data/sha384.blb":"e8fe66c07ba336fae2c0aa4c87cb768f41bd4ed318ee1a36fbde0a68581946ec","tests/data/sha512.blb":"1cc0e86571f2f4e3bc81438ce7b6c25c118d2d7437355240113f59cbb782c8d6","tests/data/sha512_224.blb":"b02dd46741db1034112e0888d0cdb233a21b9a82c319456f806bbaae49acf440","tests/data/sha512_256.blb":"95195b758e362d92ff0cebebac4cca696512ea5811b635243bc70e29164e5786","tests/mod.rs":"61be596fd9b45a8db345950ff2ed6f87eaf4d239ac156885f36e819da0597644"},"package":"793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"}
|
|
@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
|
|||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## 0.10.8 (2023-09-26)
|
||||
### Added
|
||||
- `asm!`-based backend for LoongArch64 targets gated behind `loongarch64_asm` feature [#507]
|
||||
|
||||
[#507]: https://github.com/RustCrypto/hashes/pull/507
|
||||
|
||||
## 0.10.7 (2023-06-15)
|
||||
### Added
|
||||
- AArch64 Neon-based backend ([#490])
|
||||
|
||||
[#490]: https://github.com/RustCrypto/hashes/pull/490
|
||||
|
||||
## 0.10.6 (2022-09-16)
|
||||
### Added
|
||||
- Feature-gated OID support ([#405])
|
||||
|
|
|
@ -3,35 +3,51 @@
|
|||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "sha2"
|
||||
version = "0.10.6"
|
||||
version = "0.10.8"
|
||||
authors = ["RustCrypto Developers"]
|
||||
description = "Pure Rust implementation of the SHA-2 hash function family\nincluding SHA-224, SHA-256, SHA-384, and SHA-512.\n"
|
||||
description = """
|
||||
Pure Rust implementation of the SHA-2 hash function family
|
||||
including SHA-224, SHA-256, SHA-384, and SHA-512.
|
||||
"""
|
||||
documentation = "https://docs.rs/sha2"
|
||||
readme = "README.md"
|
||||
keywords = ["crypto", "sha2", "hash", "digest"]
|
||||
categories = ["cryptography", "no-std"]
|
||||
keywords = [
|
||||
"crypto",
|
||||
"sha2",
|
||||
"hash",
|
||||
"digest",
|
||||
]
|
||||
categories = [
|
||||
"cryptography",
|
||||
"no-std",
|
||||
]
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/RustCrypto/hashes"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
all-features = true
|
||||
rustdoc-args = ["--cfg", "docsrs"]
|
||||
rustdoc-args = [
|
||||
"--cfg",
|
||||
"docsrs",
|
||||
]
|
||||
|
||||
[dependencies.cfg-if]
|
||||
version = "1.0"
|
||||
|
||||
[dependencies.digest]
|
||||
version = "0.10.4"
|
||||
version = "0.10.7"
|
||||
|
||||
[dev-dependencies.digest]
|
||||
version = "0.10.4"
|
||||
version = "0.10.7"
|
||||
features = ["dev"]
|
||||
|
||||
[dev-dependencies.hex-literal]
|
||||
|
@ -43,8 +59,10 @@ asm-aarch64 = ["asm"]
|
|||
compress = []
|
||||
default = ["std"]
|
||||
force-soft = []
|
||||
loongarch64_asm = []
|
||||
oid = ["digest/oid"]
|
||||
std = ["digest/std"]
|
||||
|
||||
[target."cfg(any(target_arch = \"aarch64\", target_arch = \"x86_64\", target_arch = \"x86\"))".dependencies.cpufeatures]
|
||||
version = "0.2"
|
||||
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
//! Algorithmically, there are only 2 core algorithms: SHA-256 and SHA-512.
|
||||
//! All other algorithms are just applications of these with different initial
|
||||
//! hash values, and truncated to different digest bit lengths. The first two
|
||||
//! algorithms in the list are based on SHA-256, while the last three on SHA-512.
|
||||
//! algorithms in the list are based on SHA-256, while the last four are based
|
||||
//! on SHA-512.
|
||||
//!
|
||||
//! # Usage
|
||||
//!
|
||||
|
|
|
@ -17,6 +17,9 @@ cfg_if::cfg_if! {
|
|||
mod soft;
|
||||
mod aarch64;
|
||||
use aarch64::compress;
|
||||
} else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
|
||||
mod loongarch64_asm;
|
||||
use loongarch64_asm::compress;
|
||||
} else {
|
||||
mod soft;
|
||||
use soft::compress;
|
||||
|
|
|
@ -1,15 +1,159 @@
|
|||
//! SHA-256 `aarch64` backend.
|
||||
|
||||
// Implementation adapted from mbedtls.
|
||||
|
||||
// TODO: stdarch intrinsics: RustCrypto/hashes#257
|
||||
|
||||
use core::arch::{aarch64::*, asm};
|
||||
|
||||
use crate::consts::K32;
|
||||
|
||||
cpufeatures::new!(sha2_hwcap, "sha2");
|
||||
|
||||
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
|
||||
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
|
||||
// after stabilization
|
||||
if sha2_hwcap::get() {
|
||||
sha2_asm::compress256(state, blocks);
|
||||
unsafe { sha256_compress(state, blocks) }
|
||||
} else {
|
||||
super::soft::compress(state, blocks);
|
||||
}
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sha2")]
|
||||
unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
|
||||
// SAFETY: Requires the sha2 feature.
|
||||
|
||||
// Load state into vectors.
|
||||
let mut abcd = vld1q_u32(state[0..4].as_ptr());
|
||||
let mut efgh = vld1q_u32(state[4..8].as_ptr());
|
||||
|
||||
// Iterate through the message blocks.
|
||||
for block in blocks {
|
||||
// Keep original state values.
|
||||
let abcd_orig = abcd;
|
||||
let efgh_orig = efgh;
|
||||
|
||||
// Load the message block into vectors, assuming little endianness.
|
||||
let mut s0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[0..16].as_ptr())));
|
||||
let mut s1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[16..32].as_ptr())));
|
||||
let mut s2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[32..48].as_ptr())));
|
||||
let mut s3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[48..64].as_ptr())));
|
||||
|
||||
// Rounds 0 to 3
|
||||
let mut tmp = vaddq_u32(s0, vld1q_u32(&K32[0]));
|
||||
let mut abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
|
||||
// Rounds 4 to 7
|
||||
tmp = vaddq_u32(s1, vld1q_u32(&K32[4]));
|
||||
abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
|
||||
// Rounds 8 to 11
|
||||
tmp = vaddq_u32(s2, vld1q_u32(&K32[8]));
|
||||
abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
|
||||
// Rounds 12 to 15
|
||||
tmp = vaddq_u32(s3, vld1q_u32(&K32[12]));
|
||||
abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
|
||||
for t in (16..64).step_by(16) {
|
||||
// Rounds t to t + 3
|
||||
s0 = vsha256su1q_u32(vsha256su0q_u32(s0, s1), s2, s3);
|
||||
tmp = vaddq_u32(s0, vld1q_u32(&K32[t]));
|
||||
abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
|
||||
// Rounds t + 4 to t + 7
|
||||
s1 = vsha256su1q_u32(vsha256su0q_u32(s1, s2), s3, s0);
|
||||
tmp = vaddq_u32(s1, vld1q_u32(&K32[t + 4]));
|
||||
abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
|
||||
// Rounds t + 8 to t + 11
|
||||
s2 = vsha256su1q_u32(vsha256su0q_u32(s2, s3), s0, s1);
|
||||
tmp = vaddq_u32(s2, vld1q_u32(&K32[t + 8]));
|
||||
abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
|
||||
// Rounds t + 12 to t + 15
|
||||
s3 = vsha256su1q_u32(vsha256su0q_u32(s3, s0), s1, s2);
|
||||
tmp = vaddq_u32(s3, vld1q_u32(&K32[t + 12]));
|
||||
abcd_prev = abcd;
|
||||
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
|
||||
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
|
||||
}
|
||||
|
||||
// Add the block-specific state to the original state.
|
||||
abcd = vaddq_u32(abcd, abcd_orig);
|
||||
efgh = vaddq_u32(efgh, efgh_orig);
|
||||
}
|
||||
|
||||
// Store vectors into state.
|
||||
vst1q_u32(state[0..4].as_mut_ptr(), abcd);
|
||||
vst1q_u32(state[4..8].as_mut_ptr(), efgh);
|
||||
}
|
||||
|
||||
// TODO remove these polyfills once SHA2 intrinsics land
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha256hq_u32(
|
||||
mut hash_efgh: uint32x4_t,
|
||||
hash_abcd: uint32x4_t,
|
||||
wk: uint32x4_t,
|
||||
) -> uint32x4_t {
|
||||
asm!(
|
||||
"SHA256H {:q}, {:q}, {:v}.4S",
|
||||
inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
hash_efgh
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha256h2q_u32(
|
||||
mut hash_efgh: uint32x4_t,
|
||||
hash_abcd: uint32x4_t,
|
||||
wk: uint32x4_t,
|
||||
) -> uint32x4_t {
|
||||
asm!(
|
||||
"SHA256H2 {:q}, {:q}, {:v}.4S",
|
||||
inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
hash_efgh
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha256su0q_u32(mut w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
|
||||
asm!(
|
||||
"SHA256SU0 {:v}.4S, {:v}.4S",
|
||||
inout(vreg) w0_3, in(vreg) w4_7,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
w0_3
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha256su1q_u32(
|
||||
mut tw0_3: uint32x4_t,
|
||||
w8_11: uint32x4_t,
|
||||
w12_15: uint32x4_t,
|
||||
) -> uint32x4_t {
|
||||
asm!(
|
||||
"SHA256SU1 {:v}.4S, {:v}.4S, {:v}.4S",
|
||||
inout(vreg) tw0_3, in(vreg) w8_11, in(vreg) w12_15,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
tw0_3
|
||||
}
|
||||
|
|
|
@ -0,0 +1,227 @@
|
|||
//! LoongArch64 assembly backend
|
||||
|
||||
macro_rules! c {
|
||||
($($l:expr)*) => {
|
||||
concat!($($l ,)*)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! rounda {
|
||||
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
|
||||
c!(
|
||||
"ld.w $a5, $a1, (" $i " * 4);"
|
||||
"revb.2h $a5, $a5;"
|
||||
"rotri.w $a5, $a5, 16;"
|
||||
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! roundb {
|
||||
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
|
||||
c!(
|
||||
"ld.w $a4, $sp, (((" $i " - 15) & 0xF) * 4);"
|
||||
"ld.w $a5, $sp, (((" $i " - 16) & 0xF) * 4);"
|
||||
"ld.w $a6, $sp, (((" $i " - 7) & 0xF) * 4);"
|
||||
"add.w $a5, $a5, $a6;"
|
||||
"rotri.w $a6, $a4, 18;"
|
||||
"srli.w $a7, $a4, 3;"
|
||||
"rotri.w $a4, $a4, 7;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"add.w $a5, $a5, $a4;"
|
||||
"ld.w $a4, $sp, (((" $i " - 2) & 0xF) * 4);"
|
||||
"rotri.w $a6, $a4, 19;"
|
||||
"srli.w $a7, $a4, 10;"
|
||||
"rotri.w $a4, $a4, 17;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"add.w $a5, $a5, $a4;"
|
||||
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! roundtail {
|
||||
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
|
||||
c!(
|
||||
// Part 0
|
||||
"rotri.w $a6, " $e ", 11;"
|
||||
"rotri.w $a7, " $e ", 25;"
|
||||
"rotri.w $a4, " $e ", 6;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"xor $a6, " $g ", " $f ";"
|
||||
"ld.w $a7, $a3, " $i " * 4;"
|
||||
"and $a6, $a6, " $e ";"
|
||||
"xor $a6, $a6, " $g ";"
|
||||
"add.w $a4, $a4, $a6;"
|
||||
"add.w $a4, $a4, $a7;"
|
||||
"add.w " $h ", " $h ", $a5;"
|
||||
"add.w " $h ", " $h ", $a4;"
|
||||
// Part 1
|
||||
"add.w " $d ", " $d ", " $h ";"
|
||||
// Part 2
|
||||
"rotri.w $a6, " $a ", 13;"
|
||||
"rotri.w $a7, " $a ", 22;"
|
||||
"rotri.w $a4, " $a ", 2;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"add.w " $h ", " $h ", $a4;"
|
||||
"or $a4, " $c ", " $b ";"
|
||||
"and $a6, " $c ", " $b ";"
|
||||
"and $a4, $a4, " $a ";"
|
||||
"or $a4, $a4, $a6;"
|
||||
"add.w " $h ", " $h ", $a4;"
|
||||
"st.w $a5, $sp, ((" $i " & 0xF) * 4);"
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
|
||||
if blocks.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
// Allocate scratch stack space
|
||||
"addi.d $sp, $sp, -64;",
|
||||
|
||||
// Load state
|
||||
"ld.w $t0, $a0, 0",
|
||||
"ld.w $t1, $a0, 4",
|
||||
"ld.w $t2, $a0, 8",
|
||||
"ld.w $t3, $a0, 12",
|
||||
"ld.w $t4, $a0, 16",
|
||||
"ld.w $t5, $a0, 20",
|
||||
"ld.w $t6, $a0, 24",
|
||||
"ld.w $t7, $a0, 28",
|
||||
|
||||
"42:",
|
||||
|
||||
// Do 64 rounds of hashing
|
||||
rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
|
||||
// Update state registers
|
||||
"ld.w $a4, $a0, 0", // a
|
||||
"ld.w $a5, $a0, 4", // b
|
||||
"ld.w $a6, $a0, 8", // c
|
||||
"ld.w $a7, $a0, 12", // d
|
||||
"add.w $t0, $t0, $a4",
|
||||
"add.w $t1, $t1, $a5",
|
||||
"add.w $t2, $t2, $a6",
|
||||
"add.w $t3, $t3, $a7",
|
||||
"ld.w $a4, $a0, 16", // e
|
||||
"ld.w $a5, $a0, 20", // f
|
||||
"ld.w $a6, $a0, 24", // g
|
||||
"ld.w $a7, $a0, 28", // h
|
||||
"add.w $t4, $t4, $a4",
|
||||
"add.w $t5, $t5, $a5",
|
||||
"add.w $t6, $t6, $a6",
|
||||
"add.w $t7, $t7, $a7",
|
||||
|
||||
// Save updated state
|
||||
"st.w $t0, $a0, 0",
|
||||
"st.w $t1, $a0, 4",
|
||||
"st.w $t2, $a0, 8",
|
||||
"st.w $t3, $a0, 12",
|
||||
"st.w $t4, $a0, 16",
|
||||
"st.w $t5, $a0, 20",
|
||||
"st.w $t6, $a0, 24",
|
||||
"st.w $t7, $a0, 28",
|
||||
|
||||
// Looping over blocks
|
||||
"addi.d $a1, $a1, 64",
|
||||
"addi.d $a2, $a2, -1",
|
||||
"bnez $a2, 42b",
|
||||
|
||||
// Restore stack register
|
||||
"addi.d $sp, $sp, 64",
|
||||
|
||||
in("$a0") state,
|
||||
inout("$a1") blocks.as_ptr() => _,
|
||||
inout("$a2") blocks.len() => _,
|
||||
in("$a3") crate::consts::K32.as_ptr(),
|
||||
|
||||
// Clobbers
|
||||
out("$a4") _,
|
||||
out("$a5") _,
|
||||
out("$a6") _,
|
||||
out("$a7") _,
|
||||
out("$t0") _,
|
||||
out("$t1") _,
|
||||
out("$t2") _,
|
||||
out("$t3") _,
|
||||
out("$t4") _,
|
||||
out("$t5") _,
|
||||
out("$t6") _,
|
||||
out("$t7") _,
|
||||
|
||||
options(preserves_flags),
|
||||
);
|
||||
}
|
||||
}
|
|
@ -15,6 +15,13 @@ cfg_if::cfg_if! {
|
|||
}
|
||||
mod x86;
|
||||
use x86::compress;
|
||||
} else if #[cfg(all(feature = "asm", target_arch = "aarch64"))] {
|
||||
mod soft;
|
||||
mod aarch64;
|
||||
use aarch64::compress;
|
||||
} else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
|
||||
mod loongarch64_asm;
|
||||
use loongarch64_asm::compress;
|
||||
} else {
|
||||
mod soft;
|
||||
use soft::compress;
|
||||
|
|
|
@ -0,0 +1,235 @@
|
|||
// Implementation adapted from mbedtls.
|
||||
|
||||
use core::arch::{aarch64::*, asm};
|
||||
|
||||
use crate::consts::K64;
|
||||
|
||||
cpufeatures::new!(sha3_hwcap, "sha3");
|
||||
|
||||
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
|
||||
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
|
||||
// after stabilization
|
||||
if sha3_hwcap::get() {
|
||||
unsafe { sha512_compress(state, blocks) }
|
||||
} else {
|
||||
super::soft::compress(state, blocks);
|
||||
}
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sha3")]
|
||||
unsafe fn sha512_compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
|
||||
// SAFETY: Requires the sha3 feature.
|
||||
|
||||
// Load state into vectors.
|
||||
let mut ab = vld1q_u64(state[0..2].as_ptr());
|
||||
let mut cd = vld1q_u64(state[2..4].as_ptr());
|
||||
let mut ef = vld1q_u64(state[4..6].as_ptr());
|
||||
let mut gh = vld1q_u64(state[6..8].as_ptr());
|
||||
|
||||
// Iterate through the message blocks.
|
||||
for block in blocks {
|
||||
// Keep original state values.
|
||||
let ab_orig = ab;
|
||||
let cd_orig = cd;
|
||||
let ef_orig = ef;
|
||||
let gh_orig = gh;
|
||||
|
||||
// Load the message block into vectors, assuming little endianness.
|
||||
let mut s0 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[0..16].as_ptr())));
|
||||
let mut s1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[16..32].as_ptr())));
|
||||
let mut s2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[32..48].as_ptr())));
|
||||
let mut s3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[48..64].as_ptr())));
|
||||
let mut s4 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[64..80].as_ptr())));
|
||||
let mut s5 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[80..96].as_ptr())));
|
||||
let mut s6 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[96..112].as_ptr())));
|
||||
let mut s7 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[112..128].as_ptr())));
|
||||
|
||||
// Rounds 0 and 1
|
||||
let mut initial_sum = vaddq_u64(s0, vld1q_u64(&K64[0]));
|
||||
let mut sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
|
||||
let mut intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
|
||||
gh = vsha512h2q_u64(intermed, cd, ab);
|
||||
cd = vaddq_u64(cd, intermed);
|
||||
|
||||
// Rounds 2 and 3
|
||||
initial_sum = vaddq_u64(s1, vld1q_u64(&K64[2]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
|
||||
ef = vsha512h2q_u64(intermed, ab, gh);
|
||||
ab = vaddq_u64(ab, intermed);
|
||||
|
||||
// Rounds 4 and 5
|
||||
initial_sum = vaddq_u64(s2, vld1q_u64(&K64[4]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
|
||||
cd = vsha512h2q_u64(intermed, gh, ef);
|
||||
gh = vaddq_u64(gh, intermed);
|
||||
|
||||
// Rounds 6 and 7
|
||||
initial_sum = vaddq_u64(s3, vld1q_u64(&K64[6]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
|
||||
ab = vsha512h2q_u64(intermed, ef, cd);
|
||||
ef = vaddq_u64(ef, intermed);
|
||||
|
||||
// Rounds 8 and 9
|
||||
initial_sum = vaddq_u64(s4, vld1q_u64(&K64[8]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
|
||||
gh = vsha512h2q_u64(intermed, cd, ab);
|
||||
cd = vaddq_u64(cd, intermed);
|
||||
|
||||
// Rounds 10 and 11
|
||||
initial_sum = vaddq_u64(s5, vld1q_u64(&K64[10]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
|
||||
ef = vsha512h2q_u64(intermed, ab, gh);
|
||||
ab = vaddq_u64(ab, intermed);
|
||||
|
||||
// Rounds 12 and 13
|
||||
initial_sum = vaddq_u64(s6, vld1q_u64(&K64[12]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
|
||||
cd = vsha512h2q_u64(intermed, gh, ef);
|
||||
gh = vaddq_u64(gh, intermed);
|
||||
|
||||
// Rounds 14 and 15
|
||||
initial_sum = vaddq_u64(s7, vld1q_u64(&K64[14]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
|
||||
ab = vsha512h2q_u64(intermed, ef, cd);
|
||||
ef = vaddq_u64(ef, intermed);
|
||||
|
||||
for t in (16..80).step_by(16) {
|
||||
// Rounds t and t + 1
|
||||
s0 = vsha512su1q_u64(vsha512su0q_u64(s0, s1), s7, vextq_u64(s4, s5, 1));
|
||||
initial_sum = vaddq_u64(s0, vld1q_u64(&K64[t]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
|
||||
gh = vsha512h2q_u64(intermed, cd, ab);
|
||||
cd = vaddq_u64(cd, intermed);
|
||||
|
||||
// Rounds t + 2 and t + 3
|
||||
s1 = vsha512su1q_u64(vsha512su0q_u64(s1, s2), s0, vextq_u64(s5, s6, 1));
|
||||
initial_sum = vaddq_u64(s1, vld1q_u64(&K64[t + 2]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
|
||||
ef = vsha512h2q_u64(intermed, ab, gh);
|
||||
ab = vaddq_u64(ab, intermed);
|
||||
|
||||
// Rounds t + 4 and t + 5
|
||||
s2 = vsha512su1q_u64(vsha512su0q_u64(s2, s3), s1, vextq_u64(s6, s7, 1));
|
||||
initial_sum = vaddq_u64(s2, vld1q_u64(&K64[t + 4]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
|
||||
cd = vsha512h2q_u64(intermed, gh, ef);
|
||||
gh = vaddq_u64(gh, intermed);
|
||||
|
||||
// Rounds t + 6 and t + 7
|
||||
s3 = vsha512su1q_u64(vsha512su0q_u64(s3, s4), s2, vextq_u64(s7, s0, 1));
|
||||
initial_sum = vaddq_u64(s3, vld1q_u64(&K64[t + 6]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
|
||||
ab = vsha512h2q_u64(intermed, ef, cd);
|
||||
ef = vaddq_u64(ef, intermed);
|
||||
|
||||
// Rounds t + 8 and t + 9
|
||||
s4 = vsha512su1q_u64(vsha512su0q_u64(s4, s5), s3, vextq_u64(s0, s1, 1));
|
||||
initial_sum = vaddq_u64(s4, vld1q_u64(&K64[t + 8]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
|
||||
gh = vsha512h2q_u64(intermed, cd, ab);
|
||||
cd = vaddq_u64(cd, intermed);
|
||||
|
||||
// Rounds t + 10 and t + 11
|
||||
s5 = vsha512su1q_u64(vsha512su0q_u64(s5, s6), s4, vextq_u64(s1, s2, 1));
|
||||
initial_sum = vaddq_u64(s5, vld1q_u64(&K64[t + 10]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
|
||||
ef = vsha512h2q_u64(intermed, ab, gh);
|
||||
ab = vaddq_u64(ab, intermed);
|
||||
|
||||
// Rounds t + 12 and t + 13
|
||||
s6 = vsha512su1q_u64(vsha512su0q_u64(s6, s7), s5, vextq_u64(s2, s3, 1));
|
||||
initial_sum = vaddq_u64(s6, vld1q_u64(&K64[t + 12]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
|
||||
cd = vsha512h2q_u64(intermed, gh, ef);
|
||||
gh = vaddq_u64(gh, intermed);
|
||||
|
||||
// Rounds t + 14 and t + 15
|
||||
s7 = vsha512su1q_u64(vsha512su0q_u64(s7, s0), s6, vextq_u64(s3, s4, 1));
|
||||
initial_sum = vaddq_u64(s7, vld1q_u64(&K64[t + 14]));
|
||||
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
|
||||
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
|
||||
ab = vsha512h2q_u64(intermed, ef, cd);
|
||||
ef = vaddq_u64(ef, intermed);
|
||||
}
|
||||
|
||||
// Add the block-specific state to the original state.
|
||||
ab = vaddq_u64(ab, ab_orig);
|
||||
cd = vaddq_u64(cd, cd_orig);
|
||||
ef = vaddq_u64(ef, ef_orig);
|
||||
gh = vaddq_u64(gh, gh_orig);
|
||||
}
|
||||
|
||||
// Store vectors into state.
|
||||
vst1q_u64(state[0..2].as_mut_ptr(), ab);
|
||||
vst1q_u64(state[2..4].as_mut_ptr(), cd);
|
||||
vst1q_u64(state[4..6].as_mut_ptr(), ef);
|
||||
vst1q_u64(state[6..8].as_mut_ptr(), gh);
|
||||
}
|
||||
|
||||
// TODO remove these polyfills once SHA3 intrinsics land
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha512hq_u64(
|
||||
mut hash_ed: uint64x2_t,
|
||||
hash_gf: uint64x2_t,
|
||||
kwh_kwh2: uint64x2_t,
|
||||
) -> uint64x2_t {
|
||||
asm!(
|
||||
"SHA512H {:q}, {:q}, {:v}.2D",
|
||||
inout(vreg) hash_ed, in(vreg) hash_gf, in(vreg) kwh_kwh2,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
hash_ed
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha512h2q_u64(
|
||||
mut sum_ab: uint64x2_t,
|
||||
hash_c_: uint64x2_t,
|
||||
hash_ab: uint64x2_t,
|
||||
) -> uint64x2_t {
|
||||
asm!(
|
||||
"SHA512H2 {:q}, {:q}, {:v}.2D",
|
||||
inout(vreg) sum_ab, in(vreg) hash_c_, in(vreg) hash_ab,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
sum_ab
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha512su0q_u64(mut w0_1: uint64x2_t, w2_: uint64x2_t) -> uint64x2_t {
|
||||
asm!(
|
||||
"SHA512SU0 {:v}.2D, {:v}.2D",
|
||||
inout(vreg) w0_1, in(vreg) w2_,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
w0_1
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn vsha512su1q_u64(
|
||||
mut s01_s02: uint64x2_t,
|
||||
w14_15: uint64x2_t,
|
||||
w9_10: uint64x2_t,
|
||||
) -> uint64x2_t {
|
||||
asm!(
|
||||
"SHA512SU1 {:v}.2D, {:v}.2D, {:v}.2D",
|
||||
inout(vreg) s01_s02, in(vreg) w14_15, in(vreg) w9_10,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
s01_s02
|
||||
}
|
|
@ -0,0 +1,242 @@
|
|||
//! LoongArch64 assembly backend
|
||||
|
||||
macro_rules! c {
|
||||
($($l:expr)*) => {
|
||||
concat!($($l ,)*)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! rounda {
|
||||
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
|
||||
c!(
|
||||
"ld.d $a5, $a1, (" $i " * 8);"
|
||||
"revb.d $a5, $a5;"
|
||||
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! roundb {
|
||||
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
|
||||
c!(
|
||||
"ld.d $a4, $sp, (((" $i " - 15) & 0xF) * 8);"
|
||||
"ld.d $a5, $sp, (((" $i " - 16) & 0xF) * 8);"
|
||||
"ld.d $a6, $sp, (((" $i " - 7) & 0xF) * 8);"
|
||||
"add.d $a5, $a5, $a6;"
|
||||
"rotri.d $a6, $a4, 8;"
|
||||
"srli.d $a7, $a4, 7;"
|
||||
"rotri.d $a4, $a4, 1;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"add.d $a5, $a5, $a4;"
|
||||
"ld.d $a4, $sp, (((" $i " - 2) & 0xF) * 8);"
|
||||
"rotri.d $a6, $a4, 61;"
|
||||
"srli.d $a7, $a4, 6;"
|
||||
"rotri.d $a4, $a4, 19;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"add.d $a5, $a5, $a4;"
|
||||
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! roundtail {
|
||||
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
|
||||
c!(
|
||||
// Part 0
|
||||
"rotri.d $a6, " $e ", 18;"
|
||||
"rotri.d $a7, " $e ", 41;"
|
||||
"rotri.d $a4, " $e ", 14;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"xor $a6, " $g ", " $f ";"
|
||||
"ld.d $a7, $a3, " $i " * 8;"
|
||||
"and $a6, $a6, " $e ";"
|
||||
"xor $a6, $a6, " $g ";"
|
||||
"add.d $a4, $a4, $a6;"
|
||||
"add.d $a4, $a4, $a7;"
|
||||
"add.d " $h ", " $h ", $a5;"
|
||||
"add.d " $h ", " $h ", $a4;"
|
||||
// Part 1
|
||||
"add.d " $d ", " $d ", " $h ";"
|
||||
// Part 2
|
||||
"rotri.d $a6, " $a ", 39;"
|
||||
"rotri.d $a7, " $a ", 34;"
|
||||
"rotri.d $a4, " $a ", 28;"
|
||||
"xor $a6, $a6, $a7;"
|
||||
"xor $a4, $a4, $a6;"
|
||||
"add.d " $h ", " $h ", $a4;"
|
||||
"or $a4, " $c ", " $b ";"
|
||||
"and $a6, " $c ", " $b ";"
|
||||
"and $a4, $a4, " $a ";"
|
||||
"or $a4, $a4, $a6;"
|
||||
"add.d " $h ", " $h ", $a4;"
|
||||
"st.d $a5, $sp, ((" $i " & 0xF) * 8);"
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
|
||||
if blocks.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
// Allocate scratch stack space
|
||||
"addi.d $sp, $sp, -128;",
|
||||
|
||||
// Load state
|
||||
"ld.d $t0, $a0, 0",
|
||||
"ld.d $t1, $a0, 8",
|
||||
"ld.d $t2, $a0, 16",
|
||||
"ld.d $t3, $a0, 24",
|
||||
"ld.d $t4, $a0, 32",
|
||||
"ld.d $t5, $a0, 40",
|
||||
"ld.d $t6, $a0, 48",
|
||||
"ld.d $t7, $a0, 56",
|
||||
|
||||
"42:",
|
||||
|
||||
// Do 64 rounds of hashing
|
||||
rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(64, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(65, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(66, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(67, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(68, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(69, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(70, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(71, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
roundb!(72, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
|
||||
roundb!(73, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
|
||||
roundb!(74, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
|
||||
roundb!(75, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
|
||||
roundb!(76, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
|
||||
roundb!(77, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
|
||||
roundb!(78, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
|
||||
roundb!(79, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
|
||||
|
||||
// Update state registers
|
||||
"ld.d $a4, $a0, 0", // a
|
||||
"ld.d $a5, $a0, 8", // b
|
||||
"ld.d $a6, $a0, 16", // c
|
||||
"ld.d $a7, $a0, 24", // d
|
||||
"add.d $t0, $t0, $a4",
|
||||
"add.d $t1, $t1, $a5",
|
||||
"add.d $t2, $t2, $a6",
|
||||
"add.d $t3, $t3, $a7",
|
||||
"ld.d $a4, $a0, 32", // e
|
||||
"ld.d $a5, $a0, 40", // f
|
||||
"ld.d $a6, $a0, 48", // g
|
||||
"ld.d $a7, $a0, 56", // h
|
||||
"add.d $t4, $t4, $a4",
|
||||
"add.d $t5, $t5, $a5",
|
||||
"add.d $t6, $t6, $a6",
|
||||
"add.d $t7, $t7, $a7",
|
||||
|
||||
// Save updated state
|
||||
"st.d $t0, $a0, 0",
|
||||
"st.d $t1, $a0, 8",
|
||||
"st.d $t2, $a0, 16",
|
||||
"st.d $t3, $a0, 24",
|
||||
"st.d $t4, $a0, 32",
|
||||
"st.d $t5, $a0, 40",
|
||||
"st.d $t6, $a0, 48",
|
||||
"st.d $t7, $a0, 56",
|
||||
|
||||
// Looping over blocks
|
||||
"addi.d $a1, $a1, 128",
|
||||
"addi.d $a2, $a2, -1",
|
||||
"bnez $a2, 42b",
|
||||
|
||||
// Restore stack register
|
||||
"addi.d $sp, $sp, 128",
|
||||
|
||||
in("$a0") state,
|
||||
inout("$a1") blocks.as_ptr() => _,
|
||||
inout("$a2") blocks.len() => _,
|
||||
in("$a3") crate::consts::K64.as_ptr(),
|
||||
|
||||
// Clobbers
|
||||
out("$a4") _,
|
||||
out("$a5") _,
|
||||
out("$a6") _,
|
||||
out("$a7") _,
|
||||
out("$t0") _,
|
||||
out("$t1") _,
|
||||
out("$t2") _,
|
||||
out("$t3") _,
|
||||
out("$t4") _,
|
||||
out("$t5") _,
|
||||
out("$t6") _,
|
||||
out("$t7") _,
|
||||
|
||||
options(preserves_flags),
|
||||
);
|
||||
}
|
||||
}
|
Загрузка…
Ссылка в новой задаче