зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1716518 - Upgrade ppv-lite86 to v0.2.10. r=emilio
Differential Revision: https://phabricator.services.mozilla.com/D117836
This commit is contained in:
Родитель
4d155afc6e
Коммит
478846d726
|
@ -3902,9 +3902,9 @@ checksum = "b18befed8bc2b61abc79a457295e7e838417326da1586050b919414073977f19"
|
|||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.6"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
|
||||
checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"3cf6fa0e4089c12be1d19ac9082aabed68d3f193dbd5024379c22a8a4db15abe","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6c19b3062aec77a92130be1ce55c90aeca0811bcb19951c25a50c8fbe89a26d0","src/lib.rs":"75beb27d89dcc7541c8e81ad1f4bec81908d8d5fa0e3adec47cb1a1f008dfd32","src/soft.rs":"6fb8aa428183ec09d63d45761507d8da6dffc45990f2d1fcfd387c4c856599cc","src/types.rs":"4890069359ed53575a6b9a8168037ccdd4b029c8d61d540e9770fe3c90359345","src/x86_64/mod.rs":"e95910e8c9d23c212055598a437bfcdfaebf4f12e03a04e75f961bc3e9e257a1","src/x86_64/sse2.rs":"da72424e9e3fabd6236d4de80edb1448dc0bac02797df8e15298d412cdaef10c"},"package":"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"}
|
||||
{"files":{"Cargo.toml":"5d9b7092f252e3a6f7f50f6aeb1b873803b322cf5edbf0ae07e0a27d57df3fbf","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6f38250421846499c816c222d0b48155bfab09a9921e6c400d7b75567ab98f14","src/lib.rs":"bcf308d7037e259d6640a785556fcdb86653cb4f72f64fbfeda9899857c14479","src/soft.rs":"5cdee0e46c99a9d5078c0b3a733fe6fd1430ed0a888ef747bc2a1271265a1140","src/types.rs":"a354d2e3267c7c451a1420903314a358328346772ca964fa6c1ef7b96c983930","src/x86_64/mod.rs":"4d5a1da816f8e59bb385464f005075de889d1060e24dcee6709b321a3d6c92f7","src/x86_64/sse2.rs":"a9df3e7b3b8ffcd249a2cbed0e538042f7747dfa6ae7af0c9af364dc5a12d409"},"package":"ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"}
|
|
@ -13,7 +13,7 @@
|
|||
[package]
|
||||
edition = "2018"
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.6"
|
||||
version = "0.2.10"
|
||||
authors = ["The CryptoCorrosion Contributors"]
|
||||
description = "Implementation of the crypto-simd API for x86"
|
||||
keywords = ["crypto", "simd", "x86"]
|
||||
|
@ -24,7 +24,8 @@ repository = "https://github.com/cryptocorrosion/cryptocorrosion"
|
|||
[dependencies]
|
||||
|
||||
[features]
|
||||
default = ["std", "simd"]
|
||||
default = ["std"]
|
||||
no_simd = []
|
||||
simd = []
|
||||
std = []
|
||||
[badges.travis-ci]
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
#![allow(non_camel_case_types)]
|
||||
|
||||
use core::ops::*;
|
||||
use crate::soft::{x2, x4};
|
||||
use crate::types::*;
|
||||
use core::ops::*;
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub union vec128_storage {
|
||||
d: [u32; 4],
|
||||
q: [u64; 2],
|
||||
o: [u128; 1],
|
||||
}
|
||||
impl From<[u32; 4]> for vec128_storage {
|
||||
#[inline]
|
||||
|
@ -16,7 +16,38 @@ impl From<[u32; 4]> for vec128_storage {
|
|||
Self { d }
|
||||
}
|
||||
}
|
||||
#[derive(Clone, Copy)]
|
||||
impl From<vec128_storage> for [u32; 4] {
|
||||
#[inline]
|
||||
fn from(d: vec128_storage) -> Self {
|
||||
unsafe { d.d }
|
||||
}
|
||||
}
|
||||
impl From<[u64; 2]> for vec128_storage {
|
||||
#[inline]
|
||||
fn from(q: [u64; 2]) -> Self {
|
||||
Self { q }
|
||||
}
|
||||
}
|
||||
impl From<vec128_storage> for [u64; 2] {
|
||||
#[inline]
|
||||
fn from(q: vec128_storage) -> Self {
|
||||
unsafe { q.q }
|
||||
}
|
||||
}
|
||||
impl Default for vec128_storage {
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
Self { q: [0, 0] }
|
||||
}
|
||||
}
|
||||
impl Eq for vec128_storage {}
|
||||
impl PartialEq<vec128_storage> for vec128_storage {
|
||||
#[inline]
|
||||
fn eq(&self, rhs: &Self) -> bool {
|
||||
unsafe { self.q == rhs.q }
|
||||
}
|
||||
}
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Default)]
|
||||
pub struct vec256_storage {
|
||||
v128: [vec128_storage; 2],
|
||||
}
|
||||
|
@ -30,7 +61,15 @@ impl vec256_storage {
|
|||
self.v128
|
||||
}
|
||||
}
|
||||
#[derive(Clone, Copy)]
|
||||
impl From<vec256_storage> for [u64; 4] {
|
||||
#[inline]
|
||||
fn from(q: vec256_storage) -> Self {
|
||||
let [a, b]: [u64; 2] = q.v128[0].into();
|
||||
let [c, d]: [u64; 2] = q.v128[1].into();
|
||||
[a, b, c, d]
|
||||
}
|
||||
}
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Default)]
|
||||
pub struct vec512_storage {
|
||||
v128: [vec128_storage; 4],
|
||||
}
|
||||
|
@ -106,14 +145,22 @@ where
|
|||
unsafe { T::unpack(q) }
|
||||
}
|
||||
|
||||
fn o_of_q(q: [u64; 2]) -> u128 {
|
||||
u128::from(q[0]) | (u128::from(q[1]) << 64)
|
||||
}
|
||||
|
||||
fn q_of_o(o: u128) -> [u64; 2] {
|
||||
[o as u64, (o >> 64) as u64]
|
||||
}
|
||||
|
||||
fn omap<T, F>(a: T, f: F) -> T
|
||||
where
|
||||
T: Store<vec128_storage> + Into<vec128_storage>,
|
||||
F: Fn(u128) -> u128,
|
||||
{
|
||||
let a: vec128_storage = a.into();
|
||||
let ao = unsafe { a.o };
|
||||
let o = vec128_storage { o: [f(ao[0])] };
|
||||
let ao = o_of_q(unsafe { a.q });
|
||||
let o = vec128_storage { q: q_of_o(f(ao)) };
|
||||
unsafe { T::unpack(o) }
|
||||
}
|
||||
|
||||
|
@ -124,10 +171,10 @@ where
|
|||
{
|
||||
let a: vec128_storage = a.into();
|
||||
let b: vec128_storage = b.into();
|
||||
let ao = unsafe { a.o };
|
||||
let bo = unsafe { b.o };
|
||||
let ao = o_of_q(unsafe { a.q });
|
||||
let bo = o_of_q(unsafe { b.q });
|
||||
let o = vec128_storage {
|
||||
o: [f(ao[0], bo[0])],
|
||||
q: q_of_o(f(ao, bo)),
|
||||
};
|
||||
unsafe { T::unpack(o) }
|
||||
}
|
||||
|
@ -411,7 +458,7 @@ impl From<u64x2_generic> for vec128_storage {
|
|||
impl From<u128x1_generic> for vec128_storage {
|
||||
#[inline(always)]
|
||||
fn from(o: u128x1_generic) -> Self {
|
||||
Self { o: o.0 }
|
||||
Self { q: q_of_o(o.0[0]) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -430,7 +477,7 @@ impl Store<vec128_storage> for u64x2_generic {
|
|||
impl Store<vec128_storage> for u128x1_generic {
|
||||
#[inline(always)]
|
||||
unsafe fn unpack(s: vec128_storage) -> Self {
|
||||
Self(s.o)
|
||||
Self([o_of_q(s.q); 1])
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -9,14 +9,14 @@ mod soft;
|
|||
mod types;
|
||||
pub use self::types::*;
|
||||
|
||||
#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
|
||||
#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
|
||||
pub mod x86_64;
|
||||
#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
|
||||
#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
|
||||
use self::x86_64 as arch;
|
||||
|
||||
#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
|
||||
#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
|
||||
pub mod generic;
|
||||
#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
|
||||
#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
|
||||
use self::generic as arch;
|
||||
|
||||
pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
|
||||
|
||||
use core::marker::PhantomData;
|
||||
use core::ops::*;
|
||||
use crate::types::*;
|
||||
use crate::{vec128_storage, vec256_storage, vec512_storage};
|
||||
use core::marker::PhantomData;
|
||||
use core::ops::*;
|
||||
|
||||
#[derive(Copy, Clone, Default)]
|
||||
#[allow(non_camel_case_types)]
|
||||
|
@ -238,7 +238,12 @@ macro_rules! fwd_unop_x4 {
|
|||
($fn:ident) => {
|
||||
#[inline(always)]
|
||||
fn $fn(self) -> Self {
|
||||
x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()])
|
||||
x4([
|
||||
self.0[0].$fn(),
|
||||
self.0[1].$fn(),
|
||||
self.0[2].$fn(),
|
||||
self.0[3].$fn(),
|
||||
])
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#![allow(non_camel_case_types)]
|
||||
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
|
||||
|
||||
pub trait AndNot {
|
||||
|
@ -44,182 +45,178 @@ pub trait RotateEachWord64 {
|
|||
|
||||
pub trait RotateEachWord128 {}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
mod types {
|
||||
//! Vector type naming scheme:
|
||||
//! uN[xP]xL
|
||||
//! Unsigned; N-bit words * P bits per lane * L lanes
|
||||
//!
|
||||
//! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
|
||||
//! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
|
||||
//! slow inter-lane operations.
|
||||
// Vector type naming scheme:
|
||||
// uN[xP]xL
|
||||
// Unsigned; N-bit words * P bits per lane * L lanes
|
||||
//
|
||||
// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
|
||||
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
|
||||
// slow inter-lane operations.
|
||||
|
||||
use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
|
||||
use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes};
|
||||
use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
|
||||
|
||||
pub trait UnsafeFrom<T> {
|
||||
unsafe fn unsafe_from(t: T) -> Self;
|
||||
}
|
||||
|
||||
/// A vector composed of two elements, which may be words or themselves vectors.
|
||||
pub trait Vec2<W> {
|
||||
fn extract(self, i: u32) -> W;
|
||||
fn insert(self, w: W, i: u32) -> Self;
|
||||
}
|
||||
|
||||
/// A vector composed of four elements, which may be words or themselves vectors.
|
||||
pub trait Vec4<W> {
|
||||
fn extract(self, i: u32) -> W;
|
||||
fn insert(self, w: W, i: u32) -> Self;
|
||||
}
|
||||
|
||||
// TODO: multiples of 4 should inherit this
|
||||
/// A vector composed of four words; depending on their size, operations may cross lanes.
|
||||
pub trait Words4 {
|
||||
fn shuffle1230(self) -> Self;
|
||||
fn shuffle2301(self) -> Self;
|
||||
fn shuffle3012(self) -> Self;
|
||||
}
|
||||
|
||||
/// A vector composed one or more lanes each composed of four words.
|
||||
pub trait LaneWords4 {
|
||||
fn shuffle_lane_words1230(self) -> Self;
|
||||
fn shuffle_lane_words2301(self) -> Self;
|
||||
fn shuffle_lane_words3012(self) -> Self;
|
||||
}
|
||||
|
||||
// TODO: make this a part of BitOps
|
||||
/// Exchange neigboring ranges of bits of the specified size
|
||||
pub trait Swap64 {
|
||||
fn swap1(self) -> Self;
|
||||
fn swap2(self) -> Self;
|
||||
fn swap4(self) -> Self;
|
||||
fn swap8(self) -> Self;
|
||||
fn swap16(self) -> Self;
|
||||
fn swap32(self) -> Self;
|
||||
fn swap64(self) -> Self;
|
||||
}
|
||||
|
||||
pub trait u32x4<M: Machine>:
|
||||
BitOps32
|
||||
+ Store<vec128_storage>
|
||||
+ ArithOps
|
||||
+ Vec4<u32>
|
||||
+ Words4
|
||||
+ LaneWords4
|
||||
+ StoreBytes
|
||||
+ MultiLane<[u32; 4]>
|
||||
+ Into<vec128_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x2<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec128_storage>
|
||||
+ ArithOps
|
||||
+ Vec2<u64>
|
||||
+ MultiLane<[u64; 2]>
|
||||
+ Into<vec128_storage>
|
||||
{
|
||||
}
|
||||
pub trait u128x1<M: Machine>:
|
||||
BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
|
||||
{
|
||||
#[allow(clippy::missing_safety_doc)]
|
||||
pub trait UnsafeFrom<T> {
|
||||
unsafe fn unsafe_from(t: T) -> Self;
|
||||
}
|
||||
|
||||
pub trait u32x4x2<M: Machine>:
|
||||
BitOps32
|
||||
+ Store<vec256_storage>
|
||||
+ Vec2<M::u32x4>
|
||||
+ MultiLane<[M::u32x4; 2]>
|
||||
+ ArithOps
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x2x2<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec256_storage>
|
||||
+ Vec2<M::u64x2>
|
||||
+ MultiLane<[M::u64x2; 2]>
|
||||
+ ArithOps
|
||||
+ StoreBytes
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x4<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec256_storage>
|
||||
+ Vec4<u64>
|
||||
+ MultiLane<[u64; 4]>
|
||||
+ ArithOps
|
||||
+ Words4
|
||||
+ StoreBytes
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
}
|
||||
pub trait u128x2<M: Machine>:
|
||||
BitOps128
|
||||
+ Store<vec256_storage>
|
||||
+ Vec2<M::u128x1>
|
||||
+ MultiLane<[M::u128x1; 2]>
|
||||
+ Swap64
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
/// A vector composed of two elements, which may be words or themselves vectors.
|
||||
pub trait Vec2<W> {
|
||||
fn extract(self, i: u32) -> W;
|
||||
fn insert(self, w: W, i: u32) -> Self;
|
||||
}
|
||||
|
||||
pub trait u32x4x4<M: Machine>:
|
||||
BitOps32
|
||||
+ Store<vec512_storage>
|
||||
+ Vec4<M::u32x4>
|
||||
+ MultiLane<[M::u32x4; 4]>
|
||||
+ ArithOps
|
||||
+ LaneWords4
|
||||
+ Into<vec512_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x2x4<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec512_storage>
|
||||
+ Vec4<M::u64x2>
|
||||
+ MultiLane<[M::u64x2; 4]>
|
||||
+ ArithOps
|
||||
+ Into<vec512_storage>
|
||||
{
|
||||
}
|
||||
// TODO: Words4
|
||||
pub trait u128x4<M: Machine>:
|
||||
BitOps128
|
||||
+ Store<vec512_storage>
|
||||
+ Vec4<M::u128x1>
|
||||
+ MultiLane<[M::u128x1; 4]>
|
||||
+ Swap64
|
||||
+ Into<vec512_storage>
|
||||
{
|
||||
/// A vector composed of four elements, which may be words or themselves vectors.
|
||||
pub trait Vec4<W> {
|
||||
fn extract(self, i: u32) -> W;
|
||||
fn insert(self, w: W, i: u32) -> Self;
|
||||
}
|
||||
|
||||
/// A vector composed of multiple 128-bit lanes.
|
||||
pub trait MultiLane<Lanes> {
|
||||
/// Split a multi-lane vector into single-lane vectors.
|
||||
fn to_lanes(self) -> Lanes;
|
||||
/// Build a multi-lane vector from individual lanes.
|
||||
fn from_lanes(lanes: Lanes) -> Self;
|
||||
}
|
||||
// TODO: multiples of 4 should inherit this
|
||||
/// A vector composed of four words; depending on their size, operations may cross lanes.
|
||||
pub trait Words4 {
|
||||
fn shuffle1230(self) -> Self;
|
||||
fn shuffle2301(self) -> Self;
|
||||
fn shuffle3012(self) -> Self;
|
||||
}
|
||||
|
||||
/// Combine single vectors into a multi-lane vector.
|
||||
pub trait VZip<V> {
|
||||
fn vzip(self) -> V;
|
||||
}
|
||||
/// A vector composed one or more lanes each composed of four words.
|
||||
pub trait LaneWords4 {
|
||||
fn shuffle_lane_words1230(self) -> Self;
|
||||
fn shuffle_lane_words2301(self) -> Self;
|
||||
fn shuffle_lane_words3012(self) -> Self;
|
||||
}
|
||||
|
||||
impl<V, T> VZip<V> for T
|
||||
where
|
||||
V: MultiLane<T>,
|
||||
{
|
||||
#[inline(always)]
|
||||
fn vzip(self) -> V {
|
||||
V::from_lanes(self)
|
||||
}
|
||||
// TODO: make this a part of BitOps
|
||||
/// Exchange neigboring ranges of bits of the specified size
|
||||
pub trait Swap64 {
|
||||
fn swap1(self) -> Self;
|
||||
fn swap2(self) -> Self;
|
||||
fn swap4(self) -> Self;
|
||||
fn swap8(self) -> Self;
|
||||
fn swap16(self) -> Self;
|
||||
fn swap32(self) -> Self;
|
||||
fn swap64(self) -> Self;
|
||||
}
|
||||
|
||||
pub trait u32x4<M: Machine>:
|
||||
BitOps32
|
||||
+ Store<vec128_storage>
|
||||
+ ArithOps
|
||||
+ Vec4<u32>
|
||||
+ Words4
|
||||
+ LaneWords4
|
||||
+ StoreBytes
|
||||
+ MultiLane<[u32; 4]>
|
||||
+ Into<vec128_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x2<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec128_storage>
|
||||
+ ArithOps
|
||||
+ Vec2<u64>
|
||||
+ MultiLane<[u64; 2]>
|
||||
+ Into<vec128_storage>
|
||||
{
|
||||
}
|
||||
pub trait u128x1<M: Machine>:
|
||||
BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
|
||||
{
|
||||
}
|
||||
|
||||
pub trait u32x4x2<M: Machine>:
|
||||
BitOps32
|
||||
+ Store<vec256_storage>
|
||||
+ Vec2<M::u32x4>
|
||||
+ MultiLane<[M::u32x4; 2]>
|
||||
+ ArithOps
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x2x2<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec256_storage>
|
||||
+ Vec2<M::u64x2>
|
||||
+ MultiLane<[M::u64x2; 2]>
|
||||
+ ArithOps
|
||||
+ StoreBytes
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x4<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec256_storage>
|
||||
+ Vec4<u64>
|
||||
+ MultiLane<[u64; 4]>
|
||||
+ ArithOps
|
||||
+ Words4
|
||||
+ StoreBytes
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
}
|
||||
pub trait u128x2<M: Machine>:
|
||||
BitOps128
|
||||
+ Store<vec256_storage>
|
||||
+ Vec2<M::u128x1>
|
||||
+ MultiLane<[M::u128x1; 2]>
|
||||
+ Swap64
|
||||
+ Into<vec256_storage>
|
||||
{
|
||||
}
|
||||
|
||||
pub trait u32x4x4<M: Machine>:
|
||||
BitOps32
|
||||
+ Store<vec512_storage>
|
||||
+ Vec4<M::u32x4>
|
||||
+ MultiLane<[M::u32x4; 4]>
|
||||
+ ArithOps
|
||||
+ LaneWords4
|
||||
+ Into<vec512_storage>
|
||||
{
|
||||
}
|
||||
pub trait u64x2x4<M: Machine>:
|
||||
BitOps64
|
||||
+ Store<vec512_storage>
|
||||
+ Vec4<M::u64x2>
|
||||
+ MultiLane<[M::u64x2; 4]>
|
||||
+ ArithOps
|
||||
+ Into<vec512_storage>
|
||||
{
|
||||
}
|
||||
// TODO: Words4
|
||||
pub trait u128x4<M: Machine>:
|
||||
BitOps128
|
||||
+ Store<vec512_storage>
|
||||
+ Vec4<M::u128x1>
|
||||
+ MultiLane<[M::u128x1; 4]>
|
||||
+ Swap64
|
||||
+ Into<vec512_storage>
|
||||
{
|
||||
}
|
||||
|
||||
/// A vector composed of multiple 128-bit lanes.
|
||||
pub trait MultiLane<Lanes> {
|
||||
/// Split a multi-lane vector into single-lane vectors.
|
||||
fn to_lanes(self) -> Lanes;
|
||||
/// Build a multi-lane vector from individual lanes.
|
||||
fn from_lanes(lanes: Lanes) -> Self;
|
||||
}
|
||||
|
||||
/// Combine single vectors into a multi-lane vector.
|
||||
pub trait VZip<V> {
|
||||
fn vzip(self) -> V;
|
||||
}
|
||||
|
||||
impl<V, T> VZip<V> for T
|
||||
where
|
||||
V: MultiLane<T>,
|
||||
{
|
||||
#[inline(always)]
|
||||
fn vzip(self) -> V {
|
||||
V::from_lanes(self)
|
||||
}
|
||||
}
|
||||
pub use self::types::*;
|
||||
|
||||
pub trait Machine: Sized + Copy {
|
||||
type u32x4: u32x4<Self>;
|
||||
|
@ -264,15 +261,27 @@ pub trait Machine: Sized + Copy {
|
|||
unsafe { V::unsafe_read_be(input) }
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||
/// environment.
|
||||
unsafe fn instance() -> Self;
|
||||
}
|
||||
|
||||
pub trait Store<S> {
|
||||
/// # Safety
|
||||
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||
/// environment.
|
||||
unsafe fn unpack(p: S) -> Self;
|
||||
}
|
||||
|
||||
pub trait StoreBytes {
|
||||
/// # Safety
|
||||
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||
/// environment.
|
||||
unsafe fn unsafe_read_le(input: &[u8]) -> Self;
|
||||
/// # Safety
|
||||
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||
/// environment.
|
||||
unsafe fn unsafe_read_be(input: &[u8]) -> Self;
|
||||
fn write_le(self, out: &mut [u8]);
|
||||
fn write_be(self, out: &mut [u8]);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
// crate minimums: sse2, x86_64
|
||||
|
||||
use core::arch::x86_64::{__m128i, __m256i};
|
||||
use crate::types::*;
|
||||
use core::arch::x86_64::{__m128i, __m256i};
|
||||
|
||||
mod sse2;
|
||||
|
||||
|
@ -137,6 +137,13 @@ impl Default for vec128_storage {
|
|||
vec128_storage { u128x1: [0] }
|
||||
}
|
||||
}
|
||||
impl Eq for vec128_storage {}
|
||||
impl PartialEq for vec128_storage {
|
||||
#[inline(always)]
|
||||
fn eq(&self, rhs: &Self) -> bool {
|
||||
unsafe { self.u128x1 == rhs.u128x1 }
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Copy, Clone)]
|
||||
|
@ -167,6 +174,13 @@ impl vec256_storage {
|
|||
unsafe { self.sse2 }
|
||||
}
|
||||
}
|
||||
impl Eq for vec256_storage {}
|
||||
impl PartialEq for vec256_storage {
|
||||
#[inline(always)]
|
||||
fn eq(&self, rhs: &Self) -> bool {
|
||||
unsafe { self.sse2 == rhs.sse2 }
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Copy, Clone)]
|
||||
|
@ -193,6 +207,13 @@ impl vec512_storage {
|
|||
unsafe { self.sse2 }
|
||||
}
|
||||
}
|
||||
impl Eq for vec512_storage {}
|
||||
impl PartialEq for vec512_storage {
|
||||
#[inline(always)]
|
||||
fn eq(&self, rhs: &Self) -> bool {
|
||||
unsafe { self.avx == rhs.avx }
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_into {
|
||||
($storage:ident, $array:ty, $name:ident) => {
|
||||
|
|
|
@ -166,28 +166,23 @@ macro_rules! impl_bitops128 {
|
|||
|
||||
macro_rules! rotr_32_s3 {
|
||||
($name:ident, $k0:expr, $k1:expr) => {
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_shuffle_epi8(
|
||||
self.x,
|
||||
_mm_set_epi64x($k0, $k1),
|
||||
)
|
||||
})
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! rotr_32 {
|
||||
($name:ident, $i:expr) => {
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_or_si128(
|
||||
_mm_srli_epi32(self.x, $i as i32),
|
||||
_mm_slli_epi32(self.x, 32 - $i as i32),
|
||||
)
|
||||
})
|
||||
}
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_or_si128(
|
||||
_mm_srli_epi32(self.x, $i as i32),
|
||||
_mm_slli_epi32(self.x, 32 - $i as i32),
|
||||
)
|
||||
})
|
||||
}
|
||||
};
|
||||
}
|
||||
impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<YesS3, S4, NI> {
|
||||
|
@ -228,28 +223,23 @@ impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<NoS3, S4, NI> {
|
|||
|
||||
macro_rules! rotr_64_s3 {
|
||||
($name:ident, $k0:expr, $k1:expr) => {
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_shuffle_epi8(
|
||||
self.x,
|
||||
_mm_set_epi64x($k0, $k1),
|
||||
)
|
||||
})
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! rotr_64 {
|
||||
($name:ident, $i:expr) => {
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_or_si128(
|
||||
_mm_srli_epi64(self.x, $i as i32),
|
||||
_mm_slli_epi64(self.x, 64 - $i as i32),
|
||||
)
|
||||
})
|
||||
}
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_or_si128(
|
||||
_mm_srli_epi64(self.x, $i as i32),
|
||||
_mm_slli_epi64(self.x, 64 - $i as i32),
|
||||
)
|
||||
})
|
||||
}
|
||||
};
|
||||
}
|
||||
impl<S4: Copy, NI: Copy> RotateEachWord32 for u64x2_sse2<YesS3, S4, NI> {
|
||||
|
@ -296,15 +286,15 @@ impl<S3: Copy, S4: Copy, NI: Copy> RotateEachWord64 for u64x2_sse2<S3, S4, NI> {
|
|||
|
||||
macro_rules! rotr_128 {
|
||||
($name:ident, $i:expr) => {
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_or_si128(
|
||||
_mm_srli_si128(self.x, $i as i32),
|
||||
_mm_slli_si128(self.x, 128 - $i as i32),
|
||||
)
|
||||
})
|
||||
}
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
_mm_or_si128(
|
||||
_mm_srli_si128(self.x, $i as i32),
|
||||
_mm_slli_si128(self.x, 128 - $i as i32),
|
||||
)
|
||||
})
|
||||
}
|
||||
};
|
||||
}
|
||||
// TODO: completely unoptimized
|
||||
|
@ -411,7 +401,7 @@ impl<S3, S4, NI> MultiLane<[u128; 1]> for u128x1_sse2<S3, S4, NI> {
|
|||
}
|
||||
#[inline(always)]
|
||||
fn from_lanes(xs: [u128; 1]) -> Self {
|
||||
unimplemented!()
|
||||
unimplemented!("{:?}", xs)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -780,7 +770,7 @@ impl<S4, NI> BSwap for u128x1_sse2<YesS3, S4, NI> {
|
|||
impl<S4, NI> BSwap for u128x1_sse2<NoS3, S4, NI> {
|
||||
#[inline(always)]
|
||||
fn bswap(self) -> Self {
|
||||
Self::new(unsafe { unimplemented!() })
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1078,6 +1068,7 @@ impl<W: PartialEq, G> PartialEq for x2<W, G> {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
#[inline(always)]
|
||||
unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool {
|
||||
let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110);
|
||||
|
@ -1136,13 +1127,14 @@ where
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::x86_64::{SSE2, SSE41, SSSE3};
|
||||
use crate::Machine;
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||
fn test_bswap32_s2_vs_s3() {
|
||||
let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
|
||||
let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];
|
||||
|
@ -1165,7 +1157,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||
fn test_bswap64_s2_vs_s3() {
|
||||
let xs = [0x0f0e_0d0c_0b0a_0908, 0x0706_0504_0302_0100];
|
||||
let ys = [0x0809_0a0b_0c0d_0e0f, 0x0001_0203_0405_0607];
|
||||
|
@ -1188,7 +1180,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||
fn test_shuffle32_s2_vs_s3() {
|
||||
let xs = [0x0, 0x1, 0x2, 0x3];
|
||||
let ys = [0x2, 0x3, 0x0, 0x1];
|
||||
|
@ -1226,7 +1218,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||
fn test_shuffle64_s2_vs_s3() {
|
||||
let xs = [0x0, 0x1, 0x2, 0x3];
|
||||
let ys = [0x2, 0x3, 0x0, 0x1];
|
||||
|
@ -1263,8 +1255,8 @@ mod test {
|
|||
assert_eq!(x_s3, unsafe { core::mem::transmute(x_s3) });
|
||||
}
|
||||
|
||||
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn test_lanes_u32x4() {
|
||||
let xs = [0x1, 0x2, 0x3, 0x4];
|
||||
|
||||
|
@ -1295,7 +1287,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||
fn test_lanes_u64x2() {
|
||||
let xs = [0x1, 0x2];
|
||||
|
||||
|
@ -1326,7 +1318,6 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn test_vec4_u32x4_s2() {
|
||||
let xs = [1, 2, 3, 4];
|
||||
let s2 = unsafe { SSE2::instance() };
|
||||
|
@ -1342,7 +1333,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||
fn test_vec4_u32x4_s4() {
|
||||
let xs = [1, 2, 3, 4];
|
||||
let s4 = unsafe { SSE41::instance() };
|
||||
|
@ -1358,7 +1349,6 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn test_vec2_u64x2_s2() {
|
||||
let xs = [0x1, 0x2];
|
||||
let s2 = unsafe { SSE2::instance() };
|
||||
|
@ -1370,7 +1360,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||
fn test_vec4_u64x2_s4() {
|
||||
let xs = [0x1, 0x2];
|
||||
let s4 = unsafe { SSE41::instance() };
|
||||
|
@ -1493,19 +1483,13 @@ pub mod avx2 {
|
|||
impl<NI> ArithOps for u32x4x4_avx2<NI> where NI: Copy {}
|
||||
macro_rules! shuf_lane_bytes {
|
||||
($name:ident, $k0:expr, $k1:expr) => {
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
[
|
||||
_mm256_shuffle_epi8(
|
||||
self.x[0],
|
||||
_mm256_set_epi64x($k0, $k1, $k0, $k1),
|
||||
),
|
||||
_mm256_shuffle_epi8(
|
||||
self.x[1],
|
||||
_mm256_set_epi64x($k0, $k1, $k0, $k1),
|
||||
)
|
||||
]
|
||||
#[inline(always)]
|
||||
fn $name(self) -> Self {
|
||||
Self::new(unsafe {
|
||||
[
|
||||
_mm256_shuffle_epi8(self.x[0], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
|
||||
_mm256_shuffle_epi8(self.x[1], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
|
||||
]
|
||||
})
|
||||
}
|
||||
};
|
||||
|
@ -1523,7 +1507,7 @@ pub mod avx2 {
|
|||
_mm256_or_si256(
|
||||
_mm256_srli_epi32(self.x[1], $i as i32),
|
||||
_mm256_slli_epi32(self.x[1], 32 - $i as i32),
|
||||
)
|
||||
),
|
||||
]
|
||||
})
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче