зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1716518 - Upgrade ppv-lite86 to v0.2.10. r=emilio
Differential Revision: https://phabricator.services.mozilla.com/D117836
This commit is contained in:
Родитель
4d155afc6e
Коммит
478846d726
|
@ -3902,9 +3902,9 @@ checksum = "b18befed8bc2b61abc79a457295e7e838417326da1586050b919414073977f19"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ppv-lite86"
|
name = "ppv-lite86"
|
||||||
version = "0.2.6"
|
version = "0.2.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
|
checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "precomputed-hash"
|
name = "precomputed-hash"
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
{"files":{"Cargo.toml":"3cf6fa0e4089c12be1d19ac9082aabed68d3f193dbd5024379c22a8a4db15abe","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6c19b3062aec77a92130be1ce55c90aeca0811bcb19951c25a50c8fbe89a26d0","src/lib.rs":"75beb27d89dcc7541c8e81ad1f4bec81908d8d5fa0e3adec47cb1a1f008dfd32","src/soft.rs":"6fb8aa428183ec09d63d45761507d8da6dffc45990f2d1fcfd387c4c856599cc","src/types.rs":"4890069359ed53575a6b9a8168037ccdd4b029c8d61d540e9770fe3c90359345","src/x86_64/mod.rs":"e95910e8c9d23c212055598a437bfcdfaebf4f12e03a04e75f961bc3e9e257a1","src/x86_64/sse2.rs":"da72424e9e3fabd6236d4de80edb1448dc0bac02797df8e15298d412cdaef10c"},"package":"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"}
|
{"files":{"Cargo.toml":"5d9b7092f252e3a6f7f50f6aeb1b873803b322cf5edbf0ae07e0a27d57df3fbf","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6f38250421846499c816c222d0b48155bfab09a9921e6c400d7b75567ab98f14","src/lib.rs":"bcf308d7037e259d6640a785556fcdb86653cb4f72f64fbfeda9899857c14479","src/soft.rs":"5cdee0e46c99a9d5078c0b3a733fe6fd1430ed0a888ef747bc2a1271265a1140","src/types.rs":"a354d2e3267c7c451a1420903314a358328346772ca964fa6c1ef7b96c983930","src/x86_64/mod.rs":"4d5a1da816f8e59bb385464f005075de889d1060e24dcee6709b321a3d6c92f7","src/x86_64/sse2.rs":"a9df3e7b3b8ffcd249a2cbed0e538042f7747dfa6ae7af0c9af364dc5a12d409"},"package":"ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"}
|
|
@ -13,7 +13,7 @@
|
||||||
[package]
|
[package]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
name = "ppv-lite86"
|
name = "ppv-lite86"
|
||||||
version = "0.2.6"
|
version = "0.2.10"
|
||||||
authors = ["The CryptoCorrosion Contributors"]
|
authors = ["The CryptoCorrosion Contributors"]
|
||||||
description = "Implementation of the crypto-simd API for x86"
|
description = "Implementation of the crypto-simd API for x86"
|
||||||
keywords = ["crypto", "simd", "x86"]
|
keywords = ["crypto", "simd", "x86"]
|
||||||
|
@ -24,7 +24,8 @@ repository = "https://github.com/cryptocorrosion/cryptocorrosion"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["std", "simd"]
|
default = ["std"]
|
||||||
|
no_simd = []
|
||||||
simd = []
|
simd = []
|
||||||
std = []
|
std = []
|
||||||
[badges.travis-ci]
|
[badges.travis-ci]
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
#![allow(non_camel_case_types)]
|
#![allow(non_camel_case_types)]
|
||||||
|
|
||||||
use core::ops::*;
|
|
||||||
use crate::soft::{x2, x4};
|
use crate::soft::{x2, x4};
|
||||||
use crate::types::*;
|
use crate::types::*;
|
||||||
|
use core::ops::*;
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub union vec128_storage {
|
pub union vec128_storage {
|
||||||
d: [u32; 4],
|
d: [u32; 4],
|
||||||
q: [u64; 2],
|
q: [u64; 2],
|
||||||
o: [u128; 1],
|
|
||||||
}
|
}
|
||||||
impl From<[u32; 4]> for vec128_storage {
|
impl From<[u32; 4]> for vec128_storage {
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -16,7 +16,38 @@ impl From<[u32; 4]> for vec128_storage {
|
||||||
Self { d }
|
Self { d }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[derive(Clone, Copy)]
|
impl From<vec128_storage> for [u32; 4] {
|
||||||
|
#[inline]
|
||||||
|
fn from(d: vec128_storage) -> Self {
|
||||||
|
unsafe { d.d }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl From<[u64; 2]> for vec128_storage {
|
||||||
|
#[inline]
|
||||||
|
fn from(q: [u64; 2]) -> Self {
|
||||||
|
Self { q }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl From<vec128_storage> for [u64; 2] {
|
||||||
|
#[inline]
|
||||||
|
fn from(q: vec128_storage) -> Self {
|
||||||
|
unsafe { q.q }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Default for vec128_storage {
|
||||||
|
#[inline]
|
||||||
|
fn default() -> Self {
|
||||||
|
Self { q: [0, 0] }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Eq for vec128_storage {}
|
||||||
|
impl PartialEq<vec128_storage> for vec128_storage {
|
||||||
|
#[inline]
|
||||||
|
fn eq(&self, rhs: &Self) -> bool {
|
||||||
|
unsafe { self.q == rhs.q }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Default)]
|
||||||
pub struct vec256_storage {
|
pub struct vec256_storage {
|
||||||
v128: [vec128_storage; 2],
|
v128: [vec128_storage; 2],
|
||||||
}
|
}
|
||||||
|
@ -30,7 +61,15 @@ impl vec256_storage {
|
||||||
self.v128
|
self.v128
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[derive(Clone, Copy)]
|
impl From<vec256_storage> for [u64; 4] {
|
||||||
|
#[inline]
|
||||||
|
fn from(q: vec256_storage) -> Self {
|
||||||
|
let [a, b]: [u64; 2] = q.v128[0].into();
|
||||||
|
let [c, d]: [u64; 2] = q.v128[1].into();
|
||||||
|
[a, b, c, d]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Default)]
|
||||||
pub struct vec512_storage {
|
pub struct vec512_storage {
|
||||||
v128: [vec128_storage; 4],
|
v128: [vec128_storage; 4],
|
||||||
}
|
}
|
||||||
|
@ -106,14 +145,22 @@ where
|
||||||
unsafe { T::unpack(q) }
|
unsafe { T::unpack(q) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn o_of_q(q: [u64; 2]) -> u128 {
|
||||||
|
u128::from(q[0]) | (u128::from(q[1]) << 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn q_of_o(o: u128) -> [u64; 2] {
|
||||||
|
[o as u64, (o >> 64) as u64]
|
||||||
|
}
|
||||||
|
|
||||||
fn omap<T, F>(a: T, f: F) -> T
|
fn omap<T, F>(a: T, f: F) -> T
|
||||||
where
|
where
|
||||||
T: Store<vec128_storage> + Into<vec128_storage>,
|
T: Store<vec128_storage> + Into<vec128_storage>,
|
||||||
F: Fn(u128) -> u128,
|
F: Fn(u128) -> u128,
|
||||||
{
|
{
|
||||||
let a: vec128_storage = a.into();
|
let a: vec128_storage = a.into();
|
||||||
let ao = unsafe { a.o };
|
let ao = o_of_q(unsafe { a.q });
|
||||||
let o = vec128_storage { o: [f(ao[0])] };
|
let o = vec128_storage { q: q_of_o(f(ao)) };
|
||||||
unsafe { T::unpack(o) }
|
unsafe { T::unpack(o) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,10 +171,10 @@ where
|
||||||
{
|
{
|
||||||
let a: vec128_storage = a.into();
|
let a: vec128_storage = a.into();
|
||||||
let b: vec128_storage = b.into();
|
let b: vec128_storage = b.into();
|
||||||
let ao = unsafe { a.o };
|
let ao = o_of_q(unsafe { a.q });
|
||||||
let bo = unsafe { b.o };
|
let bo = o_of_q(unsafe { b.q });
|
||||||
let o = vec128_storage {
|
let o = vec128_storage {
|
||||||
o: [f(ao[0], bo[0])],
|
q: q_of_o(f(ao, bo)),
|
||||||
};
|
};
|
||||||
unsafe { T::unpack(o) }
|
unsafe { T::unpack(o) }
|
||||||
}
|
}
|
||||||
|
@ -411,7 +458,7 @@ impl From<u64x2_generic> for vec128_storage {
|
||||||
impl From<u128x1_generic> for vec128_storage {
|
impl From<u128x1_generic> for vec128_storage {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn from(o: u128x1_generic) -> Self {
|
fn from(o: u128x1_generic) -> Self {
|
||||||
Self { o: o.0 }
|
Self { q: q_of_o(o.0[0]) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -430,7 +477,7 @@ impl Store<vec128_storage> for u64x2_generic {
|
||||||
impl Store<vec128_storage> for u128x1_generic {
|
impl Store<vec128_storage> for u128x1_generic {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
unsafe fn unpack(s: vec128_storage) -> Self {
|
unsafe fn unpack(s: vec128_storage) -> Self {
|
||||||
Self(s.o)
|
Self([o_of_q(s.q); 1])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,14 +9,14 @@ mod soft;
|
||||||
mod types;
|
mod types;
|
||||||
pub use self::types::*;
|
pub use self::types::*;
|
||||||
|
|
||||||
#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
|
#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
|
||||||
pub mod x86_64;
|
pub mod x86_64;
|
||||||
#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
|
#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
|
||||||
use self::x86_64 as arch;
|
use self::x86_64 as arch;
|
||||||
|
|
||||||
#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
|
#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
|
||||||
pub mod generic;
|
pub mod generic;
|
||||||
#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
|
#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
|
||||||
use self::generic as arch;
|
use self::generic as arch;
|
||||||
|
|
||||||
pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
|
pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
|
//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
|
||||||
|
|
||||||
use core::marker::PhantomData;
|
|
||||||
use core::ops::*;
|
|
||||||
use crate::types::*;
|
use crate::types::*;
|
||||||
use crate::{vec128_storage, vec256_storage, vec512_storage};
|
use crate::{vec128_storage, vec256_storage, vec512_storage};
|
||||||
|
use core::marker::PhantomData;
|
||||||
|
use core::ops::*;
|
||||||
|
|
||||||
#[derive(Copy, Clone, Default)]
|
#[derive(Copy, Clone, Default)]
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
|
@ -238,7 +238,12 @@ macro_rules! fwd_unop_x4 {
|
||||||
($fn:ident) => {
|
($fn:ident) => {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn $fn(self) -> Self {
|
fn $fn(self) -> Self {
|
||||||
x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()])
|
x4([
|
||||||
|
self.0[0].$fn(),
|
||||||
|
self.0[1].$fn(),
|
||||||
|
self.0[2].$fn(),
|
||||||
|
self.0[3].$fn(),
|
||||||
|
])
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
#![allow(non_camel_case_types)]
|
||||||
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
|
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
|
||||||
|
|
||||||
pub trait AndNot {
|
pub trait AndNot {
|
||||||
|
@ -44,182 +45,178 @@ pub trait RotateEachWord64 {
|
||||||
|
|
||||||
pub trait RotateEachWord128 {}
|
pub trait RotateEachWord128 {}
|
||||||
|
|
||||||
#[allow(non_camel_case_types)]
|
// Vector type naming scheme:
|
||||||
mod types {
|
// uN[xP]xL
|
||||||
//! Vector type naming scheme:
|
// Unsigned; N-bit words * P bits per lane * L lanes
|
||||||
//! uN[xP]xL
|
//
|
||||||
//! Unsigned; N-bit words * P bits per lane * L lanes
|
// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
|
||||||
//!
|
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
|
||||||
//! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
|
// slow inter-lane operations.
|
||||||
//! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
|
|
||||||
//! slow inter-lane operations.
|
|
||||||
|
|
||||||
use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
|
use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
|
||||||
use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes};
|
|
||||||
|
|
||||||
pub trait UnsafeFrom<T> {
|
#[allow(clippy::missing_safety_doc)]
|
||||||
unsafe fn unsafe_from(t: T) -> Self;
|
pub trait UnsafeFrom<T> {
|
||||||
}
|
unsafe fn unsafe_from(t: T) -> Self;
|
||||||
|
|
||||||
/// A vector composed of two elements, which may be words or themselves vectors.
|
|
||||||
pub trait Vec2<W> {
|
|
||||||
fn extract(self, i: u32) -> W;
|
|
||||||
fn insert(self, w: W, i: u32) -> Self;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A vector composed of four elements, which may be words or themselves vectors.
|
|
||||||
pub trait Vec4<W> {
|
|
||||||
fn extract(self, i: u32) -> W;
|
|
||||||
fn insert(self, w: W, i: u32) -> Self;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: multiples of 4 should inherit this
|
|
||||||
/// A vector composed of four words; depending on their size, operations may cross lanes.
|
|
||||||
pub trait Words4 {
|
|
||||||
fn shuffle1230(self) -> Self;
|
|
||||||
fn shuffle2301(self) -> Self;
|
|
||||||
fn shuffle3012(self) -> Self;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A vector composed one or more lanes each composed of four words.
|
|
||||||
pub trait LaneWords4 {
|
|
||||||
fn shuffle_lane_words1230(self) -> Self;
|
|
||||||
fn shuffle_lane_words2301(self) -> Self;
|
|
||||||
fn shuffle_lane_words3012(self) -> Self;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: make this a part of BitOps
|
|
||||||
/// Exchange neigboring ranges of bits of the specified size
|
|
||||||
pub trait Swap64 {
|
|
||||||
fn swap1(self) -> Self;
|
|
||||||
fn swap2(self) -> Self;
|
|
||||||
fn swap4(self) -> Self;
|
|
||||||
fn swap8(self) -> Self;
|
|
||||||
fn swap16(self) -> Self;
|
|
||||||
fn swap32(self) -> Self;
|
|
||||||
fn swap64(self) -> Self;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait u32x4<M: Machine>:
|
|
||||||
BitOps32
|
|
||||||
+ Store<vec128_storage>
|
|
||||||
+ ArithOps
|
|
||||||
+ Vec4<u32>
|
|
||||||
+ Words4
|
|
||||||
+ LaneWords4
|
|
||||||
+ StoreBytes
|
|
||||||
+ MultiLane<[u32; 4]>
|
|
||||||
+ Into<vec128_storage>
|
|
||||||
{
|
|
||||||
}
|
|
||||||
pub trait u64x2<M: Machine>:
|
|
||||||
BitOps64
|
|
||||||
+ Store<vec128_storage>
|
|
||||||
+ ArithOps
|
|
||||||
+ Vec2<u64>
|
|
||||||
+ MultiLane<[u64; 2]>
|
|
||||||
+ Into<vec128_storage>
|
|
||||||
{
|
|
||||||
}
|
|
||||||
pub trait u128x1<M: Machine>:
|
|
||||||
BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
|
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait u32x4x2<M: Machine>:
|
/// A vector composed of two elements, which may be words or themselves vectors.
|
||||||
BitOps32
|
pub trait Vec2<W> {
|
||||||
+ Store<vec256_storage>
|
fn extract(self, i: u32) -> W;
|
||||||
+ Vec2<M::u32x4>
|
fn insert(self, w: W, i: u32) -> Self;
|
||||||
+ MultiLane<[M::u32x4; 2]>
|
|
||||||
+ ArithOps
|
|
||||||
+ Into<vec256_storage>
|
|
||||||
{
|
|
||||||
}
|
|
||||||
pub trait u64x2x2<M: Machine>:
|
|
||||||
BitOps64
|
|
||||||
+ Store<vec256_storage>
|
|
||||||
+ Vec2<M::u64x2>
|
|
||||||
+ MultiLane<[M::u64x2; 2]>
|
|
||||||
+ ArithOps
|
|
||||||
+ StoreBytes
|
|
||||||
+ Into<vec256_storage>
|
|
||||||
{
|
|
||||||
}
|
|
||||||
pub trait u64x4<M: Machine>:
|
|
||||||
BitOps64
|
|
||||||
+ Store<vec256_storage>
|
|
||||||
+ Vec4<u64>
|
|
||||||
+ MultiLane<[u64; 4]>
|
|
||||||
+ ArithOps
|
|
||||||
+ Words4
|
|
||||||
+ StoreBytes
|
|
||||||
+ Into<vec256_storage>
|
|
||||||
{
|
|
||||||
}
|
|
||||||
pub trait u128x2<M: Machine>:
|
|
||||||
BitOps128
|
|
||||||
+ Store<vec256_storage>
|
|
||||||
+ Vec2<M::u128x1>
|
|
||||||
+ MultiLane<[M::u128x1; 2]>
|
|
||||||
+ Swap64
|
|
||||||
+ Into<vec256_storage>
|
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait u32x4x4<M: Machine>:
|
/// A vector composed of four elements, which may be words or themselves vectors.
|
||||||
BitOps32
|
pub trait Vec4<W> {
|
||||||
+ Store<vec512_storage>
|
fn extract(self, i: u32) -> W;
|
||||||
+ Vec4<M::u32x4>
|
fn insert(self, w: W, i: u32) -> Self;
|
||||||
+ MultiLane<[M::u32x4; 4]>
|
|
||||||
+ ArithOps
|
|
||||||
+ LaneWords4
|
|
||||||
+ Into<vec512_storage>
|
|
||||||
{
|
|
||||||
}
|
|
||||||
pub trait u64x2x4<M: Machine>:
|
|
||||||
BitOps64
|
|
||||||
+ Store<vec512_storage>
|
|
||||||
+ Vec4<M::u64x2>
|
|
||||||
+ MultiLane<[M::u64x2; 4]>
|
|
||||||
+ ArithOps
|
|
||||||
+ Into<vec512_storage>
|
|
||||||
{
|
|
||||||
}
|
|
||||||
// TODO: Words4
|
|
||||||
pub trait u128x4<M: Machine>:
|
|
||||||
BitOps128
|
|
||||||
+ Store<vec512_storage>
|
|
||||||
+ Vec4<M::u128x1>
|
|
||||||
+ MultiLane<[M::u128x1; 4]>
|
|
||||||
+ Swap64
|
|
||||||
+ Into<vec512_storage>
|
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A vector composed of multiple 128-bit lanes.
|
// TODO: multiples of 4 should inherit this
|
||||||
pub trait MultiLane<Lanes> {
|
/// A vector composed of four words; depending on their size, operations may cross lanes.
|
||||||
/// Split a multi-lane vector into single-lane vectors.
|
pub trait Words4 {
|
||||||
fn to_lanes(self) -> Lanes;
|
fn shuffle1230(self) -> Self;
|
||||||
/// Build a multi-lane vector from individual lanes.
|
fn shuffle2301(self) -> Self;
|
||||||
fn from_lanes(lanes: Lanes) -> Self;
|
fn shuffle3012(self) -> Self;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Combine single vectors into a multi-lane vector.
|
/// A vector composed one or more lanes each composed of four words.
|
||||||
pub trait VZip<V> {
|
pub trait LaneWords4 {
|
||||||
fn vzip(self) -> V;
|
fn shuffle_lane_words1230(self) -> Self;
|
||||||
}
|
fn shuffle_lane_words2301(self) -> Self;
|
||||||
|
fn shuffle_lane_words3012(self) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
impl<V, T> VZip<V> for T
|
// TODO: make this a part of BitOps
|
||||||
where
|
/// Exchange neigboring ranges of bits of the specified size
|
||||||
V: MultiLane<T>,
|
pub trait Swap64 {
|
||||||
{
|
fn swap1(self) -> Self;
|
||||||
#[inline(always)]
|
fn swap2(self) -> Self;
|
||||||
fn vzip(self) -> V {
|
fn swap4(self) -> Self;
|
||||||
V::from_lanes(self)
|
fn swap8(self) -> Self;
|
||||||
}
|
fn swap16(self) -> Self;
|
||||||
|
fn swap32(self) -> Self;
|
||||||
|
fn swap64(self) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait u32x4<M: Machine>:
|
||||||
|
BitOps32
|
||||||
|
+ Store<vec128_storage>
|
||||||
|
+ ArithOps
|
||||||
|
+ Vec4<u32>
|
||||||
|
+ Words4
|
||||||
|
+ LaneWords4
|
||||||
|
+ StoreBytes
|
||||||
|
+ MultiLane<[u32; 4]>
|
||||||
|
+ Into<vec128_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
pub trait u64x2<M: Machine>:
|
||||||
|
BitOps64
|
||||||
|
+ Store<vec128_storage>
|
||||||
|
+ ArithOps
|
||||||
|
+ Vec2<u64>
|
||||||
|
+ MultiLane<[u64; 2]>
|
||||||
|
+ Into<vec128_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
pub trait u128x1<M: Machine>:
|
||||||
|
BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait u32x4x2<M: Machine>:
|
||||||
|
BitOps32
|
||||||
|
+ Store<vec256_storage>
|
||||||
|
+ Vec2<M::u32x4>
|
||||||
|
+ MultiLane<[M::u32x4; 2]>
|
||||||
|
+ ArithOps
|
||||||
|
+ Into<vec256_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
pub trait u64x2x2<M: Machine>:
|
||||||
|
BitOps64
|
||||||
|
+ Store<vec256_storage>
|
||||||
|
+ Vec2<M::u64x2>
|
||||||
|
+ MultiLane<[M::u64x2; 2]>
|
||||||
|
+ ArithOps
|
||||||
|
+ StoreBytes
|
||||||
|
+ Into<vec256_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
pub trait u64x4<M: Machine>:
|
||||||
|
BitOps64
|
||||||
|
+ Store<vec256_storage>
|
||||||
|
+ Vec4<u64>
|
||||||
|
+ MultiLane<[u64; 4]>
|
||||||
|
+ ArithOps
|
||||||
|
+ Words4
|
||||||
|
+ StoreBytes
|
||||||
|
+ Into<vec256_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
pub trait u128x2<M: Machine>:
|
||||||
|
BitOps128
|
||||||
|
+ Store<vec256_storage>
|
||||||
|
+ Vec2<M::u128x1>
|
||||||
|
+ MultiLane<[M::u128x1; 2]>
|
||||||
|
+ Swap64
|
||||||
|
+ Into<vec256_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait u32x4x4<M: Machine>:
|
||||||
|
BitOps32
|
||||||
|
+ Store<vec512_storage>
|
||||||
|
+ Vec4<M::u32x4>
|
||||||
|
+ MultiLane<[M::u32x4; 4]>
|
||||||
|
+ ArithOps
|
||||||
|
+ LaneWords4
|
||||||
|
+ Into<vec512_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
pub trait u64x2x4<M: Machine>:
|
||||||
|
BitOps64
|
||||||
|
+ Store<vec512_storage>
|
||||||
|
+ Vec4<M::u64x2>
|
||||||
|
+ MultiLane<[M::u64x2; 4]>
|
||||||
|
+ ArithOps
|
||||||
|
+ Into<vec512_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
// TODO: Words4
|
||||||
|
pub trait u128x4<M: Machine>:
|
||||||
|
BitOps128
|
||||||
|
+ Store<vec512_storage>
|
||||||
|
+ Vec4<M::u128x1>
|
||||||
|
+ MultiLane<[M::u128x1; 4]>
|
||||||
|
+ Swap64
|
||||||
|
+ Into<vec512_storage>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A vector composed of multiple 128-bit lanes.
|
||||||
|
pub trait MultiLane<Lanes> {
|
||||||
|
/// Split a multi-lane vector into single-lane vectors.
|
||||||
|
fn to_lanes(self) -> Lanes;
|
||||||
|
/// Build a multi-lane vector from individual lanes.
|
||||||
|
fn from_lanes(lanes: Lanes) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Combine single vectors into a multi-lane vector.
|
||||||
|
pub trait VZip<V> {
|
||||||
|
fn vzip(self) -> V;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<V, T> VZip<V> for T
|
||||||
|
where
|
||||||
|
V: MultiLane<T>,
|
||||||
|
{
|
||||||
|
#[inline(always)]
|
||||||
|
fn vzip(self) -> V {
|
||||||
|
V::from_lanes(self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub use self::types::*;
|
|
||||||
|
|
||||||
pub trait Machine: Sized + Copy {
|
pub trait Machine: Sized + Copy {
|
||||||
type u32x4: u32x4<Self>;
|
type u32x4: u32x4<Self>;
|
||||||
|
@ -264,15 +261,27 @@ pub trait Machine: Sized + Copy {
|
||||||
unsafe { V::unsafe_read_be(input) }
|
unsafe { V::unsafe_read_be(input) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// # Safety
|
||||||
|
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||||
|
/// environment.
|
||||||
unsafe fn instance() -> Self;
|
unsafe fn instance() -> Self;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait Store<S> {
|
pub trait Store<S> {
|
||||||
|
/// # Safety
|
||||||
|
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||||
|
/// environment.
|
||||||
unsafe fn unpack(p: S) -> Self;
|
unsafe fn unpack(p: S) -> Self;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait StoreBytes {
|
pub trait StoreBytes {
|
||||||
|
/// # Safety
|
||||||
|
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||||
|
/// environment.
|
||||||
unsafe fn unsafe_read_le(input: &[u8]) -> Self;
|
unsafe fn unsafe_read_le(input: &[u8]) -> Self;
|
||||||
|
/// # Safety
|
||||||
|
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
|
||||||
|
/// environment.
|
||||||
unsafe fn unsafe_read_be(input: &[u8]) -> Self;
|
unsafe fn unsafe_read_be(input: &[u8]) -> Self;
|
||||||
fn write_le(self, out: &mut [u8]);
|
fn write_le(self, out: &mut [u8]);
|
||||||
fn write_be(self, out: &mut [u8]);
|
fn write_be(self, out: &mut [u8]);
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// crate minimums: sse2, x86_64
|
// crate minimums: sse2, x86_64
|
||||||
|
|
||||||
use core::arch::x86_64::{__m128i, __m256i};
|
|
||||||
use crate::types::*;
|
use crate::types::*;
|
||||||
|
use core::arch::x86_64::{__m128i, __m256i};
|
||||||
|
|
||||||
mod sse2;
|
mod sse2;
|
||||||
|
|
||||||
|
@ -137,6 +137,13 @@ impl Default for vec128_storage {
|
||||||
vec128_storage { u128x1: [0] }
|
vec128_storage { u128x1: [0] }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
impl Eq for vec128_storage {}
|
||||||
|
impl PartialEq for vec128_storage {
|
||||||
|
#[inline(always)]
|
||||||
|
fn eq(&self, rhs: &Self) -> bool {
|
||||||
|
unsafe { self.u128x1 == rhs.u128x1 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
|
@ -167,6 +174,13 @@ impl vec256_storage {
|
||||||
unsafe { self.sse2 }
|
unsafe { self.sse2 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
impl Eq for vec256_storage {}
|
||||||
|
impl PartialEq for vec256_storage {
|
||||||
|
#[inline(always)]
|
||||||
|
fn eq(&self, rhs: &Self) -> bool {
|
||||||
|
unsafe { self.sse2 == rhs.sse2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
|
@ -193,6 +207,13 @@ impl vec512_storage {
|
||||||
unsafe { self.sse2 }
|
unsafe { self.sse2 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
impl Eq for vec512_storage {}
|
||||||
|
impl PartialEq for vec512_storage {
|
||||||
|
#[inline(always)]
|
||||||
|
fn eq(&self, rhs: &Self) -> bool {
|
||||||
|
unsafe { self.avx == rhs.avx }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
macro_rules! impl_into {
|
macro_rules! impl_into {
|
||||||
($storage:ident, $array:ty, $name:ident) => {
|
($storage:ident, $array:ty, $name:ident) => {
|
||||||
|
|
|
@ -166,28 +166,23 @@ macro_rules! impl_bitops128 {
|
||||||
|
|
||||||
macro_rules! rotr_32_s3 {
|
macro_rules! rotr_32_s3 {
|
||||||
($name:ident, $k0:expr, $k1:expr) => {
|
($name:ident, $k0:expr, $k1:expr) => {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn $name(self) -> Self {
|
fn $name(self) -> Self {
|
||||||
Self::new(unsafe {
|
Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
|
||||||
_mm_shuffle_epi8(
|
|
||||||
self.x,
|
|
||||||
_mm_set_epi64x($k0, $k1),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
macro_rules! rotr_32 {
|
macro_rules! rotr_32 {
|
||||||
($name:ident, $i:expr) => {
|
($name:ident, $i:expr) => {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn $name(self) -> Self {
|
fn $name(self) -> Self {
|
||||||
Self::new(unsafe {
|
Self::new(unsafe {
|
||||||
_mm_or_si128(
|
_mm_or_si128(
|
||||||
_mm_srli_epi32(self.x, $i as i32),
|
_mm_srli_epi32(self.x, $i as i32),
|
||||||
_mm_slli_epi32(self.x, 32 - $i as i32),
|
_mm_slli_epi32(self.x, 32 - $i as i32),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<YesS3, S4, NI> {
|
impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<YesS3, S4, NI> {
|
||||||
|
@ -228,28 +223,23 @@ impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<NoS3, S4, NI> {
|
||||||
|
|
||||||
macro_rules! rotr_64_s3 {
|
macro_rules! rotr_64_s3 {
|
||||||
($name:ident, $k0:expr, $k1:expr) => {
|
($name:ident, $k0:expr, $k1:expr) => {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn $name(self) -> Self {
|
fn $name(self) -> Self {
|
||||||
Self::new(unsafe {
|
Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
|
||||||
_mm_shuffle_epi8(
|
|
||||||
self.x,
|
|
||||||
_mm_set_epi64x($k0, $k1),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
macro_rules! rotr_64 {
|
macro_rules! rotr_64 {
|
||||||
($name:ident, $i:expr) => {
|
($name:ident, $i:expr) => {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn $name(self) -> Self {
|
fn $name(self) -> Self {
|
||||||
Self::new(unsafe {
|
Self::new(unsafe {
|
||||||
_mm_or_si128(
|
_mm_or_si128(
|
||||||
_mm_srli_epi64(self.x, $i as i32),
|
_mm_srli_epi64(self.x, $i as i32),
|
||||||
_mm_slli_epi64(self.x, 64 - $i as i32),
|
_mm_slli_epi64(self.x, 64 - $i as i32),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
impl<S4: Copy, NI: Copy> RotateEachWord32 for u64x2_sse2<YesS3, S4, NI> {
|
impl<S4: Copy, NI: Copy> RotateEachWord32 for u64x2_sse2<YesS3, S4, NI> {
|
||||||
|
@ -296,15 +286,15 @@ impl<S3: Copy, S4: Copy, NI: Copy> RotateEachWord64 for u64x2_sse2<S3, S4, NI> {
|
||||||
|
|
||||||
macro_rules! rotr_128 {
|
macro_rules! rotr_128 {
|
||||||
($name:ident, $i:expr) => {
|
($name:ident, $i:expr) => {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn $name(self) -> Self {
|
fn $name(self) -> Self {
|
||||||
Self::new(unsafe {
|
Self::new(unsafe {
|
||||||
_mm_or_si128(
|
_mm_or_si128(
|
||||||
_mm_srli_si128(self.x, $i as i32),
|
_mm_srli_si128(self.x, $i as i32),
|
||||||
_mm_slli_si128(self.x, 128 - $i as i32),
|
_mm_slli_si128(self.x, 128 - $i as i32),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
// TODO: completely unoptimized
|
// TODO: completely unoptimized
|
||||||
|
@ -411,7 +401,7 @@ impl<S3, S4, NI> MultiLane<[u128; 1]> for u128x1_sse2<S3, S4, NI> {
|
||||||
}
|
}
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn from_lanes(xs: [u128; 1]) -> Self {
|
fn from_lanes(xs: [u128; 1]) -> Self {
|
||||||
unimplemented!()
|
unimplemented!("{:?}", xs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -780,7 +770,7 @@ impl<S4, NI> BSwap for u128x1_sse2<YesS3, S4, NI> {
|
||||||
impl<S4, NI> BSwap for u128x1_sse2<NoS3, S4, NI> {
|
impl<S4, NI> BSwap for u128x1_sse2<NoS3, S4, NI> {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bswap(self) -> Self {
|
fn bswap(self) -> Self {
|
||||||
Self::new(unsafe { unimplemented!() })
|
unimplemented!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1078,6 +1068,7 @@ impl<W: PartialEq, G> PartialEq for x2<W, G> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool {
|
unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool {
|
||||||
let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110);
|
let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110);
|
||||||
|
@ -1136,13 +1127,14 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::x86_64::{SSE2, SSE41, SSSE3};
|
use crate::x86_64::{SSE2, SSE41, SSSE3};
|
||||||
use crate::Machine;
|
use crate::Machine;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||||
fn test_bswap32_s2_vs_s3() {
|
fn test_bswap32_s2_vs_s3() {
|
||||||
let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
|
let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
|
||||||
let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];
|
let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];
|
||||||
|
@ -1165,7 +1157,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||||
fn test_bswap64_s2_vs_s3() {
|
fn test_bswap64_s2_vs_s3() {
|
||||||
let xs = [0x0f0e_0d0c_0b0a_0908, 0x0706_0504_0302_0100];
|
let xs = [0x0f0e_0d0c_0b0a_0908, 0x0706_0504_0302_0100];
|
||||||
let ys = [0x0809_0a0b_0c0d_0e0f, 0x0001_0203_0405_0607];
|
let ys = [0x0809_0a0b_0c0d_0e0f, 0x0001_0203_0405_0607];
|
||||||
|
@ -1188,7 +1180,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||||
fn test_shuffle32_s2_vs_s3() {
|
fn test_shuffle32_s2_vs_s3() {
|
||||||
let xs = [0x0, 0x1, 0x2, 0x3];
|
let xs = [0x0, 0x1, 0x2, 0x3];
|
||||||
let ys = [0x2, 0x3, 0x0, 0x1];
|
let ys = [0x2, 0x3, 0x0, 0x1];
|
||||||
|
@ -1226,7 +1218,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
|
||||||
fn test_shuffle64_s2_vs_s3() {
|
fn test_shuffle64_s2_vs_s3() {
|
||||||
let xs = [0x0, 0x1, 0x2, 0x3];
|
let xs = [0x0, 0x1, 0x2, 0x3];
|
||||||
let ys = [0x2, 0x3, 0x0, 0x1];
|
let ys = [0x2, 0x3, 0x0, 0x1];
|
||||||
|
@ -1263,8 +1255,8 @@ mod test {
|
||||||
assert_eq!(x_s3, unsafe { core::mem::transmute(x_s3) });
|
assert_eq!(x_s3, unsafe { core::mem::transmute(x_s3) });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
fn test_lanes_u32x4() {
|
fn test_lanes_u32x4() {
|
||||||
let xs = [0x1, 0x2, 0x3, 0x4];
|
let xs = [0x1, 0x2, 0x3, 0x4];
|
||||||
|
|
||||||
|
@ -1295,7 +1287,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||||
fn test_lanes_u64x2() {
|
fn test_lanes_u64x2() {
|
||||||
let xs = [0x1, 0x2];
|
let xs = [0x1, 0x2];
|
||||||
|
|
||||||
|
@ -1326,7 +1318,6 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
fn test_vec4_u32x4_s2() {
|
fn test_vec4_u32x4_s2() {
|
||||||
let xs = [1, 2, 3, 4];
|
let xs = [1, 2, 3, 4];
|
||||||
let s2 = unsafe { SSE2::instance() };
|
let s2 = unsafe { SSE2::instance() };
|
||||||
|
@ -1342,7 +1333,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||||
fn test_vec4_u32x4_s4() {
|
fn test_vec4_u32x4_s4() {
|
||||||
let xs = [1, 2, 3, 4];
|
let xs = [1, 2, 3, 4];
|
||||||
let s4 = unsafe { SSE41::instance() };
|
let s4 = unsafe { SSE41::instance() };
|
||||||
|
@ -1358,7 +1349,6 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
fn test_vec2_u64x2_s2() {
|
fn test_vec2_u64x2_s2() {
|
||||||
let xs = [0x1, 0x2];
|
let xs = [0x1, 0x2];
|
||||||
let s2 = unsafe { SSE2::instance() };
|
let s2 = unsafe { SSE2::instance() };
|
||||||
|
@ -1370,7 +1360,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
|
||||||
fn test_vec4_u64x2_s4() {
|
fn test_vec4_u64x2_s4() {
|
||||||
let xs = [0x1, 0x2];
|
let xs = [0x1, 0x2];
|
||||||
let s4 = unsafe { SSE41::instance() };
|
let s4 = unsafe { SSE41::instance() };
|
||||||
|
@ -1493,19 +1483,13 @@ pub mod avx2 {
|
||||||
impl<NI> ArithOps for u32x4x4_avx2<NI> where NI: Copy {}
|
impl<NI> ArithOps for u32x4x4_avx2<NI> where NI: Copy {}
|
||||||
macro_rules! shuf_lane_bytes {
|
macro_rules! shuf_lane_bytes {
|
||||||
($name:ident, $k0:expr, $k1:expr) => {
|
($name:ident, $k0:expr, $k1:expr) => {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn $name(self) -> Self {
|
fn $name(self) -> Self {
|
||||||
Self::new(unsafe {
|
Self::new(unsafe {
|
||||||
[
|
[
|
||||||
_mm256_shuffle_epi8(
|
_mm256_shuffle_epi8(self.x[0], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
|
||||||
self.x[0],
|
_mm256_shuffle_epi8(self.x[1], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
|
||||||
_mm256_set_epi64x($k0, $k1, $k0, $k1),
|
]
|
||||||
),
|
|
||||||
_mm256_shuffle_epi8(
|
|
||||||
self.x[1],
|
|
||||||
_mm256_set_epi64x($k0, $k1, $k0, $k1),
|
|
||||||
)
|
|
||||||
]
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -1523,7 +1507,7 @@ pub mod avx2 {
|
||||||
_mm256_or_si256(
|
_mm256_or_si256(
|
||||||
_mm256_srli_epi32(self.x[1], $i as i32),
|
_mm256_srli_epi32(self.x[1], $i as i32),
|
||||||
_mm256_slli_epi32(self.x[1], 32 - $i as i32),
|
_mm256_slli_epi32(self.x[1], 32 - $i as i32),
|
||||||
)
|
),
|
||||||
]
|
]
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче