Bug 1716518 - Upgrade ppv-lite86 to v0.2.10. r=emilio

Differential Revision: https://phabricator.services.mozilla.com/D117836
2021-06-15 22:17:27 +00:00 · 2021-06-15 22:17:27 +00:00 · 478846d726
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3902,9 +3902,9 @@ checksum = "b18befed8bc2b61abc79a457295e7e838417326da1586050b919414073977f19"
 [[package]]
 name = "ppv-lite86"
-version = "0.2.6"
+version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
+checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
 [[package]]
 name = "precomputed-hash"
--- a/third_party/rust/ppv-lite86/.cargo-checksum.json
+++ b/third_party/rust/ppv-lite86/.cargo-checksum.json
@ -1 +1 @@
-{"files":{"Cargo.toml":"3cf6fa0e4089c12be1d19ac9082aabed68d3f193dbd5024379c22a8a4db15abe","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6c19b3062aec77a92130be1ce55c90aeca0811bcb19951c25a50c8fbe89a26d0","src/lib.rs":"75beb27d89dcc7541c8e81ad1f4bec81908d8d5fa0e3adec47cb1a1f008dfd32","src/soft.rs":"6fb8aa428183ec09d63d45761507d8da6dffc45990f2d1fcfd387c4c856599cc","src/types.rs":"4890069359ed53575a6b9a8168037ccdd4b029c8d61d540e9770fe3c90359345","src/x86_64/mod.rs":"e95910e8c9d23c212055598a437bfcdfaebf4f12e03a04e75f961bc3e9e257a1","src/x86_64/sse2.rs":"da72424e9e3fabd6236d4de80edb1448dc0bac02797df8e15298d412cdaef10c"},"package":"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"}
+{"files":{"Cargo.toml":"5d9b7092f252e3a6f7f50f6aeb1b873803b322cf5edbf0ae07e0a27d57df3fbf","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6f38250421846499c816c222d0b48155bfab09a9921e6c400d7b75567ab98f14","src/lib.rs":"bcf308d7037e259d6640a785556fcdb86653cb4f72f64fbfeda9899857c14479","src/soft.rs":"5cdee0e46c99a9d5078c0b3a733fe6fd1430ed0a888ef747bc2a1271265a1140","src/types.rs":"a354d2e3267c7c451a1420903314a358328346772ca964fa6c1ef7b96c983930","src/x86_64/mod.rs":"4d5a1da816f8e59bb385464f005075de889d1060e24dcee6709b321a3d6c92f7","src/x86_64/sse2.rs":"a9df3e7b3b8ffcd249a2cbed0e538042f7747dfa6ae7af0c9af364dc5a12d409"},"package":"ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"}
--- a/third_party/rust/ppv-lite86/Cargo.toml
+++ b/third_party/rust/ppv-lite86/Cargo.toml
@ -13,7 +13,7 @@
 [package]
 edition = "2018"
 name = "ppv-lite86"
-version = "0.2.6"
+version = "0.2.10"
 authors = ["The CryptoCorrosion Contributors"]
 description = "Implementation of the crypto-simd API for x86"
 keywords = ["crypto", "simd", "x86"]
@ -24,7 +24,8 @@ repository = "https://github.com/cryptocorrosion/cryptocorrosion"
 [dependencies]
 [features]
-default = ["std", "simd"]
+default = ["std"]
 no_simd = []
 simd = []
 std = []
 [badges.travis-ci]
--- a/third_party/rust/ppv-lite86/src/generic.rs
+++ b/third_party/rust/ppv-lite86/src/generic.rs
@ -1,14 +1,14 @@
 #![allow(non_camel_case_types)]
 use core::ops::*;
 use crate::soft::{x2, x4};
 use crate::types::*;
 use core::ops::*;
 #[repr(C)]
 #[derive(Clone, Copy)]
 pub union vec128_storage {
    d: [u32; 4],
    q: [u64; 2],
    o: [u128; 1],
 }
 impl From<[u32; 4]> for vec128_storage {
    #[inline]
@ -16,7 +16,38 @@ impl From<[u32; 4]> for vec128_storage {
        Self { d }
    }
 }
-#[derive(Clone, Copy)]
+impl From<vec128_storage> for [u32; 4] {
    #[inline]
    fn from(d: vec128_storage) -> Self {
        unsafe { d.d }
    }
 }
 impl From<[u64; 2]> for vec128_storage {
    #[inline]
    fn from(q: [u64; 2]) -> Self {
        Self { q }
    }
 }
 impl From<vec128_storage> for [u64; 2] {
    #[inline]
    fn from(q: vec128_storage) -> Self {
        unsafe { q.q }
    }
 }
 impl Default for vec128_storage {
    #[inline]
    fn default() -> Self {
        Self { q: [0, 0] }
    }
 }
 impl Eq for vec128_storage {}
 impl PartialEq<vec128_storage> for vec128_storage {
    #[inline]
    fn eq(&self, rhs: &Self) -> bool {
        unsafe { self.q == rhs.q }
    }
 }
 #[derive(Clone, Copy, PartialEq, Eq, Default)]
 pub struct vec256_storage {
    v128: [vec128_storage; 2],
 }
@ -30,7 +61,15 @@ impl vec256_storage {
        self.v128
    }
 }
-#[derive(Clone, Copy)]
+impl From<vec256_storage> for [u64; 4] {
    #[inline]
    fn from(q: vec256_storage) -> Self {
        let [a, b]: [u64; 2] = q.v128[0].into();
        let [c, d]: [u64; 2] = q.v128[1].into();
        [a, b, c, d]
    }
 }
 #[derive(Clone, Copy, PartialEq, Eq, Default)]
 pub struct vec512_storage {
    v128: [vec128_storage; 4],
 }
@ -106,14 +145,22 @@ where
    unsafe { T::unpack(q) }
 }
 fn o_of_q(q: [u64; 2]) -> u128 {
    u128::from(q[0]) | (u128::from(q[1]) << 64)
 }
 fn q_of_o(o: u128) -> [u64; 2] {
    [o as u64, (o >> 64) as u64]
 }
 fn omap<T, F>(a: T, f: F) -> T
 where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u128) -> u128,
 {
    let a: vec128_storage = a.into();
-    let ao = unsafe { a.o };
+    let ao = o_of_q(unsafe { a.q });
-    let o = vec128_storage { o: [f(ao[0])] };
+    let o = vec128_storage { q: q_of_o(f(ao)) };
    unsafe { T::unpack(o) }
 }
@ -124,10 +171,10 @@ where
 {
    let a: vec128_storage = a.into();
    let b: vec128_storage = b.into();
-    let ao = unsafe { a.o };
+    let ao = o_of_q(unsafe { a.q });
-    let bo = unsafe { b.o };
+    let bo = o_of_q(unsafe { b.q });
    let o = vec128_storage {
-        o: [f(ao[0], bo[0])],
+        q: q_of_o(f(ao, bo)),
    };
    unsafe { T::unpack(o) }
 }
@ -411,7 +458,7 @@ impl From<u64x2_generic> for vec128_storage {
 impl From<u128x1_generic> for vec128_storage {
    #[inline(always)]
    fn from(o: u128x1_generic) -> Self {
-        Self { o: o.0 }
+        Self { q: q_of_o(o.0[0]) }
    }
 }
@ -430,7 +477,7 @@ impl Store<vec128_storage> for u64x2_generic {
 impl Store<vec128_storage> for u128x1_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
-        Self(s.o)
+        Self([o_of_q(s.q); 1])
    }
 }
--- a/third_party/rust/ppv-lite86/src/lib.rs
+++ b/third_party/rust/ppv-lite86/src/lib.rs
@ -9,14 +9,14 @@ mod soft;
 mod types;
 pub use self::types::*;
-#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
 pub mod x86_64;
-#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
 use self::x86_64 as arch;
-#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
 pub mod generic;
-#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
 use self::generic as arch;
 pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
--- a/third_party/rust/ppv-lite86/src/soft.rs
+++ b/third_party/rust/ppv-lite86/src/soft.rs
@ -1,9 +1,9 @@
 //! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
 use core::marker::PhantomData;
 use core::ops::*;
 use crate::types::*;
 use crate::{vec128_storage, vec256_storage, vec512_storage};
 use core::marker::PhantomData;
 use core::ops::*;
 #[derive(Copy, Clone, Default)]
 #[allow(non_camel_case_types)]
@ -238,7 +238,12 @@ macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
-            x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()])
+            x4([
                self.0[0].$fn(),
                self.0[1].$fn(),
                self.0[2].$fn(),
                self.0[3].$fn(),
            ])
        }
    };
 }
--- a/third_party/rust/ppv-lite86/src/types.rs
+++ b/third_party/rust/ppv-lite86/src/types.rs
@ -1,3 +1,4 @@
 #![allow(non_camel_case_types)]
 use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
 pub trait AndNot {
@ -44,182 +45,178 @@ pub trait RotateEachWord64 {
 pub trait RotateEachWord128 {}
-#[allow(non_camel_case_types)]
+// Vector type naming scheme:
-mod types {
+// uN[xP]xL
-    //! Vector type naming scheme:
+// Unsigned; N-bit words * P bits per lane * L lanes
-    //! uN[xP]xL
+//
-    //! Unsigned; N-bit words * P bits per lane * L lanes
+// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
-    //!
+// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
-    //! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
+// slow inter-lane operations.
    //! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
    //! slow inter-lane operations.
-    use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
+use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
    use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes};
-    pub trait UnsafeFrom<T> {
+#[allow(clippy::missing_safety_doc)]
-        unsafe fn unsafe_from(t: T) -> Self;
+pub trait UnsafeFrom<T> {
-    }
+    unsafe fn unsafe_from(t: T) -> Self;
    /// A vector composed of two elements, which may be words or themselves vectors.
    pub trait Vec2<W> {
        fn extract(self, i: u32) -> W;
        fn insert(self, w: W, i: u32) -> Self;
    }
    /// A vector composed of four elements, which may be words or themselves vectors.
    pub trait Vec4<W> {
        fn extract(self, i: u32) -> W;
        fn insert(self, w: W, i: u32) -> Self;
    }
    // TODO: multiples of 4 should inherit this
    /// A vector composed of four words; depending on their size, operations may cross lanes.
    pub trait Words4 {
        fn shuffle1230(self) -> Self;
        fn shuffle2301(self) -> Self;
        fn shuffle3012(self) -> Self;
    }
    /// A vector composed one or more lanes each composed of four words.
    pub trait LaneWords4 {
        fn shuffle_lane_words1230(self) -> Self;
        fn shuffle_lane_words2301(self) -> Self;
        fn shuffle_lane_words3012(self) -> Self;
    }
    // TODO: make this a part of BitOps
    /// Exchange neigboring ranges of bits of the specified size
    pub trait Swap64 {
        fn swap1(self) -> Self;
        fn swap2(self) -> Self;
        fn swap4(self) -> Self;
        fn swap8(self) -> Self;
        fn swap16(self) -> Self;
        fn swap32(self) -> Self;
        fn swap64(self) -> Self;
    }
    pub trait u32x4<M: Machine>:
        BitOps32
        + Store<vec128_storage>
        + ArithOps
        + Vec4<u32>
        + Words4
        + LaneWords4
        + StoreBytes
        + MultiLane<[u32; 4]>
        + Into<vec128_storage>
    {
 }
    pub trait u64x2<M: Machine>:
        BitOps64
        + Store<vec128_storage>
        + ArithOps
        + Vec2<u64>
        + MultiLane<[u64; 2]>
        + Into<vec128_storage>
    {
 }
    pub trait u128x1<M: Machine>:
        BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
    {
 }
-    pub trait u32x4x2<M: Machine>:
+/// A vector composed of two elements, which may be words or themselves vectors.
-        BitOps32
+pub trait Vec2<W> {
-        + Store<vec256_storage>
+    fn extract(self, i: u32) -> W;
-        + Vec2<M::u32x4>
+    fn insert(self, w: W, i: u32) -> Self;
        + MultiLane<[M::u32x4; 2]>
        + ArithOps
        + Into<vec256_storage>
    {
 }
    pub trait u64x2x2<M: Machine>:
        BitOps64
        + Store<vec256_storage>
        + Vec2<M::u64x2>
        + MultiLane<[M::u64x2; 2]>
        + ArithOps
        + StoreBytes
        + Into<vec256_storage>
    {
 }
    pub trait u64x4<M: Machine>:
        BitOps64
        + Store<vec256_storage>
        + Vec4<u64>
        + MultiLane<[u64; 4]>
        + ArithOps
        + Words4
        + StoreBytes
        + Into<vec256_storage>
    {
 }
    pub trait u128x2<M: Machine>:
        BitOps128
        + Store<vec256_storage>
        + Vec2<M::u128x1>
        + MultiLane<[M::u128x1; 2]>
        + Swap64
        + Into<vec256_storage>
    {
 }
-    pub trait u32x4x4<M: Machine>:
+/// A vector composed of four elements, which may be words or themselves vectors.
-        BitOps32
+pub trait Vec4<W> {
-        + Store<vec512_storage>
+    fn extract(self, i: u32) -> W;
-        + Vec4<M::u32x4>
+    fn insert(self, w: W, i: u32) -> Self;
        + MultiLane<[M::u32x4; 4]>
        + ArithOps
        + LaneWords4
        + Into<vec512_storage>
    {
 }
    pub trait u64x2x4<M: Machine>:
        BitOps64
        + Store<vec512_storage>
        + Vec4<M::u64x2>
        + MultiLane<[M::u64x2; 4]>
        + ArithOps
        + Into<vec512_storage>
    {
 }
    // TODO: Words4
    pub trait u128x4<M: Machine>:
        BitOps128
        + Store<vec512_storage>
        + Vec4<M::u128x1>
        + MultiLane<[M::u128x1; 4]>
        + Swap64
        + Into<vec512_storage>
    {
 }
-    /// A vector composed of multiple 128-bit lanes.
+// TODO: multiples of 4 should inherit this
-    pub trait MultiLane<Lanes> {
+/// A vector composed of four words; depending on their size, operations may cross lanes.
-        /// Split a multi-lane vector into single-lane vectors.
+pub trait Words4 {
-        fn to_lanes(self) -> Lanes;
+    fn shuffle1230(self) -> Self;
-        /// Build a multi-lane vector from individual lanes.
+    fn shuffle2301(self) -> Self;
-        fn from_lanes(lanes: Lanes) -> Self;
+    fn shuffle3012(self) -> Self;
-    }
+}
-    /// Combine single vectors into a multi-lane vector.
+/// A vector composed one or more lanes each composed of four words.
-    pub trait VZip<V> {
+pub trait LaneWords4 {
-        fn vzip(self) -> V;
+    fn shuffle_lane_words1230(self) -> Self;
-    }
+    fn shuffle_lane_words2301(self) -> Self;
    fn shuffle_lane_words3012(self) -> Self;
 }
-    impl<V, T> VZip<V> for T
+// TODO: make this a part of BitOps
-    where
+/// Exchange neigboring ranges of bits of the specified size
-        V: MultiLane<T>,
+pub trait Swap64 {
-    {
+    fn swap1(self) -> Self;
-        #[inline(always)]
+    fn swap2(self) -> Self;
-        fn vzip(self) -> V {
+    fn swap4(self) -> Self;
-            V::from_lanes(self)
+    fn swap8(self) -> Self;
-        }
+    fn swap16(self) -> Self;
    fn swap32(self) -> Self;
    fn swap64(self) -> Self;
 }
 pub trait u32x4<M: Machine>:
    BitOps32
    + Store<vec128_storage>
    + ArithOps
    + Vec4<u32>
    + Words4
    + LaneWords4
    + StoreBytes
    + MultiLane<[u32; 4]>
    + Into<vec128_storage>
 {
 }
 pub trait u64x2<M: Machine>:
    BitOps64
    + Store<vec128_storage>
    + ArithOps
    + Vec2<u64>
    + MultiLane<[u64; 2]>
    + Into<vec128_storage>
 {
 }
 pub trait u128x1<M: Machine>:
    BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
 {
 }
 pub trait u32x4x2<M: Machine>:
    BitOps32
    + Store<vec256_storage>
    + Vec2<M::u32x4>
    + MultiLane<[M::u32x4; 2]>
    + ArithOps
    + Into<vec256_storage>
 {
 }
 pub trait u64x2x2<M: Machine>:
    BitOps64
    + Store<vec256_storage>
    + Vec2<M::u64x2>
    + MultiLane<[M::u64x2; 2]>
    + ArithOps
    + StoreBytes
    + Into<vec256_storage>
 {
 }
 pub trait u64x4<M: Machine>:
    BitOps64
    + Store<vec256_storage>
    + Vec4<u64>
    + MultiLane<[u64; 4]>
    + ArithOps
    + Words4
    + StoreBytes
    + Into<vec256_storage>
 {
 }
 pub trait u128x2<M: Machine>:
    BitOps128
    + Store<vec256_storage>
    + Vec2<M::u128x1>
    + MultiLane<[M::u128x1; 2]>
    + Swap64
    + Into<vec256_storage>
 {
 }
 pub trait u32x4x4<M: Machine>:
    BitOps32
    + Store<vec512_storage>
    + Vec4<M::u32x4>
    + MultiLane<[M::u32x4; 4]>
    + ArithOps
    + LaneWords4
    + Into<vec512_storage>
 {
 }
 pub trait u64x2x4<M: Machine>:
    BitOps64
    + Store<vec512_storage>
    + Vec4<M::u64x2>
    + MultiLane<[M::u64x2; 4]>
    + ArithOps
    + Into<vec512_storage>
 {
 }
 // TODO: Words4
 pub trait u128x4<M: Machine>:
    BitOps128
    + Store<vec512_storage>
    + Vec4<M::u128x1>
    + MultiLane<[M::u128x1; 4]>
    + Swap64
    + Into<vec512_storage>
 {
 }
 /// A vector composed of multiple 128-bit lanes.
 pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    fn to_lanes(self) -> Lanes;
    /// Build a multi-lane vector from individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
 }
 /// Combine single vectors into a multi-lane vector.
 pub trait VZip<V> {
    fn vzip(self) -> V;
 }
 impl<V, T> VZip<V> for T
 where
    V: MultiLane<T>,
 {
    #[inline(always)]
    fn vzip(self) -> V {
        V::from_lanes(self)
    }
 }
 pub use self::types::*;
 pub trait Machine: Sized + Copy {
    type u32x4: u32x4<Self>;
@ -264,15 +261,27 @@ pub trait Machine: Sized + Copy {
        unsafe { V::unsafe_read_be(input) }
    }
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn instance() -> Self;
 }
 pub trait Store<S> {
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
 }
 pub trait StoreBytes {
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;
    fn write_le(self, out: &mut [u8]);
    fn write_be(self, out: &mut [u8]);
--- a/third_party/rust/ppv-lite86/src/x86_64/mod.rs
+++ b/third_party/rust/ppv-lite86/src/x86_64/mod.rs
@ -1,7 +1,7 @@
 // crate minimums: sse2, x86_64
 use core::arch::x86_64::{__m128i, __m256i};
 use crate::types::*;
 use core::arch::x86_64::{__m128i, __m256i};
 mod sse2;
@ -137,6 +137,13 @@ impl Default for vec128_storage {
        vec128_storage { u128x1: [0] }
    }
 }
 impl Eq for vec128_storage {}
 impl PartialEq for vec128_storage {
    #[inline(always)]
    fn eq(&self, rhs: &Self) -> bool {
        unsafe { self.u128x1 == rhs.u128x1 }
    }
 }
 #[allow(non_camel_case_types)]
 #[derive(Copy, Clone)]
@ -167,6 +174,13 @@ impl vec256_storage {
        unsafe { self.sse2 }
    }
 }
 impl Eq for vec256_storage {}
 impl PartialEq for vec256_storage {
    #[inline(always)]
    fn eq(&self, rhs: &Self) -> bool {
        unsafe { self.sse2 == rhs.sse2 }
    }
 }
 #[allow(non_camel_case_types)]
 #[derive(Copy, Clone)]
@ -193,6 +207,13 @@ impl vec512_storage {
        unsafe { self.sse2 }
    }
 }
 impl Eq for vec512_storage {}
 impl PartialEq for vec512_storage {
    #[inline(always)]
    fn eq(&self, rhs: &Self) -> bool {
        unsafe { self.avx == rhs.avx }
    }
 }
 macro_rules! impl_into {
    ($storage:ident, $array:ty, $name:ident) => {
--- a/third_party/rust/ppv-lite86/src/x86_64/sse2.rs
+++ b/third_party/rust/ppv-lite86/src/x86_64/sse2.rs
@ -166,28 +166,23 @@ macro_rules! impl_bitops128 {
 macro_rules! rotr_32_s3 {
    ($name:ident, $k0:expr, $k1:expr) => {
-    #[inline(always)]
+        #[inline(always)]
-    fn $name(self) -> Self {
+        fn $name(self) -> Self {
-        Self::new(unsafe {
+            Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
                _mm_shuffle_epi8(
                    self.x,
                    _mm_set_epi64x($k0, $k1),
                )
            })
        }
    };
 }
 macro_rules! rotr_32 {
    ($name:ident, $i:expr) => {
-    #[inline(always)]
+        #[inline(always)]
-    fn $name(self) -> Self {
+        fn $name(self) -> Self {
-        Self::new(unsafe {
+            Self::new(unsafe {
-            _mm_or_si128(
+                _mm_or_si128(
-                _mm_srli_epi32(self.x, $i as i32),
+                    _mm_srli_epi32(self.x, $i as i32),
-                _mm_slli_epi32(self.x, 32 - $i as i32),
+                    _mm_slli_epi32(self.x, 32 - $i as i32),
-            )
+                )
-        })
+            })
-    }
+        }
    };
 }
 impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<YesS3, S4, NI> {
@ -228,28 +223,23 @@ impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<NoS3, S4, NI> {
 macro_rules! rotr_64_s3 {
    ($name:ident, $k0:expr, $k1:expr) => {
-    #[inline(always)]
+        #[inline(always)]
-    fn $name(self) -> Self {
+        fn $name(self) -> Self {
-        Self::new(unsafe {
+            Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
                _mm_shuffle_epi8(
                    self.x,
                    _mm_set_epi64x($k0, $k1),
                )
            })
        }
    };
 }
 macro_rules! rotr_64 {
    ($name:ident, $i:expr) => {
-    #[inline(always)]
+        #[inline(always)]
-    fn $name(self) -> Self {
+        fn $name(self) -> Self {
-        Self::new(unsafe {
+            Self::new(unsafe {
-            _mm_or_si128(
+                _mm_or_si128(
-                _mm_srli_epi64(self.x, $i as i32),
+                    _mm_srli_epi64(self.x, $i as i32),
-                _mm_slli_epi64(self.x, 64 - $i as i32),
+                    _mm_slli_epi64(self.x, 64 - $i as i32),
-            )
+                )
-        })
+            })
-    }
+        }
    };
 }
 impl<S4: Copy, NI: Copy> RotateEachWord32 for u64x2_sse2<YesS3, S4, NI> {
@ -296,15 +286,15 @@ impl<S3: Copy, S4: Copy, NI: Copy> RotateEachWord64 for u64x2_sse2<S3, S4, NI> {
 macro_rules! rotr_128 {
    ($name:ident, $i:expr) => {
-    #[inline(always)]
+        #[inline(always)]
-    fn $name(self) -> Self {
+        fn $name(self) -> Self {
-        Self::new(unsafe {
+            Self::new(unsafe {
-            _mm_or_si128(
+                _mm_or_si128(
-                _mm_srli_si128(self.x, $i as i32),
+                    _mm_srli_si128(self.x, $i as i32),
-                _mm_slli_si128(self.x, 128 - $i as i32),
+                    _mm_slli_si128(self.x, 128 - $i as i32),
-            )
+                )
-        })
+            })
-    }
+        }
    };
 }
 // TODO: completely unoptimized
@ -411,7 +401,7 @@ impl<S3, S4, NI> MultiLane<[u128; 1]> for u128x1_sse2<S3, S4, NI> {
    }
    #[inline(always)]
    fn from_lanes(xs: [u128; 1]) -> Self {
-        unimplemented!()
+        unimplemented!("{:?}", xs)
    }
 }
@ -780,7 +770,7 @@ impl<S4, NI> BSwap for u128x1_sse2<YesS3, S4, NI> {
 impl<S4, NI> BSwap for u128x1_sse2<NoS3, S4, NI> {
    #[inline(always)]
    fn bswap(self) -> Self {
-        Self::new(unsafe { unimplemented!() })
+        unimplemented!()
    }
 }
@ -1078,6 +1068,7 @@ impl<W: PartialEq, G> PartialEq for x2<W, G> {
    }
 }
 #[allow(unused)]
 #[inline(always)]
 unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool {
    let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110);
@ -1136,13 +1127,14 @@ where
 }
 #[cfg(test)]
 #[cfg(target_arch = "x86_64")]
 mod test {
    use super::*;
    use crate::x86_64::{SSE2, SSE41, SSSE3};
    use crate::Machine;
    #[test]
-    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(target_feature = "ssse3"), ignore)]
    fn test_bswap32_s2_vs_s3() {
        let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];
@ -1165,7 +1157,7 @@ mod test {
    }
    #[test]
-    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(target_feature = "ssse3"), ignore)]
    fn test_bswap64_s2_vs_s3() {
        let xs = [0x0f0e_0d0c_0b0a_0908, 0x0706_0504_0302_0100];
        let ys = [0x0809_0a0b_0c0d_0e0f, 0x0001_0203_0405_0607];
@ -1188,7 +1180,7 @@ mod test {
    }
    #[test]
-    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(target_feature = "ssse3"), ignore)]
    fn test_shuffle32_s2_vs_s3() {
        let xs = [0x0, 0x1, 0x2, 0x3];
        let ys = [0x2, 0x3, 0x0, 0x1];
@ -1226,7 +1218,7 @@ mod test {
    }
    #[test]
-    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(target_feature = "ssse3"), ignore)]
    fn test_shuffle64_s2_vs_s3() {
        let xs = [0x0, 0x1, 0x2, 0x3];
        let ys = [0x2, 0x3, 0x0, 0x1];
@ -1263,8 +1255,8 @@ mod test {
        assert_eq!(x_s3, unsafe { core::mem::transmute(x_s3) });
    }
    #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_lanes_u32x4() {
        let xs = [0x1, 0x2, 0x3, 0x4];
@ -1295,7 +1287,7 @@ mod test {
    }
    #[test]
-    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
    fn test_lanes_u64x2() {
        let xs = [0x1, 0x2];
@ -1326,7 +1318,6 @@ mod test {
    }
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_vec4_u32x4_s2() {
        let xs = [1, 2, 3, 4];
        let s2 = unsafe { SSE2::instance() };
@ -1342,7 +1333,7 @@ mod test {
    }
    #[test]
-    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
    fn test_vec4_u32x4_s4() {
        let xs = [1, 2, 3, 4];
        let s4 = unsafe { SSE41::instance() };
@ -1358,7 +1349,6 @@ mod test {
    }
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_vec2_u64x2_s2() {
        let xs = [0x1, 0x2];
        let s2 = unsafe { SSE2::instance() };
@ -1370,7 +1360,7 @@ mod test {
    }
    #[test]
-    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
    fn test_vec4_u64x2_s4() {
        let xs = [0x1, 0x2];
        let s4 = unsafe { SSE41::instance() };
@ -1493,19 +1483,13 @@ pub mod avx2 {
    impl<NI> ArithOps for u32x4x4_avx2<NI> where NI: Copy {}
    macro_rules! shuf_lane_bytes {
        ($name:ident, $k0:expr, $k1:expr) => {
-        #[inline(always)]
+            #[inline(always)]
-        fn $name(self) -> Self {
+            fn $name(self) -> Self {
-            Self::new(unsafe {
+                Self::new(unsafe {
-                [
+                    [
-                    _mm256_shuffle_epi8(
+                        _mm256_shuffle_epi8(self.x[0], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
-                        self.x[0],
+                        _mm256_shuffle_epi8(self.x[1], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
-                        _mm256_set_epi64x($k0, $k1, $k0, $k1),
+                    ]
                    ),
                    _mm256_shuffle_epi8(
                        self.x[1],
                        _mm256_set_epi64x($k0, $k1, $k0, $k1),
                    )
                ]
                })
            }
        };
@ -1523,7 +1507,7 @@ pub mod avx2 {
                        _mm256_or_si256(
                            _mm256_srli_epi32(self.x[1], $i as i32),
                            _mm256_slli_epi32(self.x[1], 32 - $i as i32),
-                        )
+                        ),
                    ]
                })
            }
		`@ -1 +1 @@`
			{"files":{"Cargo.toml":"3cf6fa0e4089c12be1d19ac9082aabed68d3f193dbd5024379c22a8a4db15abe","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6c19b3062aec77a92130be1ce55c90aeca0811bcb19951c25a50c8fbe89a26d0","src/lib.rs":"75beb27d89dcc7541c8e81ad1f4bec81908d8d5fa0e3adec47cb1a1f008dfd32","src/soft.rs":"6fb8aa428183ec09d63d45761507d8da6dffc45990f2d1fcfd387c4c856599cc","src/types.rs":"4890069359ed53575a6b9a8168037ccdd4b029c8d61d540e9770fe3c90359345","src/x86_64/mod.rs":"e95910e8c9d23c212055598a437bfcdfaebf4f12e03a04e75f961bc3e9e257a1","src/x86_64/sse2.rs":"da72424e9e3fabd6236d4de80edb1448dc0bac02797df8e15298d412cdaef10c"},"package":"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"}				{"files":{"Cargo.toml":"5d9b7092f252e3a6f7f50f6aeb1b873803b322cf5edbf0ae07e0a27d57df3fbf","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6f38250421846499c816c222d0b48155bfab09a9921e6c400d7b75567ab98f14","src/lib.rs":"bcf308d7037e259d6640a785556fcdb86653cb4f72f64fbfeda9899857c14479","src/soft.rs":"5cdee0e46c99a9d5078c0b3a733fe6fd1430ed0a888ef747bc2a1271265a1140","src/types.rs":"a354d2e3267c7c451a1420903314a358328346772ca964fa6c1ef7b96c983930","src/x86_64/mod.rs":"4d5a1da816f8e59bb385464f005075de889d1060e24dcee6709b321a3d6c92f7","src/x86_64/sse2.rs":"a9df3e7b3b8ffcd249a2cbed0e538042f7747dfa6ae7af0c9af364dc5a12d409"},"package":"ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"}