Bug 1882209 - Update encoding_rs to 0.8.34 to deal with rustc changes. r=glandium,supply-chain-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D207167
This commit is contained in:
Henri Sivonen 2024-04-19 04:21:21 +00:00
Родитель 34dd45ff4c
Коммит 5b199dcf62
230 изменённых файлов: 776 добавлений и 18449 удалений

25
Cargo.lock сгенерированный
Просмотреть файл

@ -91,6 +91,15 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46"
[[package]]
name = "any_all_workaround"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88fea40735f2cc320a5133ce772d39c571bd6c9b0d4c1a326926eecdd5af2e86"
dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "anyhow"
version = "1.0.69"
@ -1574,12 +1583,12 @@ dependencies = [
[[package]]
name = "encoding_rs"
version = "0.8.33"
version = "0.8.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
dependencies = [
"any_all_workaround",
"cfg-if 1.0.0",
"packed_simd",
]
[[package]]
@ -4320,16 +4329,6 @@ dependencies = [
"oxilangtag",
]
[[package]]
name = "packed_simd"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d"
dependencies = [
"cfg-if 1.0.0",
"num-traits",
]
[[package]]
name = "parking_lot"
version = "0.12.1"

Просмотреть файл

@ -265,7 +265,7 @@ endif
ifndef RUSTC_BOOTSTRAP
RUSTC_BOOTSTRAP := mozglue_static,qcms
ifdef MOZ_RUST_SIMD
RUSTC_BOOTSTRAP := $(RUSTC_BOOTSTRAP),encoding_rs,packed_simd
RUSTC_BOOTSTRAP := $(RUSTC_BOOTSTRAP),encoding_rs,any_all_workaround
endif
export RUSTC_BOOTSTRAP
endif

Просмотреть файл

@ -588,6 +588,12 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
criteria = "safe-to-deploy"
delta = "0.1.4 -> 0.1.5"
[[audits.any_all_workaround]]
who = "Henri Sivonen <hsivonen@hsivonen.fi>"
criteria = "safe-to-deploy"
version = "0.1.0"
notes = "The little code that is in this crate I reviewed and modified from packed_simd (which has previously been vendored in full instead of just this small part)."
[[audits.anyhow]]
who = "Mike Hommey <mh+mozilla@glandium.org>"
criteria = "safe-to-deploy"

Просмотреть файл

@ -190,6 +190,13 @@ user-id = 4484
user-login = "hsivonen"
user-name = "Henri Sivonen"
[[publisher.encoding_rs]]
version = "0.8.34"
when = "2024-04-10"
user-id = 4484
user-login = "hsivonen"
user-name = "Henri Sivonen"
[[publisher.errno]]
version = "0.3.8"
when = "2023-11-28"

1
third_party/rust/any_all_workaround/.cargo-checksum.json поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
{"files":{"Cargo.toml":"c38be4bc8ef1c4df398b3eae589681d1bbb54a2577c71d592e12db0af757c472","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"abebbd2620f915c70a873dd8221d99eadd8d017b7b194c22f3e0051f1fde193f","src/lib.rs":"e8a36b888f0f20accd4e7bfb2db9196e42b4be2d1014cb675981543d1372c610"},"package":"88fea40735f2cc320a5133ce772d39c571bd6c9b0d4c1a326926eecdd5af2e86"}

25
third_party/rust/any_all_workaround/Cargo.toml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,25 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
name = "any_all_workaround"
version = "0.1.0"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "Workaround for bad LLVM codegen for boolean reductions on 32-bit ARM"
homepage = "https://docs.rs/any_all_workaround/"
documentation = "https://docs.rs/any_all_workaround/"
readme = "README.md"
license = "MIT OR Apache-2.0"
repository = "https://github.com/hsivonen/any_all_workaround"
[dependencies.cfg-if]
version = "1.0"

Просмотреть файл

Просмотреть файл

13
third_party/rust/any_all_workaround/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,13 @@
# any_all_workaround
This is a workaround for bad codegen ([Rust bug](https://github.com/rust-lang/portable-simd/issues/146), [LLVM bug](https://github.com/llvm/llvm-project/issues/50466)) for the `any()` and `all()` reductions for NEON-backed SIMD vectors on 32-bit ARM. On other platforms these delegate to `any()` and `all()` in `core::simd`.
The plan is to abandon this crate once the LLVM bug is fixed or `core::simd` works around the LLVM bug.
The code is forked from the [`packed_simd` crate](https://raw.githubusercontent.com/hsivonen/packed_simd/d938e39bee9bc5c222f5f2f2a0df9e53b5ce36ae/src/codegen/reductions/mask/arm.rs).
This crate requires Nightly Rust as it depends on the `portable_simd` feature.
# License
`MIT OR Apache-2.0`, since that's how `packed_simd` is licensed.

99
third_party/rust/any_all_workaround/src/lib.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,99 @@
// This code began as a fork of
// https://raw.githubusercontent.com/rust-lang/packed_simd/d938e39bee9bc5c222f5f2f2a0df9e53b5ce36ae/src/codegen/reductions/mask/arm.rs
// which didn't have a license header on the file, but Cargo.toml said "MIT OR Apache-2.0".
// See LICENSE-MIT and LICENSE-APACHE.
#![no_std]
#![feature(portable_simd)]
#![cfg_attr(
all(
target_arch = "arm",
target_endian = "little",
target_feature = "neon",
target_feature = "v7"
),
feature(stdarch_arm_neon_intrinsics)
)]
use cfg_if::cfg_if;
use core::simd::mask16x8;
use core::simd::mask32x4;
use core::simd::mask8x16;
// Defines the `arm_128_v7_neon_impl!` macro in one of two flavours, selected
// at compile time. Each flavour generates a pair of public functions
// (`$all`, `$any`) that reduce a 128-bit mask vector of type `$id` to a `bool`.
cfg_if! {
if #[cfg(all(target_arch = "arm", target_endian = "little", target_feature = "neon", target_feature = "v7"))] {
// 32-bit little-endian ARMv7 with NEON: the 64-bit "half" mask types are
// only needed by this branch, so they are imported here.
use core::simd::mask8x8;
use core::simd::mask16x4;
use core::simd::mask32x2;
// Macro parameters:
//   $all / $any     - names of the generated reduction functions
//   $id             - the 128-bit mask type taken as input
//   $half           - the 64-bit mask type with the same lane width as `$id`
//   $vpmin / $vpmax - `core::arch::arm` intrinsics applied to the two halves
macro_rules! arm_128_v7_neon_impl {
($all:ident, $any:ident, $id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
#[inline]
pub fn $all(s: $id) -> bool {
use core::arch::arm::$vpmin;
use core::mem::transmute;
unsafe {
// Reinterpret the 128-bit mask as two 64-bit halves by reading the
// other field of a union.
// NOTE(review): this relies on `($half, $half)` occupying the same
// bytes as `$id`; Rust does not formally specify tuple layout - confirm.
union U {
halves: ($half, $half),
vec: $id,
}
let halves = U { vec: s }.halves;
// Combine the two halves into a single 64-bit vector with `$vpmin`
// (presumably a pairwise minimum, so a combined lane stays set only
// if both source lanes were set - see the Arm intrinsics reference),
// then reduce that half-width mask with `all()`.
let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1)));
h.all()
}
}
#[inline]
pub fn $any(s: $id) -> bool {
use core::arch::arm::$vpmax;
use core::mem::transmute;
unsafe {
// Same union-based split into two 64-bit halves as in `$all`
// (the same layout caveat applies).
union U {
halves: ($half, $half),
vec: $id,
}
let halves = U { vec: s }.halves;
// Combine the two halves with `$vpmax` (presumably a pairwise maximum,
// so a combined lane is set if either source lane was set), then
// reduce that half-width mask with `any()`.
let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1)));
h.any()
}
}
}
}
} else {
// Every other target: the generated functions simply forward to the
// `all()` / `any()` methods of the mask type. The `$half`, `$vpmin` and
// `$vpmax` parameters are accepted but unused, so the invocations of this
// macro can be written identically for both flavours.
macro_rules! arm_128_v7_neon_impl {
($all:ident, $any:ident, $id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
#[inline(always)]
pub fn $all(s: $id) -> bool {
s.all()
}
#[inline(always)]
pub fn $any(s: $id) -> bool {
s.any()
}
}
}
}
}
// Instantiate the public reduction functions for each 128-bit mask type.
// Argument order: all-fn name, any-fn name, 128-bit mask type, 64-bit half
// mask type, vpmin intrinsic, vpmax intrinsic.

// 16 lanes of 8 bits: generates `all_mask8x16` and `any_mask8x16`.
arm_128_v7_neon_impl!(
all_mask8x16,
any_mask8x16,
mask8x16,
mask8x8,
vpmin_u8,
vpmax_u8
);
// 8 lanes of 16 bits: generates `all_mask16x8` and `any_mask16x8`.
arm_128_v7_neon_impl!(
all_mask16x8,
any_mask16x8,
mask16x8,
mask16x4,
vpmin_u16,
vpmax_u16
);
// 4 lanes of 32 bits: generates `all_mask32x4` and `any_mask32x4`.
arm_128_v7_neon_impl!(
all_mask32x4,
any_mask32x4,
mask32x4,
mask32x2,
vpmin_u32,
vpmax_u32
);

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

19
third_party/rust/encoding_rs/Cargo.toml поставляемый
Просмотреть файл

@ -11,8 +11,9 @@
[package]
edition = "2018"
rust-version = "1.36"
name = "encoding_rs"
version = "0.8.33"
version = "0.8.34"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "A Gecko-oriented implementation of the Encoding Standard"
homepage = "https://docs.rs/encoding_rs/"
@ -36,13 +37,13 @@ repository = "https://github.com/hsivonen/encoding_rs"
[profile.release]
lto = true
[dependencies.any_all_workaround]
version = "0.1.0"
optional = true
[dependencies.cfg-if]
version = "1.0"
[dependencies.packed_simd]
version = "0.3.9"
optional = true
[dependencies.serde]
version = "1.0"
optional = true
@ -74,10 +75,4 @@ fast-legacy-encode = [
less-slow-big5-hanzi-encode = []
less-slow-gb-hanzi-encode = []
less-slow-kanji-encode = []
simd-accel = [
"packed_simd",
"packed_simd/into_bits",
]
[badges.travis-ci]
repository = "hsivonen/encoding_rs"
simd-accel = ["any_all_workaround"]

34
third_party/rust/encoding_rs/README.md поставляемый
Просмотреть файл

@ -167,13 +167,15 @@ There are currently these optional cargo features:
### `simd-accel`
Enables SIMD acceleration using the nightly-dependent `packed_simd` crate.
Enables SIMD acceleration using the nightly-dependent `portable_simd` standard
library feature.
This is an opt-in feature, because enabling this feature _opts out_ of Rust's
guarantees of future compilers compiling old code (aka. "stability story").
Currently, this has not been tested to be an improvement except for these
targets:
targets and enabling the `simd-accel` feature is expected to break the build
on other targets:
* x86_64
* i686
@ -185,22 +187,6 @@ above, and you are prepared _to have to revise your configuration when updating
Rust_, you should enable this feature. Otherwise, please _do not_ enable this
feature.
_Note!_ If you are compiling for a target that does not have 128-bit SIMD
enabled as part of the target definition and you are enabling 128-bit SIMD
using `-C target_feature`, you need to enable the `core_arch` Cargo feature
for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of
using the standard-library copy of `core::arch`, because the `core::arch`
module of the pre-compiled standard library has been compiled with the
assumption that the CPU doesn't have 128-bit SIMD. At present this applies
mainly to 32-bit ARM targets whose first component does not include the
substring `neon`.
The encoding_rs side of things has not been properly set up for POWER,
PowerPC, MIPS, etc., SIMD at this time, so even if you were to follow
the advice from the previous paragraph, you probably shouldn't use
the `simd-accel` option on the less mainstream architectures at this
time.
Used by Firefox.
### `serde`
@ -381,8 +367,9 @@ as semver-breaking, because this crate depends on `cfg-if`, which doesn't
appear to treat MSRV changes as semver-breaking, so it would be useless for
this crate to treat MSRV changes as semver-breaking.
As of 2021-02-04, MSRV appears to be Rust 1.36.0 for using the crate and
As of 2024-04-04, MSRV appears to be Rust 1.36.0 for using the crate and
1.42.0 for doc tests to pass without errors about the global allocator.
With the `simd-accel` feature, the MSRV is even higher.
## Compatibility with rust-encoding
@ -446,10 +433,17 @@ To regenerate the generated code:
- [x] Add actually fast CJK encode options.
- [ ] ~Investigate [Bob Steagall's lookup table acceleration for UTF-8](https://github.com/BobSteagall/CppNow2018/blob/master/FastConversionFromUTF-8/Fast%20Conversion%20From%20UTF-8%20with%20C%2B%2B%2C%20DFAs%2C%20and%20SSE%20Intrinsics%20-%20Bob%20Steagall%20-%20C%2B%2BNow%202018.pdf).~
- [x] Provide a build mode that works without `alloc` (with lesser API surface).
- [ ] Migrate to `std::simd` once it is stable and declare 1.0.
- [x] Migrate to `std::simd` ~once it is stable and declare 1.0.~
- [ ] Migrate `unsafe` slice access by larger types than `u8`/`u16` to `align_to`.
## Release Notes
### 0.8.34
* Use the `portable_simd` nightly feature of the standard library instead of the `packed_simd` crate. Only affects the `simd-accel` optional nightly feature.
* Internal documentation improvements and minor code improvements around `unsafe`.
* Added `rust-version` to `Cargo.toml`.
### 0.8.33
* Use `packed_simd` instead of `packed_simd_2` again now that updates are back under the `packed_simd` name. Only affects the `simd-accel` optional nightly feature.

388
third_party/rust/encoding_rs/src/ascii.rs поставляемый

Разница между файлами не показана из-за своего большого размера Загрузить разницу

36
third_party/rust/encoding_rs/src/handles.rs поставляемый
Просмотреть файл

@ -34,7 +34,7 @@ use crate::simd_funcs::*;
all(target_endian = "little", target_feature = "neon")
)
))]
use packed_simd::u16x8;
use core::simd::u16x8;
use super::DecoderResult;
use super::EncoderResult;
@ -90,19 +90,23 @@ impl Endian for LittleEndian {
#[derive(Debug, Copy, Clone)]
struct UnalignedU16Slice {
// Safety invariant: ptr must be valid for reading 2*len bytes
ptr: *const u8,
len: usize,
}
impl UnalignedU16Slice {
/// Safety: ptr must be valid for reading 2*len bytes
#[inline(always)]
pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
// Safety: field invariant passed up to caller here
UnalignedU16Slice { ptr, len }
}
#[inline(always)]
pub fn trim_last(&mut self) {
assert!(self.len > 0);
// Safety: invariant upheld here: a slice is still valid with a shorter len
self.len -= 1;
}
@ -113,7 +117,9 @@ impl UnalignedU16Slice {
assert!(i < self.len);
unsafe {
let mut u: MaybeUninit<u16> = MaybeUninit::uninit();
// Safety: i is at most len - 1, which works here
::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2);
// Safety: valid read above lets us do this
u.assume_init()
}
}
@ -121,8 +127,13 @@ impl UnalignedU16Slice {
#[cfg(feature = "simd-accel")]
#[inline(always)]
pub fn simd_at(&self, i: usize) -> u16x8 {
// Safety: i/len are on the scale of u16s, each one corresponds to 2 u8s
assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
let byte_index = i * 2;
// Safety: load16_unaligned needs SIMD_STRIDE_SIZE=16 u8 elements to read,
// or 16/2 = 8 u16 elements to read.
// We have checked that we have at least that many above.
unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
}
@ -136,6 +147,7 @@ impl UnalignedU16Slice {
// XXX the return value should be restricted not to
// outlive self.
assert!(from <= self.len);
// Safety: This upholds the same invariant: `from` is in bounds and we're returning a shorter slice
unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
}
@ -144,6 +156,8 @@ impl UnalignedU16Slice {
pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
assert!(self.len <= other.len());
let mut offset = 0;
// Safety: SIMD_STRIDE_SIZE is measured in bytes, whereas len is in u16s. We check we can
// munch SIMD_STRIDE_SIZE / 2 u16s which means we can write SIMD_STRIDE_SIZE u8s
if SIMD_STRIDE_SIZE / 2 <= self.len {
let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
loop {
@ -151,6 +165,7 @@ impl UnalignedU16Slice {
if E::OPPOSITE_ENDIAN {
simd = simd_byte_swap(simd);
}
// Safety: we have enough space on the other side to write this
unsafe {
store8_unaligned(other.as_mut_ptr().add(offset), simd);
}
@ -158,6 +173,7 @@ impl UnalignedU16Slice {
break;
}
offset += SIMD_STRIDE_SIZE / 2;
// Safety: This ensures we still have space for writing SIMD_STRIDE_SIZE u8s
if offset > len_minus_stride {
break;
}
@ -236,6 +252,7 @@ fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
) -> CopyAsciiResult<usize, (u16, usize)> {
let len = ::core::cmp::min(src.len(), dst.len());
let mut offset = 0;
// Safety: This check ensures we are able to read/write at least SIMD_STRIDE_SIZE elements
if SIMD_STRIDE_SIZE <= len {
let len_minus_stride = len - SIMD_STRIDE_SIZE;
loop {
@ -249,10 +266,13 @@ fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
break;
}
let packed = simd_pack(first, second);
// Safety: We are able to write SIMD_STRIDE_SIZE elements in this iteration
unsafe {
store16_unaligned(dst.as_mut_ptr().add(offset), packed);
}
offset += SIMD_STRIDE_SIZE;
// Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which ensures that we can write at least SIMD_STRIDE_SIZE elements
// in the next iteration
if offset > len_minus_stride {
break;
}
@ -637,7 +657,7 @@ impl<'a> Utf16Destination<'a> {
self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
}
#[inline(always)]
pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
fn write_surrogate_pair(&mut self, high: u16, low: u16) {
self.write_code_unit(high);
self.write_code_unit(low);
}
@ -646,6 +666,7 @@ impl<'a> Utf16Destination<'a> {
self.write_bmp_excl_ascii(combined);
self.write_bmp_excl_ascii(combining);
}
// Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
#[inline(always)]
pub fn copy_ascii_from_check_space_bmp<'b>(
&'b mut self,
@ -659,6 +680,8 @@ impl<'a> Utf16Destination<'a> {
} else {
(DecoderResult::InputEmpty, src_remaining.len())
};
// Safety: This function is documented as needing valid pointers for src/dest and len, which
// is true since we've passed the minimum length of the two
match unsafe {
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
} {
@ -667,16 +690,20 @@ impl<'a> Utf16Destination<'a> {
self.pos += length;
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
}
// Safety: the function is documented as returning bytes >=0x80 in the Some
Some((non_ascii, consumed)) => {
source.pos += consumed;
self.pos += consumed;
source.pos += 1; // +1 for non_ascii
// Safety: non-ascii bubbled out here
non_ascii
}
}
};
// Safety: non-ascii returned here
CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
}
// Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
#[inline(always)]
pub fn copy_ascii_from_check_space_astral<'b>(
&'b mut self,
@ -691,6 +718,8 @@ impl<'a> Utf16Destination<'a> {
} else {
(DecoderResult::InputEmpty, src_remaining.len())
};
// Safety: This function is documented as needing valid pointers for src/dest and len, which
// is true since we've passed the minimum length of the two
match unsafe {
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
} {
@ -699,11 +728,13 @@ impl<'a> Utf16Destination<'a> {
self.pos += length;
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
}
// Safety: the function is documented as returning bytes >=0x80 in the Some
Some((non_ascii, consumed)) => {
source.pos += consumed;
self.pos += consumed;
if self.pos + 1 < dst_len {
source.pos += 1; // +1 for non_ascii
// Safety: non-ascii bubbled out here
non_ascii
} else {
return CopyAsciiResult::Stop((
@ -715,6 +746,7 @@ impl<'a> Utf16Destination<'a> {
}
}
};
// Safety: non-ascii returned here
CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
}
#[inline(always)]

13
third_party/rust/encoding_rs/src/lib.rs поставляемый
Просмотреть файл

@ -689,7 +689,7 @@
//! for discussion about the UTF-16 family.
#![no_std]
#![cfg_attr(feature = "simd-accel", feature(core_intrinsics))]
#![cfg_attr(feature = "simd-accel", feature(core_intrinsics, portable_simd))]
#[cfg(feature = "alloc")]
#[cfg_attr(test, macro_use)]
@ -699,17 +699,6 @@ extern crate core;
#[macro_use]
extern crate cfg_if;
#[cfg(all(
feature = "simd-accel",
any(
target_feature = "sse2",
all(target_endian = "little", target_arch = "aarch64"),
all(target_endian = "little", target_feature = "neon")
)
))]
#[macro_use(shuffle)]
extern crate packed_simd;
#[cfg(feature = "serde")]
extern crate serde;

18
third_party/rust/encoding_rs/src/mem.rs поставляемый
Просмотреть файл

@ -116,6 +116,11 @@ macro_rules! by_unit_check_alu {
}
let len_minus_stride = len - ALU_ALIGNMENT / unit_size;
if offset + (4 * (ALU_ALIGNMENT / unit_size)) <= len {
// Safety: the above check lets us perform 4 consecutive reads of
// length ALU_ALIGNMENT / unit_size. ALU_ALIGNMENT is the size of usize, and unit_size
// is the size of the `src` pointer, so this is equal to performing four usize reads.
//
// This invariant is upheld on all loop iterations
let len_minus_unroll = len - (4 * (ALU_ALIGNMENT / unit_size));
loop {
let unroll_accu = unsafe { *(src.add(offset) as *const usize) }
@ -134,12 +139,14 @@ macro_rules! by_unit_check_alu {
return false;
}
offset += 4 * (ALU_ALIGNMENT / unit_size);
// Safety: this check lets us continue to perform the 4 reads earlier
if offset > len_minus_unroll {
break;
}
}
}
while offset <= len_minus_stride {
// Safety: the above check lets us perform one usize read.
accu |= unsafe { *(src.add(offset) as *const usize) };
offset += ALU_ALIGNMENT / unit_size;
}
@ -189,6 +196,11 @@ macro_rules! by_unit_check_simd {
}
let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
if offset + (4 * (SIMD_STRIDE_SIZE / unit_size)) <= len {
// Safety: the above check lets us perform 4 consecutive reads of
// length SIMD_STRIDE_SIZE / unit_size. SIMD_STRIDE_SIZE is the size of $simd_ty, and unit_size
// is the size of the `src` pointer, so this is equal to performing four $simd_ty reads.
//
// This invariant is upheld on all loop iterations
let len_minus_unroll = len - (4 * (SIMD_STRIDE_SIZE / unit_size));
loop {
let unroll_accu = unsafe { *(src.add(offset) as *const $simd_ty) }
@ -208,6 +220,7 @@ macro_rules! by_unit_check_simd {
return false;
}
offset += 4 * (SIMD_STRIDE_SIZE / unit_size);
// Safety: this check lets us continue to perform the 4 reads earlier
if offset > len_minus_unroll {
break;
}
@ -215,6 +228,7 @@ macro_rules! by_unit_check_simd {
}
let mut simd_accu = $splat;
while offset <= len_minus_stride {
// Safety: the above check lets us perform one $simd_ty read.
simd_accu = simd_accu | unsafe { *(src.add(offset) as *const $simd_ty) };
offset += SIMD_STRIDE_SIZE / unit_size;
}
@ -234,8 +248,8 @@ macro_rules! by_unit_check_simd {
cfg_if! {
if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
use crate::simd_funcs::*;
use packed_simd::u8x16;
use packed_simd::u16x8;
use core::simd::u8x16;
use core::simd::u16x8;
const SIMD_ALIGNMENT: usize = 16;

146
third_party/rust/encoding_rs/src/simd_funcs.rs поставляемый
Просмотреть файл

@ -7,55 +7,74 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use packed_simd::u16x8;
use packed_simd::u8x16;
use packed_simd::IntoBits;
use any_all_workaround::all_mask16x8;
use any_all_workaround::all_mask8x16;
use any_all_workaround::any_mask16x8;
use any_all_workaround::any_mask8x16;
use core::simd::cmp::SimdPartialEq;
use core::simd::cmp::SimdPartialOrd;
use core::simd::mask16x8;
use core::simd::mask8x16;
use core::simd::simd_swizzle;
use core::simd::u16x8;
use core::simd::u8x16;
use core::simd::ToBytes;
// TODO: Migrate unaligned access to stdlib code if/when the RFC
// https://github.com/rust-lang/rfcs/pull/1725 is implemented.
/// Safety invariant: ptr must be valid for an unaligned read of 16 bytes
#[inline(always)]
pub unsafe fn load16_unaligned(ptr: *const u8) -> u8x16 {
let mut simd = ::core::mem::uninitialized();
::core::ptr::copy_nonoverlapping(ptr, &mut simd as *mut u8x16 as *mut u8, 16);
simd
let mut simd = ::core::mem::MaybeUninit::<u8x16>::uninit();
::core::ptr::copy_nonoverlapping(ptr, simd.as_mut_ptr() as *mut u8, 16);
// Safety: copied 16 bytes of initialized memory into this, it is now initialized
simd.assume_init()
}
/// Safety invariant: ptr must be valid for an aligned-for-u8x16 read of 16 bytes
#[allow(dead_code)]
#[inline(always)]
pub unsafe fn load16_aligned(ptr: *const u8) -> u8x16 {
*(ptr as *const u8x16)
}
/// Safety invariant: ptr must be valid for an unaligned store of 16 bytes
#[inline(always)]
pub unsafe fn store16_unaligned(ptr: *mut u8, s: u8x16) {
::core::ptr::copy_nonoverlapping(&s as *const u8x16 as *const u8, ptr, 16);
}
/// Safety invariant: ptr must be valid for an aligned-for-u8x16 store of 16 bytes
#[allow(dead_code)]
#[inline(always)]
pub unsafe fn store16_aligned(ptr: *mut u8, s: u8x16) {
*(ptr as *mut u8x16) = s;
}
/// Safety invariant: ptr must be valid for an unaligned read of 16 bytes
#[inline(always)]
pub unsafe fn load8_unaligned(ptr: *const u16) -> u16x8 {
let mut simd = ::core::mem::uninitialized();
::core::ptr::copy_nonoverlapping(ptr as *const u8, &mut simd as *mut u16x8 as *mut u8, 16);
simd
let mut simd = ::core::mem::MaybeUninit::<u16x8>::uninit();
::core::ptr::copy_nonoverlapping(ptr as *const u8, simd.as_mut_ptr() as *mut u8, 16);
// Safety: copied 16 bytes of initialized memory into this, it is now initialized
simd.assume_init()
}
/// Safety invariant: ptr must be valid for an aligned-for-u16x8 read of 16 bytes
#[allow(dead_code)]
#[inline(always)]
pub unsafe fn load8_aligned(ptr: *const u16) -> u16x8 {
*(ptr as *const u16x8)
}
/// Safety invariant: ptr must be valid for an unaligned store of 16 bytes
#[inline(always)]
pub unsafe fn store8_unaligned(ptr: *mut u16, s: u16x8) {
::core::ptr::copy_nonoverlapping(&s as *const u16x8 as *const u8, ptr as *mut u8, 16);
}
/// Safety invariant: ptr must be valid for an aligned-for-u16x8 store of 16 bytes
#[allow(dead_code)]
#[inline(always)]
pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) {
@ -100,7 +119,7 @@ pub fn simd_byte_swap(s: u16x8) -> u16x8 {
#[inline(always)]
pub fn to_u16_lanes(s: u8x16) -> u16x8 {
s.into_bits()
u16x8::from_ne_bytes(s)
}
cfg_if! {
@ -108,10 +127,11 @@ cfg_if! {
// Expose low-level mask instead of higher-level conclusion,
// because the non-ASCII case would perform less well otherwise.
// Safety-usable invariant: This returned value is whether each high bit is set
#[inline(always)]
pub fn mask_ascii(s: u8x16) -> i32 {
unsafe {
_mm_movemask_epi8(s.into_bits())
_mm_movemask_epi8(s.into())
}
}
@ -125,14 +145,16 @@ cfg_if! {
#[inline(always)]
pub fn simd_is_ascii(s: u8x16) -> bool {
unsafe {
_mm_movemask_epi8(s.into_bits()) == 0
// Safety: We have cfg()d the correct platform
_mm_movemask_epi8(s.into()) == 0
}
}
} else if #[cfg(target_arch = "aarch64")]{
#[inline(always)]
pub fn simd_is_ascii(s: u8x16) -> bool {
unsafe {
vmaxvq_u8(s.into_bits()) < 0x80
// Safety: We have cfg()d the correct platform
vmaxvq_u8(s.into()) < 0x80
}
}
} else {
@ -141,7 +163,7 @@ cfg_if! {
// This optimizes better on ARM than
// the lt formulation.
let highest_ascii = u8x16::splat(0x7F);
!s.gt(highest_ascii).any()
!any_mask8x16(s.simd_gt(highest_ascii))
}
}
}
@ -154,20 +176,21 @@ cfg_if! {
return true;
}
let above_str_latin1 = u8x16::splat(0xC4);
s.lt(above_str_latin1).all()
s.simd_lt(above_str_latin1).all()
}
} else if #[cfg(target_arch = "aarch64")]{
#[inline(always)]
pub fn simd_is_str_latin1(s: u8x16) -> bool {
unsafe {
vmaxvq_u8(s.into_bits()) < 0xC4
// Safety: We have cfg()d the correct platform
vmaxvq_u8(s.into()) < 0xC4
}
}
} else {
#[inline(always)]
pub fn simd_is_str_latin1(s: u8x16) -> bool {
let above_str_latin1 = u8x16::splat(0xC4);
s.lt(above_str_latin1).all()
all_mask8x16(s.simd_lt(above_str_latin1))
}
}
}
@ -177,21 +200,23 @@ cfg_if! {
#[inline(always)]
pub fn simd_is_basic_latin(s: u16x8) -> bool {
unsafe {
vmaxvq_u16(s.into_bits()) < 0x80
// Safety: We have cfg()d the correct platform
vmaxvq_u16(s.into()) < 0x80
}
}
#[inline(always)]
pub fn simd_is_latin1(s: u16x8) -> bool {
unsafe {
vmaxvq_u16(s.into_bits()) < 0x100
// Safety: We have cfg()d the correct platform
vmaxvq_u16(s.into()) < 0x100
}
}
} else {
#[inline(always)]
pub fn simd_is_basic_latin(s: u16x8) -> bool {
let above_ascii = u16x8::splat(0x80);
s.lt(above_ascii).all()
all_mask16x8(s.simd_lt(above_ascii))
}
#[inline(always)]
@ -200,7 +225,7 @@ cfg_if! {
// seems faster in this case while the above
// function is better the other way round...
let highest_latin1 = u16x8::splat(0xFF);
!s.gt(highest_latin1).any()
!any_mask16x8(s.simd_gt(highest_latin1))
}
}
}
@ -209,7 +234,7 @@ cfg_if! {
pub fn contains_surrogates(s: u16x8) -> bool {
let mask = u16x8::splat(0xF800);
let surrogate_bits = u16x8::splat(0xD800);
(s & mask).eq(surrogate_bits).any()
any_mask16x8((s & mask).simd_eq(surrogate_bits))
}
cfg_if! {
@ -217,7 +242,8 @@ cfg_if! {
macro_rules! aarch64_return_false_if_below_hebrew {
($s:ident) => ({
unsafe {
if vmaxvq_u16($s.into_bits()) < 0x0590 {
// Safety: We have cfg()d the correct platform
if vmaxvq_u16($s.into()) < 0x0590 {
return false;
}
}
@ -234,7 +260,7 @@ cfg_if! {
macro_rules! non_aarch64_return_false_if_all {
($s:ident) => ({
if $s.all() {
if all_mask16x8($s) {
return false;
}
})
@ -245,7 +271,7 @@ cfg_if! {
macro_rules! in_range16x8 {
($s:ident, $start:expr, $end:expr) => {{
// SIMD sub is wrapping
($s - u16x8::splat($start)).lt(u16x8::splat($end - $start))
($s - u16x8::splat($start)).simd_lt(u16x8::splat($end - $start))
}};
}
@ -259,43 +285,44 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
aarch64_return_false_if_below_hebrew!(s);
let below_hebrew = s.lt(u16x8::splat(0x0590));
let below_hebrew = s.simd_lt(u16x8::splat(0x0590));
non_aarch64_return_false_if_all!(below_hebrew);
if (below_hebrew | in_range16x8!(s, 0x0900, 0x200F) | in_range16x8!(s, 0x2068, 0xD802)).all() {
if all_mask16x8(
below_hebrew | in_range16x8!(s, 0x0900, 0x200F) | in_range16x8!(s, 0x2068, 0xD802),
) {
return false;
}
// Quick refutation failed. Let's do the full check.
(in_range16x8!(s, 0x0590, 0x0900)
| in_range16x8!(s, 0xFB1D, 0xFE00)
| in_range16x8!(s, 0xFE70, 0xFEFF)
| in_range16x8!(s, 0xD802, 0xD804)
| in_range16x8!(s, 0xD83A, 0xD83C)
| s.eq(u16x8::splat(0x200F))
| s.eq(u16x8::splat(0x202B))
| s.eq(u16x8::splat(0x202E))
| s.eq(u16x8::splat(0x2067)))
.any()
any_mask16x8(
(in_range16x8!(s, 0x0590, 0x0900)
| in_range16x8!(s, 0xFB1D, 0xFE00)
| in_range16x8!(s, 0xFE70, 0xFEFF)
| in_range16x8!(s, 0xD802, 0xD804)
| in_range16x8!(s, 0xD83A, 0xD83C)
| s.simd_eq(u16x8::splat(0x200F))
| s.simd_eq(u16x8::splat(0x202B))
| s.simd_eq(u16x8::splat(0x202E))
| s.simd_eq(u16x8::splat(0x2067))),
)
}
#[inline(always)]
pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) {
unsafe {
let first: u8x16 = shuffle!(
s,
u8x16::splat(0),
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
);
let second: u8x16 = shuffle!(
s,
u8x16::splat(0),
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
);
(first.into_bits(), second.into_bits())
}
let first: u8x16 = simd_swizzle!(
s,
u8x16::splat(0),
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
);
let second: u8x16 = simd_swizzle!(
s,
u8x16::splat(0),
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
);
(u16x8::from_ne_bytes(first), u16x8::from_ne_bytes(second))
}
cfg_if! {
@ -303,21 +330,20 @@ cfg_if! {
#[inline(always)]
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
unsafe {
_mm_packus_epi16(a.into_bits(), b.into_bits()).into_bits()
// Safety: We have cfg()d the correct platform
_mm_packus_epi16(a.into(), b.into()).into()
}
}
} else {
#[inline(always)]
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
unsafe {
let first: u8x16 = a.into_bits();
let second: u8x16 = b.into_bits();
shuffle!(
first,
second,
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
)
}
let first: u8x16 = a.to_ne_bytes();
let second: u8x16 = b.to_ne_bytes();
simd_swizzle!(
first,
second,
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
)
}
}
}

Просмотреть файл

@ -53,6 +53,9 @@ impl SingleByteDecoder {
// statically omit the bound check when accessing
// `[u16; 128]` with an index
// `non_ascii as usize - 0x80usize`.
//
// Safety: `non_ascii` is a u8 byte >=0x80, from the invariants
// on Utf8Destination::copy_ascii_from_check_space_bmp()
let mapped =
unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
// let mapped = self.table[non_ascii as usize - 0x80usize];
@ -151,9 +154,12 @@ impl SingleByteDecoder {
} else {
(DecoderResult::InputEmpty, src.len())
};
// Safety invariant: converted <= length. Quite often we have `converted < length`
// which will be separately marked.
let mut converted = 0usize;
'outermost: loop {
match unsafe {
// Safety: length is the minimum length, `src/dst + x` will always be valid for reads/writes of `len - x`
ascii_to_basic_latin(
src.as_ptr().add(converted),
dst.as_mut_ptr().add(converted),
@ -164,6 +170,12 @@ impl SingleByteDecoder {
return (pending, length, length);
}
Some((mut non_ascii, consumed)) => {
// Safety invariant: `converted <= length` upheld, since this can only consume
// up to `length - converted` bytes.
//
// Furthermore, in this context,
// we can assume `converted < length` since this branch is only ever hit when
// ascii_to_basic_latin fails to consume the entire slice
converted += consumed;
'middle: loop {
// `converted` doesn't count the reading of `non_ascii` yet.
@ -172,6 +184,9 @@ impl SingleByteDecoder {
// statically omit the bound check when accessing
// `[u16; 128]` with an index
// `non_ascii as usize - 0x80usize`.
//
// Safety: We can rely on `non_ascii` being between `0x80` and `0xFF` due to
// the invariants of `ascii_to_basic_latin()`, and our table has enough space for that.
let mapped =
unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
// let mapped = self.table[non_ascii as usize - 0x80usize];
@ -183,9 +198,10 @@ impl SingleByteDecoder {
);
}
unsafe {
// The bound check has already been performed
// Safety: As mentioned above, `converted < length`
*(dst.get_unchecked_mut(converted)) = mapped;
}
// Safety: `converted <= length` upheld, since `converted < length` before this
converted += 1;
// Next, handle ASCII punctuation and non-ASCII without
// going back to ASCII acceleration. Non-ASCII scripts
@ -198,7 +214,10 @@ impl SingleByteDecoder {
if converted == length {
return (pending, length, length);
}
// Safety: We are back to `converted < length` because of the == above
// and can perform this check.
let mut b = unsafe { *(src.get_unchecked(converted)) };
// Safety: `converted < length` is upheld for this loop
'innermost: loop {
if b > 127 {
non_ascii = b;
@ -208,15 +227,20 @@ impl SingleByteDecoder {
// byte unconditionally instead of trying to unread it
// to make it part of the next SIMD stride.
unsafe {
// Safety: `converted < length` is true for this loop
*(dst.get_unchecked_mut(converted)) = u16::from(b);
}
// Safety: We are now at `converted <= length`. We should *not* `continue`
// the loop without reverifying
converted += 1;
if b < 60 {
// We've got punctuation
if converted == length {
return (pending, length, length);
}
// Safety: we're back to `converted < length` because of the == above
b = unsafe { *(src.get_unchecked(converted)) };
// Safety: The loop continues as `converted < length`
continue 'innermost;
}
// We've got markup or ASCII text
@ -234,6 +258,8 @@ impl SingleByteDecoder {
loop {
if let Some((non_ascii, offset)) = validate_ascii(bytes) {
total += offset;
// Safety: We can rely on `non_ascii` being between `0x80` and `0xFF` due to
// the invariants of `validate_ascii()`, and our table has enough space for that.
let mapped = unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
if mapped != u16::from(non_ascii) {
return total;
@ -384,9 +410,12 @@ impl SingleByteEncoder {
} else {
(EncoderResult::InputEmpty, src.len())
};
// Safety invariant: converted <= length. Quite often we have `converted < length`
// which will be separately marked.
let mut converted = 0usize;
'outermost: loop {
match unsafe {
// Safety: length is the minimum length, `src/dst + x` will always be valid for reads/writes of `len - x`
basic_latin_to_ascii(
src.as_ptr().add(converted),
dst.as_mut_ptr().add(converted),
@ -397,15 +426,23 @@ impl SingleByteEncoder {
return (pending, length, length);
}
Some((mut non_ascii, consumed)) => {
// Safety invariant: `converted <= length` upheld, since this can only consume
// up to `length - converted` bytes.
//
// Furthermore, in this context,
// we can assume `converted < length` since this branch is only ever hit when
// basic_latin_to_ascii fails to consume the entire slice
converted += consumed;
'middle: loop {
// `converted` doesn't count the reading of `non_ascii` yet.
match self.encode_u16(non_ascii) {
Some(byte) => {
unsafe {
// Safety: we're allowed this access since `converted < length`
*(dst.get_unchecked_mut(converted)) = byte;
}
converted += 1;
// `converted <= length` now
}
None => {
// At this point, we need to know if we
@ -421,6 +458,8 @@ impl SingleByteEncoder {
converted,
);
}
// Safety: converted < length from outside the match, and `converted + 1 != length`,
// So `converted + 1 < length` as well. We're in bounds
let second =
u32::from(unsafe { *src.get_unchecked(converted + 1) });
if second & 0xFC00u32 != 0xDC00u32 {
@ -432,6 +471,18 @@ impl SingleByteEncoder {
}
// The next code unit is a low surrogate.
let astral: char = unsafe {
// Safety: We can rely on non_ascii being 0xD800-0xDBFF since the high bits are 0xD800.
// Then ((non_ascii << 10) - (0xD800 << 10)) is (0 to 0x3FF) << 10, which is between
// 0 and 0xFFC00. Adding the 0x10000 gives a range of 0x10000 to 0x10FC00. Subtracting the 0xDC00
// gives 0x2400 to 0x102000.
// The second term is between 0xDC00 and 0xDFFF from the check above. This gives a maximum
// possible range of (0x2400 + 0xDC00) to (0x102000 + 0xDFFF), which is 0x10000 to 0x10FFFF.
// This is in range for a `char` and contains no surrogates.
//
// From a Unicode principles perspective this can also be verified as we have checked that `non_ascii` is a high surrogate
// (0xD800..=0xDBFF), and that `second` is a low surrogate (`0xDC00..=0xDFFF`), and we are applying the reverse of the UTF-16 transformation
// algorithm <https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF>, by applying the high surrogate - 0xD800 to the
// high ten bits, and the low surrogate - 0xDC00 to the low ten bits, and then adding 0x10000
::core::char::from_u32_unchecked(
(u32::from(non_ascii) << 10) + second
- (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
@ -456,6 +507,7 @@ impl SingleByteEncoder {
converted + 1, // +1 `for non_ascii`
converted,
);
// Safety: This branch diverges, so no need to uphold invariants on `converted`
}
}
// Next, handle ASCII punctuation and non-ASCII without
@ -469,8 +521,12 @@ impl SingleByteEncoder {
if converted == length {
return (pending, length, length);
}
// Safety: we're back to `converted < length` due to the == above and can perform
// the unchecked read
let mut unit = unsafe { *(src.get_unchecked(converted)) };
'innermost: loop {
// Safety: This loop always begins with `converted < length`, see
// the invariant outside and the comment on the continue below
if unit > 127 {
non_ascii = unit;
continue 'middle;
@ -479,19 +535,25 @@ impl SingleByteEncoder {
// byte unconditionally instead of trying to unread it
// to make it part of the next SIMD stride.
unsafe {
// Safety: Can rely on converted < length
*(dst.get_unchecked_mut(converted)) = unit as u8;
}
converted += 1;
// `converted <= length` here
if unit < 60 {
// We've got punctuation
if converted == length {
return (pending, length, length);
}
// Safety: `converted < length` due to the == above. The read is safe.
unit = unsafe { *(src.get_unchecked(converted)) };
// Safety: This only happens if `converted < length`, maintaining it
continue 'innermost;
}
// We've got markup or ASCII text
continue 'outermost;
// Safety: All other routes to here diverge so the continue is the only
// way to run the innermost loop.
}
}
}

Просмотреть файл

@ -14,12 +14,13 @@ use crate::variant::*;
cfg_if! {
if #[cfg(feature = "simd-accel")] {
use simd_funcs::*;
use packed_simd::u16x8;
use core::simd::u16x8;
use core::simd::cmp::SimdPartialOrd;
#[inline(always)]
fn shift_upper(unpacked: u16x8) -> u16x8 {
let highest_ascii = u16x8::splat(0x7F);
unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
} else {
}
}
@ -116,10 +117,15 @@ impl UserDefinedDecoder {
let simd_iterations = length >> 4;
let src_ptr = src.as_ptr();
let dst_ptr = dst.as_mut_ptr();
// Safety: This is `for i in 0..length / 16`
for i in 0..simd_iterations {
// Safety: This is in bounds: length is the minimum valid length for both src/dst
// and i ranges to length/16, so multiplying by 16 will always be `< length` and can do
// a 16 byte read
let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) };
let (first, second) = simd_unpack(input);
unsafe {
// Safety: same as above, but this is two consecutive stores of eight `u16` lanes each
store8_unaligned(dst_ptr.add(i * 16), shift_upper(first));
store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second));
}

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

83
third_party/rust/packed_simd/Cargo.toml поставляемый
Просмотреть файл

@ -1,83 +0,0 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
name = "packed_simd"
version = "0.3.9"
build = "build.rs"
description = "Portable Packed SIMD vectors"
homepage = "https://github.com/rust-lang/packed_simd"
documentation = "https://docs.rs/crate/packed_simd/"
readme = "README.md"
keywords = [
"simd",
"vector",
"portability",
]
categories = [
"hardware-support",
"concurrency",
"no-std",
"data-structures",
]
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-lang/packed_simd"
[package.metadata.docs.rs]
features = ["into_bits"]
rustdoc-args = [
"--cfg",
"doc_cfg",
]
[dependencies.cfg-if]
version = "1.0.0"
[dependencies.core_arch]
version = "0.1.5"
optional = true
[dependencies.num-traits]
version = "0.2.14"
features = ["libm"]
default-features = false
[dev-dependencies.arrayvec]
version = "^0.5"
default-features = false
[dev-dependencies.paste]
version = "^1"
[features]
default = []
into_bits = []
libcore_neon = []
[target."cfg(target_arch = \"x86_64\")".dependencies.sleef-sys]
version = "0.1.2"
optional = true
[target.wasm32-unknown-unknown.dev-dependencies.wasm-bindgen]
version = "=0.2.87"
[target.wasm32-unknown-unknown.dev-dependencies.wasm-bindgen-test]
version = "=0.3.37"
[badges.is-it-maintained-issue-resolution]
repository = "rust-lang/packed_simd"
[badges.is-it-maintained-open-issues]
repository = "rust-lang/packed_simd"
[badges.maintenance]
status = "experimental"

144
third_party/rust/packed_simd/README.md поставляемый
Просмотреть файл

@ -1,144 +0,0 @@
# `Simd<[T; N]>`
## Implementation of [Rust RFC #2366: `std::simd`][rfc2366]
[![Latest Version]][crates.io] [![docs]][master_docs]
**WARNING**: this crate only supports the most recent nightly Rust toolchain
and will be superseded by [`#![feature(portable_simd)]`](https://github.com/rust-lang/portable-simd).
## Documentation
* [API docs (`master` branch)][master_docs]
* [Performance guide][perf_guide]
* [API docs (`docs.rs`)][docs.rs]
* [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale,
discussion, etc.
## Examples
Most of the examples come with both a scalar and a vectorized implementation.
* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench)
* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux)
* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse)
* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot)
* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody)
* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing)
* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm)
* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform)
* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil)
* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product)
## Cargo features
* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait
implementations for the vector types. These allow reinterpreting the bits of a
vector type as those of another vector type safely by just using the
`.into_bits()` method.
## Performance
The following [ISPC] examples are also part of `packed_simd`'s
[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/)
directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s
Single-Program-Multiple-Data (SPMD) programming model. The performance results
on different hardware are shown in the `readme.md` of each example. The following
table summarizes the performance ranges, where `+` means speed-up and `-`
slowdown:
* `aobench`: `[-1.02x, +1.53x]`,
* `stencil`: `[+1.06x, +1.72x]`,
* `mandelbrot`: `[-1.74x, +1.2x]`,
* `options_pricing`:
* `black_scholes`: `+1.0x`
* `binomial_put`: `+1.4x`
While SPMD is not the intended use case for `packed_simd`, it is possible to
combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming
model in Rust. Writing performant code is not as straightforward as with
[ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one
can easily match and often out-perform [ISPC]'s "default performance".
## Platform support
The following table describes the supported platforms: `build` shows whether
the library compiles without issues for a given target, while `run` shows
whether the test suite passes for a given target.
| **Linux** | **build** | **run** |
|---------------------------------------|-----------|---------|
| `i586-unknown-linux-gnu` | ✓ | ✗ |
| `i686-unknown-linux-gnu` | ✓ | ✗ |
| `x86_64-unknown-linux-gnu` | ✓ | ✓ |
| `arm-unknown-linux-gnueabihf` | ✓ | ✓ |
| `armv7-unknown-linux-gnueabi` | ✓ | ✓ |
| `aarch64-unknown-linux-gnu` | ✓ | ✓ |
| `powerpc-unknown-linux-gnu` | ✓ | ✗ |
| `powerpc64-unknown-linux-gnu` | ✓ | ✗ |
| `powerpc64le-unknown-linux-gnu` | ✓ | ✓ |
| `s390x-unknown-linux-gnu` | ✓ | ✗ |
| `sparc64-unknown-linux-gnu` | ✓ | ✗ |
| `thumbv7neon-unknown-linux-gnueabihf` | ✓ | ✓ |
| **MacOSX** | **build** | **run** |
| `x86_64-apple-darwin` | ✓ | ✓ |
| **Android** | **build** | **run** |
| `x86_64-linux-android` | ✓ | ✓ |
| `armv7-linux-androideabi` | ✓ | ✗ |
| `aarch64-linux-android` | ✓ | ✗ |
| `thumbv7neon-linux-androideabi` | ✓ | ✗ |
| **iOS** | **build** | **run** |
| `x86_64-apple-ios` | ✗ | ✗ |
| `aarch64-apple-ios` | ✗ | ✗ |
## Machine code verification
The
[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify)
crate's tests disassemble the portable packed vector APIs at run-time and
compare the generated machine code against the desired one to make sure that
this crate remains efficient.
## License
This project is licensed under either of
* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
([LICENSE-APACHE](LICENSE-APACHE))
* [MIT License](http://opensource.org/licenses/MIT)
([LICENSE-MIT](LICENSE-MIT))
at your option.
## Contributing
We welcome all people who want to contribute.
Please see the [contributing instructions] for more information.
Contributions in any form (issues, pull requests, etc.) to this project
must adhere to Rust's [Code of Conduct].
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be
dual licensed as above, without any additional terms or conditions.
[travis]: https://travis-ci.com/rust-lang/packed_simd
[Travis-CI Status]: https://travis-ci.com/rust-lang/packed_simd.svg?branch=master
[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd
[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true
[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg
[crates.io]: https://crates.io/crates/packed_simd
[docs]: https://docs.rs/packed_simd/badge.svg
[docs.rs]: https://docs.rs/packed_simd
[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/
[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366
[ISPC]: https://ispc.github.io/
[rayon]: https://crates.io/crates/rayon
[boost_license]: https://www.boost.org/LICENSE_1_0.txt
[SLEEF]: https://sleef.org/
[sleef_sys]: https://crates.io/crates/sleef-sys
[contributing instructions]: contributing.md
[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html

3
third_party/rust/packed_simd/bors.toml поставляемый
Просмотреть файл

@ -1,3 +0,0 @@
status = [
"continuous-integration/travis-ci/push"
]

6
third_party/rust/packed_simd/build.rs поставляемый
Просмотреть файл

@ -1,6 +0,0 @@
/// Build-script entry point: enables the `libcore_neon` cfg when the value of
/// the `TARGET` environment variable contains `neon`.
///
/// Panics if `TARGET` is not set.
fn main() {
    let triple = std::env::var("TARGET").expect("TARGET environment variable not defined");
    if !triple.contains("neon") {
        return;
    }
    // Directive printed on stdout for Cargo to pick up.
    println!("cargo:rustc-cfg=libcore_neon");
}

71
third_party/rust/packed_simd/ci/all.sh поставляемый
Просмотреть файл

@ -1,71 +0,0 @@
#!/usr/bin/env bash
#
# Performs an operation on all targets
#
# Usage: all.sh <clean|check_fmt|fmt|clippy>
# The selected cargo operation is run in the current directory, in every
# directory under examples/, in verify/verify and in micro_benchmarks.
# -e: abort on the first failing command; -x: trace every command.
set -ex
# Abort with the given message when no argument was supplied.
: "${1?The all.sh script requires one argument.}"
op=$1
# One wrapper function per supported operation; the name of the chosen one is
# stored in CMD below and invoked as a command.
cargo_clean() {
cargo clean
}
cargo_check_fmt() {
cargo fmt --all -- --check
}
cargo_fmt() {
cargo fmt --all
}
cargo_clippy() {
cargo clippy --all -- -D clippy::perf
}
# Placeholder value; every branch of the case below either overwrites it or exits.
CMD="-1"
# Note: clean*, check_fmt* and fmt* are prefix matches, while clippy must match exactly.
case $op in
clean*)
CMD=cargo_clean
;;
check_fmt*)
CMD=cargo_check_fmt
;;
fmt*)
CMD=cargo_fmt
;;
clippy)
CMD=cargo_clippy
;;
*)
echo "Unknown operation: \"${op}\""
exit 1
;;
esac
echo "Operation is: ${CMD}"
# On src/
$CMD
# Check examples/
for dir in examples/*/
do
# Strip the trailing slash left by the glob.
dir=${dir%*/}
# Subshell: the cd does not leak into the next iteration.
(
cd "${dir%*/}"
$CMD
)
done
# Same operation for the verify crate.
(
cd verify/verify
$CMD
)
# Same operation for the micro benchmarks.
(
cd micro_benchmarks
$CMD
)

Просмотреть файл

@ -1,21 +0,0 @@
#!/usr/bin/env sh
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# Downloads the Android NDK archive into the current directory and unpacks it
# as ./ndk.
# -e: abort on the first failing command; -x: trace every command.
set -ex
ANDROID_NDK_URL=https://dl.google.com/android/repository
ANDROID_NDK_ARCHIVE=android-ndk-r25b-linux.zip
# -f: fail on HTTP errors; -O: save under the remote file name.
curl -fO "$ANDROID_NDK_URL/$ANDROID_NDK_ARCHIVE"
unzip -q $ANDROID_NDK_ARCHIVE
# The archive is no longer needed once extracted.
rm $ANDROID_NDK_ARCHIVE
# Give the extracted, versioned directory a stable name.
mv android-ndk-* ndk
# Remove anything still matching the versioned name (-f keeps this quiet when
# nothing matches).
rm -rf android-ndk-*

Просмотреть файл

@ -1,60 +0,0 @@
#!/usr/bin/env sh
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# Installs the Android SDK tools into ./sdk and creates an emulator image (AVD)
# named after the architecture passed as $1 (arm, armv7, aarch64, i686 or x86_64).
# -e: abort on the first failing command; -x: trace every command.
set -ex
# Prep the SDK and emulator
#
# Note that the update process requires that we accept a bunch of licenses, and
# we can't just pipe `yes` into it for some reason, so we take the same strategy
# located in https://github.com/appunite/docker by just wrapping it in a script
# which apparently magically accepts the licenses.
mkdir sdk
curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O
unzip -d sdk sdk-tools-linux-3859397.zip
# Map the architecture argument onto the ABI name used in the system-image
# package path below; any other value aborts the script.
case "$1" in
arm | armv7)
abi=armeabi-v7a
;;
aarch64)
abi=arm64-v8a
;;
i686)
abi=x86
;;
x86_64)
abi=x86_64
;;
*)
echo "invalid arch: $1"
exit 1
;;
esac;
# --no_https avoids
# javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found
yes | ./sdk/tools/bin/sdkmanager --licenses --no_https
# Install the emulator, platform tools, the android-24 platform and the
# matching default system image, answering every prompt with "y".
yes | ./sdk/tools/bin/sdkmanager --no_https \
"emulator" \
"platform-tools" \
"platforms;android-24" \
"system-images;android-24;default;$abi"
# Create the AVD named after $1; "no" is fed to avdmanager's stdin.
echo "no" |
./sdk/tools/bin/avdmanager create avd \
--name "${1}" \
--package "system-images;android-24;default;$abi"

Просмотреть файл

@ -1,56 +0,0 @@
#!/usr/bin/env bash
# Copyright 2017 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
set -ex
URL=https://dl.google.com/android/repository/sys-img/android
# Downloads an Android system image archive and copies its dynamic linker and
# libdl/libc/libm into /system.
#   $1 - architecture; "x86_64" and "arm64" select the 64-bit linker and lib64
#   $2 - archive file name, appended to ${URL}
main() {
local arch="${1}"
local name="${2}"
local dest=/system
local td
# Work in a scratch directory that is deleted at the end.
td="$(mktemp -d)"
# e2tools provides e2cp, used below to read files out of system.img.
# NOTE(review): no -y here, unlike the purge below - confirm this never
# prompts in the CI image.
apt-get install --no-install-recommends e2tools
pushd "${td}"
curl --retry 5 -O "${URL}/${name}"
unzip -q "${name}"
local system
# Locate the filesystem image inside the unpacked archive.
system="$(find . -name system.img)"
mkdir -p ${dest}/{bin,lib,lib64}
# Extract android linker and libraries to /system
# This allows android executables to be run directly (or with qemu)
if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then
e2cp -p "${system}:/bin/linker64" "${dest}/bin/"
e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/"
e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/"
e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/"
else
e2cp -p "${system}:/bin/linker" "${dest}/bin/"
e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/"
e2cp -p "${system}:/lib/libc.so" "${dest}/lib/"
e2cp -p "${system}:/lib/libm.so" "${dest}/lib/"
fi
# clean up
apt-get purge --auto-remove -y e2tools
popd
rm -rf "${td}"
}
main "${@}"

32
third_party/rust/packed_simd/ci/benchmark.sh поставляемый
Просмотреть файл

@ -1,32 +0,0 @@
#!/usr/bin/env bash
#
# Runs all benchmarks. Controlled by the following environment variables:
#
# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc)
# NORUN={1} - only builds the benchmarks
# -e: abort on the first failing command; -x: trace every command.
set -ex

if [[ ${NORUN} != 1 ]]; then
    # Most benchmarks require hyperfine; require it upfront.
    hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; }
fi

# If the ispc benchmark feature is enabled, ispc must be in the path of the
# benchmarks.
if echo "$FEATURES" | grep -q "ispc"; then
    hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; }
fi

# An example with a benchmark.sh is a benchmark:
for dir in examples/*/
do
    # Run each example in a subshell: the working directory is restored
    # automatically (no `cd -`), and the quoted path survives word splitting
    # if a directory name contains spaces or glob characters.
    (
        cd "${dir%/}"
        if [ -f "benchmark.sh" ]; then
            ./benchmark.sh
        fi
    )
done

Просмотреть файл

@ -1,176 +0,0 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// This is a script to deploy and execute a binary on an iOS simulator.
// The primary use of this is to be able to run unit tests on the simulator and
// retrieve the results.
//
// To do this through Cargo instead, use Dinghy
// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy
// test.
use std::env;
use std::fs::{self, File};
use std::io::Write;
use std::path::Path;
use std::process;
use std::process::Command;
// Unwraps a `Result`: yields the `Ok` value, or panics with the source text of
// the expression followed by the error.
macro_rules! t {
    ($call:expr) => {
        match $call {
            Ok(value) => value,
            Err(err) => panic!("{} failed with: {}", stringify!($call), err),
        }
    };
}
// Step one: Wrap as an app
//
// Builds a minimal app bundle in `./ios_simulator_app`: the test binary copied
// under the name `crate_name`, plus an `Info.plist` that names it as the bundle
// executable with identifier `com.rust.unittests`. Every fallible filesystem
// call except the initial cleanup goes through `t!`, which panics on error.
fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) {
println!("Packaging simulator app");
// Remove a bundle left over from an earlier run; the result is deliberately
// discarded.
drop(fs::remove_dir_all("ios_simulator_app"));
t!(fs::create_dir("ios_simulator_app"));
t!(fs::copy(test_binary_path,
Path::new("ios_simulator_app").join(crate_name)));
let mut f = t!(File::create("ios_simulator_app/Info.plist"));
// The only placeholder in the plist template is the executable name.
t!(f.write_all(format!(r#"
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC
"-//Apple//DTD PLIST 1.0//EN"
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleExecutable</key>
<string>{}</string>
<key>CFBundleIdentifier</key>
<string>com.rust.unittests</string>
</dict>
</plist>
"#, crate_name).as_bytes()));
}
// Step two: Start the iOS simulator
//
// Scans `xcrun simctl list` for a `rust_ios` entry, creates the simulator if
// it is missing (or shuts it down if it is listed as booted), then erases and
// boots it. Panics if more than one `rust_ios` line is listed or if any
// `xcrun` invocation fails.
fn start_simulator() {
    println!("Looking for iOS simulator");
    let output = t!(Command::new("xcrun").arg("simctl").arg("list").output());
    assert!(output.status.success());
    let stdout = t!(String::from_utf8(output.stdout));

    // `None` until a `rust_ios` line is seen; afterwards records whether that
    // line carried the "(Booted)" marker. A single Option replaces the three
    // separate flags, two of which always held the same value.
    let mut booted_state: Option<bool> = None;
    for entry in stdout.lines().filter(|l| l.contains("rust_ios")) {
        if booted_state.is_some() {
            panic!("Duplicate rust_ios simulators found. Please \
                    double-check xcrun simctl list.");
        }
        booted_state = Some(entry.contains("(Booted)"));
    }

    match booted_state {
        None => {
            println!("Creating iOS simulator");
            Command::new("xcrun")
                .arg("simctl")
                .arg("create")
                .arg("rust_ios")
                .arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE")
                .arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2")
                .check_status();
        }
        Some(true) => {
            println!("Shutting down already-booted simulator");
            Command::new("xcrun")
                .arg("simctl")
                .arg("shutdown")
                .arg("rust_ios")
                .check_status();
        }
        // Present but not booted: nothing to do before the erase below.
        Some(false) => {}
    }

    println!("Starting iOS simulator");
    // We can't uninstall the app (if present) as that will hang if the
    // simulator isn't completely booted; just erase the simulator instead.
    Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status();
    Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status();
}
// Step three: Install the app
fn install_app_to_simulator() {
println!("Installing app to simulator");
Command::new("xcrun")
.arg("simctl")
.arg("install")
.arg("booted")
.arg("ios_simulator_app/")
.check_status();
}
// Step four: Run the app
//
// Launches `com.rust.unittests` with `xcrun simctl launch --console booted`,
// echoes the captured stdout/stderr, shuts the `rust_ios` simulator down and
// panics unless the output contains a "test result: ok" line and no line
// containing "FAILED".
fn run_app_on_simulator() {
    println!("Running app");
    let output = t!(Command::new("xcrun")
        .arg("simctl")
        .arg("launch")
        .arg("--console")
        .arg("booted")
        .arg("com.rust.unittests")
        .output());

    // Decode stdout once and reuse it for both the echo and the scan.
    let stdout = String::from_utf8_lossy(&output.stdout);
    println!("stdout --\n{}\n", stdout);
    println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr));

    let failed = stdout.lines().any(|l| l.contains("FAILED"));
    let passed = stdout.lines().any(|l| l.contains("test result: ok"));

    // Shut down before reporting so a failing run does not leave the
    // simulator booted.
    println!("Shutting down simulator");
    Command::new("xcrun")
        .arg("simctl")
        .arg("shutdown")
        .arg("rust_ios")
        .check_status();

    if failed || !passed {
        panic!("tests didn't pass");
    }
}
// Helper trait so `.check_status()` can be chained onto a `Command` builder.
trait CheckStatus {
// Runs `self` and checks the outcome; see the `Command` impl below.
fn check_status(&mut self);
}
impl CheckStatus for Command {
// Prints the command, runs it to completion and panics if it could not be
// run or did not exit successfully.
fn check_status(&mut self) {
// Echo the command in its Debug form so each step shows up in the log.
println!("\trunning: {:?}", self);
// `t!` panics when the status cannot be obtained; the assert rejects a
// non-success exit status.
assert!(t!(self.status()).success());
}
}
// Entry point: expects exactly one argument, the path of the test executable,
// then packages it, starts the simulator, installs the bundle and runs it.
fn main() {
    let argv: Vec<String> = env::args().collect();
    if argv.len() != 2 {
        println!("Usage: {} <executable>", argv[0]);
        process::exit(-1);
    }

    let binary = Path::new(&argv[1]);
    // The bundle executable is named after the file name of the test binary.
    let bundle_name = binary.file_name().unwrap();

    package_as_simulator_app(bundle_name.to_str().unwrap(), binary);
    start_simulator();
    install_app_to_simulator();
    run_app_on_simulator();
}

Просмотреть файл

@ -1,47 +0,0 @@
FROM ubuntu:16.04
RUN dpkg --add-architecture i386 && \
apt-get update && \
apt-get install -y --no-install-recommends \
file \
make \
curl \
ca-certificates \
python \
unzip \
expect \
openjdk-9-jre \
libstdc++6:i386 \
libpulse0 \
gcc \
libc6-dev
WORKDIR /android/
COPY android* /android/
ENV ANDROID_ARCH=aarch64
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
ENV PATH=$PATH:/rust/bin \
CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \
CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \
OBJDUMP=aarch64-linux-android-objdump \
HOME=/tmp
ADD runtest-android.rs /tmp/runtest.rs
ENTRYPOINT [ \
"bash", \
"-c", \
# set SHELL so android can detect a 64bits system, see
# http://stackoverflow.com/a/41789144
"SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \
rustc /tmp/runtest.rs -o /tmp/runtest && \
exec \"$@\"", \
"--" \
]

Просмотреть файл

@ -1,14 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-aarch64-linux-gnu \
libc6-dev-arm64-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
OBJDUMP=aarch64-linux-gnu-objdump

Просмотреть файл

@ -1,15 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
libc6-armel-cross \
libc6-dev-armel-cross \
binutils-arm-linux-gnueabi \
gcc-arm-linux-gnueabi \
qemu-user \
make \
file
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \
OBJDUMP=arm-linux-gnueabi-objdump

Просмотреть файл

@ -1,13 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-arm-linux-gnueabihf \
libc6-dev-armhf-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
OBJDUMP=arm-linux-gnueabihf-objdump

Просмотреть файл

@ -1,47 +0,0 @@
FROM ubuntu:16.04
RUN dpkg --add-architecture i386 && \
apt-get update && \
apt-get install -y --no-install-recommends \
file \
make \
curl \
ca-certificates \
python \
unzip \
expect \
openjdk-9-jre \
libstdc++6:i386 \
libpulse0 \
gcc \
libc6-dev
WORKDIR /android/
COPY android* /android/
ENV ANDROID_ARCH=arm
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
ENV PATH=$PATH:/rust/bin \
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
OBJDUMP=arm-linux-androideabi-objdump \
HOME=/tmp
ADD runtest-android.rs /tmp/runtest.rs
ENTRYPOINT [ \
"bash", \
"-c", \
# set SHELL so android can detect a 64bits system, see
# http://stackoverflow.com/a/41789144
"SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
rustc /tmp/runtest.rs -o /tmp/runtest && \
exec \"$@\"", \
"--" \
]

Просмотреть файл

@ -1,13 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-arm-linux-gnueabihf \
libc6-dev-armhf-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
OBJDUMP=arm-linux-gnueabihf-objdump

Просмотреть файл

@ -1,7 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \
file \
make \
ca-certificates

Просмотреть файл

@ -1,7 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \
file \
make \
ca-certificates

Просмотреть файл

@ -1,13 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-mips-linux-gnu libc6-dev-mips-cross \
qemu-system-mips \
qemu-user \
make \
file
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
OBJDUMP=mips-linux-gnu-objdump

Просмотреть файл

@ -1,10 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
qemu-system-mips64 qemu-user
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
OBJDUMP=mips64-linux-gnuabi64-objdump

Просмотреть файл

@ -1,10 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
qemu-system-mips64el
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
OBJDUMP=mips64el-linux-gnuabi64-objdump

Просмотреть файл

@ -1,25 +0,0 @@
FROM ubuntu:18.10
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
gcc \
libc6-dev \
make \
qemu-user \
qemu-system-mips \
bzip2 \
curl \
file
RUN mkdir /toolchain
# Note that this originally came from:
# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
tar xjf - -C /toolchain --strip-components=2
ENV PATH=$PATH:/rust/bin:/toolchain/bin \
CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"

Просмотреть файл

@ -1,13 +0,0 @@
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
qemu-system-ppc \
make \
file
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
CC=powerpc-linux-gnu-gcc \
OBJDUMP=powerpc-linux-gnu-objdump

Просмотреть файл

@ -1,17 +0,0 @@
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-powerpc64-linux-gnu \
libc6-dev-ppc64-cross \
qemu-user \
qemu-system-ppc \
make \
file
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \
CC=powerpc64-linux-gnu-gcc \
OBJDUMP=powerpc64-linux-gnu-objdump

Просмотреть файл

@ -1,11 +0,0 @@
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
qemu-system-ppc file make
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \
CC=powerpc64le-linux-gnu-gcc \
OBJDUMP=powerpc64le-linux-gnu-objdump

Просмотреть файл

@ -1,20 +0,0 @@
FROM ubuntu:22.04
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
curl \
cmake \
gcc \
libc6-dev \
g++-s390x-linux-gnu \
libc6-dev-s390x-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \
CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \
CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \
OBJDUMP=s390x-linux-gnu-objdump

Просмотреть файл

@ -1,18 +0,0 @@
FROM debian:bookworm
RUN apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates \
gcc libc6-dev \
gcc-sparc64-linux-gnu libc6-dev-sparc64-cross \
qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \
p7zip-full cpio
COPY linux-sparc64.sh /
RUN bash /linux-sparc64.sh
COPY test-runner-linux /
ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \
CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \
CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \
PATH=$PATH:/rust/bin

Просмотреть файл

@ -1,47 +0,0 @@
FROM ubuntu:16.04
RUN dpkg --add-architecture i386 && \
apt-get update && \
apt-get install -y --no-install-recommends \
file \
make \
curl \
ca-certificates \
python \
unzip \
expect \
openjdk-9-jre \
libstdc++6:i386 \
libpulse0 \
gcc \
libc6-dev
WORKDIR /android/
COPY android* /android/
ENV ANDROID_ARCH=arm
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
ENV PATH=$PATH:/rust/bin \
CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
OBJDUMP=arm-linux-androideabi-objdump \
HOME=/tmp
ADD runtest-android.rs /tmp/runtest.rs
ENTRYPOINT [ \
"bash", \
"-c", \
# set SHELL so android can detect a 64bits system, see
# http://stackoverflow.com/a/41789144
"SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
rustc /tmp/runtest.rs -o /tmp/runtest && \
exec \"$@\"", \
"--" \
]

Просмотреть файл

@ -1,13 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-arm-linux-gnueabihf \
libc6-dev-armhf-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
OBJDUMP=arm-linux-gnueabihf-objdump

Просмотреть файл

@ -1,39 +0,0 @@
FROM ubuntu:22.04
RUN apt-get update -y && apt-get install -y --no-install-recommends \
ca-certificates \
clang \
cmake \
curl \
git \
libc6-dev \
make \
ninja-build \
python-is-python3 \
xz-utils
# Install `wasm2wat`
RUN git clone --recursive https://github.com/WebAssembly/wabt
RUN make -C wabt -j$(nproc)
ENV PATH=$PATH:/wabt/bin
# Install `wasm-bindgen-test-runner`
RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.87/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl.tar.gz \
| tar xzf -
# Keep in sync with the version on Cargo.toml.
ENV PATH=$PATH:/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
# Install `node`
RUN curl https://nodejs.org/dist/v14.16.0/node-v14.16.0-linux-x64.tar.xz | tar xJf -
ENV PATH=$PATH:/node-v14.16.0-linux-x64/bin
# We use a shim linker that removes `--strip-debug` when passed to LLD. While
# this typically results in invalid debug information in release mode it doesn't
# result in an invalid names section which is what we're interested in.
COPY lld-shim.rs /
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim
# Rustc isn't available until this container starts, so defer compilation of the
# shim.
ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@"

Просмотреть файл

@ -1,31 +0,0 @@
FROM ubuntu:20.04
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
curl \
gcc \
libc-dev \
python \
unzip \
file \
make
WORKDIR /android/
ENV ANDROID_ARCH=x86_64
COPY android-install-ndk.sh /android/
RUN sh /android/android-install-ndk.sh
ENV STDARCH_ASSERT_INSTR_LIMIT=30
# We do not run x86_64-linux-android tests on an android emulator.
# See ci/android-sysimage.sh for information about how tests are run.
COPY android-sysimage.sh /android/
RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip
ENV PATH=$PATH:/rust/bin:/android/ndk/toolchains/llvm/prebuilt/linux-x86_64/bin \
CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android21-clang \
CC_x86_64_linux_android=x86_64-linux-android21-clang \
CXX_x86_64_linux_android=x86_64-linux-android21-clang++ \
OBJDUMP=llvm-objdump \
HOME=/tmp

Просмотреть файл

@ -1,16 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
file \
make \
ca-certificates \
wget \
bzip2 \
cmake \
libclang-dev \
clang
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2
RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --"

Просмотреть файл

@ -1,10 +0,0 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
file \
make \
ca-certificates \
cmake \
libclang-dev \
clang

27
third_party/rust/packed_simd/ci/dox.sh поставляемый
Просмотреть файл

@ -1,27 +0,0 @@
#!/bin/sh
set -ex
rm -rf target/doc
mkdir -p target/doc
# Build API documentation
cargo doc --features=into_bits
# Build Performance Guide
# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780
# mdbook build perf-guide -d target/doc/perf-guide
cd perf-guide
mdbook build
cd -
cp -r perf-guide/book target/doc/perf-guide
# If we're on travis, not a PR, and on the right branch, publish!
if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then
python3 -vV
pip -vV
python3.9 -vV
pip install ghp_import --user
ghp-import -n target/doc
git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
fi

Просмотреть файл

@ -1,18 +0,0 @@
# Prepares /qemu with an s390x kernel and an unpacked initrd tree in /qemu/init.
set -ex
# World-accessible working directory for the downloaded images.
mkdir -m 777 /qemu
cd /qemu
# Download s390-ccw.img from the QEMU repository, plus the Debian s390x
# installer kernel and initrd.
curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img
curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian
curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian
# Give the downloads the generic names `kernel` and `initrd.gz`.
mv kernel.debian kernel
mv initrd.debian initrd.gz
# Unpack the initrd archive into /qemu/init, then delete the archive.
mkdir init
cd init
gunzip -c ../initrd.gz | cpio -id
rm ../initrd.gz
# Add the cross toolchain's libgcc_s.so.1 to the unpacked tree.
cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/
# Let any user write into the root of the unpacked tree.
chmod a+w .

Просмотреть файл

@ -1,17 +0,0 @@
# Prepares /qemu with a sparc64 kernel and an unpacked initrd tree in /qemu/init.
set -ex
# World-accessible working directory for the downloaded images.
mkdir -m 777 /qemu
cd /qemu
# Download the Debian 9.0 sparc64 netinst ISO and extract only the initrd and
# the kernel image from it.
curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso
7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz
7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64
# The extracted `sparc64` file is the kernel; the ISO is no longer needed.
mv sparc64 kernel
rm debian-9.0-sparc64-NETINST-1.iso
# Unpack the initrd archive into /qemu/init, then delete the archive.
mkdir init
cd init
gunzip -c ../initrd.gz | cpio -id
rm ../initrd.gz
# Add the cross toolchain's libgcc_s.so.1 to the unpacked tree.
cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/
# Let any user write into the root of the unpacked tree.
chmod a+w .

11
third_party/rust/packed_simd/ci/lld-shim.rs поставляемый
Просмотреть файл

@ -1,11 +0,0 @@
use std::os::unix::prelude::*;
use std::process::Command;
use std::env;
/// Linker shim: forwards every command-line argument to `rust-lld`, except
/// `--strip-debug`, which is dropped.
fn main() {
    let mut forwarded = Vec::new();
    for arg in env::args().skip(1) {
        if arg != "--strip-debug" {
            forwarded.push(arg);
        }
    }

    // `exec` replaces the current process and only returns if it failed, in
    // which case the returned error is reported through a panic.
    let err = Command::new("rust-lld").args(&forwarded).exec();
    panic!("failed to exec: {}", err);
}

Просмотреть файл

@ -1,17 +0,0 @@
#!/usr/bin/env sh
# Checks every `*.rs` file below the current directory for over-long lines and
# reports the first offending file.
set -x
# NOTE(review): `success` is exported but never read in this script.
export success=true
find . -iname '*.rs' | while read -r file; do
# Collect lines with at least 79 characters, ignoring lines that contain "http".
result=$(grep '.\{79\}' "${file}" | grep --invert 'http')
if [ "${result}" = "" ]
then
:
else
# Print the offending lines and leave the loop with a failure status.
echo "file \"${file}\": $result"
exit 1
fi
done

38
third_party/rust/packed_simd/ci/run-docker.sh поставляемый
Просмотреть файл

@ -1,38 +0,0 @@
# Small script to run tests for a target (or all targets) inside all the
# respective docker images.
set -ex
# Builds the docker image for one target and runs ci/run.sh inside it.
#
# $1 - name of a directory under ci/docker/ (a target name, optionally carrying
#      an "-emulated" suffix). Falls back to ${TARGET} when no argument is given.
#
# The argument is used instead of the global ${TARGET} so that callers that
# iterate over ci/docker/ with an empty ${TARGET} build the right Dockerfile.
run() {
    docker_target="${1:-${TARGET}}"
    echo "Building docker container for TARGET=${docker_target} RUSTFLAGS=${RUSTFLAGS}"
    docker build -t packed_simd -f "ci/docker/${docker_target}/Dockerfile" ci/
    mkdir -p target
    # The "-emulated" suffix only selects the Dockerfile; the container's TARGET
    # variable receives the name without it.
    target=$(echo "${docker_target}" | sed 's/-emulated//')
    echo "Running docker"
    # NORUN, NOVERIFY and RUSTFLAGS are passed through from the caller's
    # environment. The checkout is mounted read-only, with a writable target/
    # directory mounted on top of it.
    docker run \
        --user "$(id -u):$(id -g)" \
        --rm \
        --init \
        --volume "$HOME/.cargo:/cargo" \
        --env CARGO_HOME=/cargo \
        --volume "$(rustc --print sysroot):/rust:ro" \
        --env TARGET="$target" \
        --env NORUN \
        --env NOVERIFY \
        --env RUSTFLAGS \
        --volume "$(pwd):/checkout:ro" \
        --volume "$(pwd)/target:/checkout/target" \
        --workdir /checkout \
        --privileged \
        packed_simd \
        bash \
        -c 'PATH=$PATH:/rust/bin exec ci/run.sh'
}
if [ -z "${TARGET}" ]; then
for d in `ls ci/docker/`; do
run $d
done
else
run ${TARGET}
fi

99
third_party/rust/packed_simd/ci/run.sh поставляемый
Просмотреть файл

@ -1,99 +0,0 @@
#!/usr/bin/env bash
set -ex
: ${TARGET?"The TARGET environment variable must be set."}
# Tests are all super fast anyway, and they fault often enough on travis that
# having only one thread increases debuggability to be worth it.
#export RUST_TEST_THREADS=1
#export RUST_BACKTRACE=full
#export RUST_TEST_NOCAPTURE=1
# Some appveyor builds run out-of-memory; this attempts to mitigate that:
# https://github.com/rust-lang-nursery/packed_simd/issues/39
# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1"
# export CARGO_BUILD_JOBS=1
export CARGO_SUBCMD=test
if [[ "${NORUN}" == "1" ]]; then
export CARGO_SUBCMD=build
fi
if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then
export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0"
rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest
export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest
export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest
fi
# The source directory is read-only. Need to copy internal crates to the target
# directory for their Cargo.lock to be properly written.
mkdir target || true
rustc --version
cargo --version
echo "TARGET=${TARGET}"
echo "HOST=${HOST}"
echo "RUSTFLAGS=${RUSTFLAGS}"
echo "NORUN=${NORUN}"
echo "NOVERIFY=${NOVERIFY}"
echo "CARGO_SUBCMD=${CARGO_SUBCMD}"
echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}"
echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}"
echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}"
echo "RUST_BACKTRACE=${RUST_BACKTRACE}"
echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}"
# Runs `cargo ${CARGO_SUBCMD}` for ${TARGET}, forwarding any extra arguments.
# The combined output is captured in target/output and only printed when cargo
# fails, in which case the function returns 1.
cargo_test() {
# The command is kept in a single string and later expanded unquoted, so it is
# split on whitespace when executed.
cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}"
# When NORUN is not "1", append `-- --quiet` to the command, except for the
# wasm32-unknown-unknown target.
if [ "${NORUN}" != "1" ]
then
if [ "$TARGET" != "wasm32-unknown-unknown" ]
then
cmd="$cmd -- --quiet"
fi
fi
# The directory may already exist; a failing mkdir is ignored.
mkdir target || true
# `tee` is given no file argument, so it is the redirection of its stdout that
# writes cargo's merged stdout/stderr into target/output.
${cmd} 2>&1 | tee > target/output
# PIPESTATUS[0] is the exit status of the cargo command, not of `tee`.
if [[ ${PIPESTATUS[0]} != 0 ]]; then
cat target/output
return 1
fi
}
# Calls cargo_test three times with the same arguments, each time with a
# different group of `--cfg test_v*` flags appended to the original RUSTFLAGS:
#   1. test_v16, test_v32, test_v64
#   2. test_v128, test_v256
#   3. test_v512
# NOTE(review): presumably this splits the test suite by vector width to keep
# each build smaller — confirm against the crate's test configuration.
cargo_test_impl() {
ORIGINAL_RUSTFLAGS=${RUSTFLAGS}
RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v16 --cfg test_v32 --cfg test_v64" cargo_test ${@}
RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v128 --cfg test_v256" cargo_test ${@}
RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v512" cargo_test ${@}
# Put RUSTFLAGS back to the value it had on entry.
RUSTFLAGS=${ORIGINAL_RUSTFLAGS}
}
# Debug run:
if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then
# Run wasm32-unknown-unknown in release mode only
cargo_test_impl
fi
if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then
# use sleef on linux and windows x86_64 builds
# FIXME: Use `core_arch,sleef-sys` features once they work again
cargo_test_impl --release --features=into_bits
else
# FIXME: Use `core_arch` feature once it works again
cargo_test_impl --release --features=into_bits
fi
# Verify code generation
if [[ "${NOVERIFY}" != "1" ]]; then
cp -r verify/verify target/verify
export STDSIMD_ASSERT_INSTR_LIMIT=30
if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then
export STDSIMD_ASSERT_INSTR_LIMIT=50
fi
cargo_test --release --manifest-path=target/verify/Cargo.toml
fi
# FIXME: Figure out which examples take too long to run and ignore or adjust those
#. ci/run_examples.sh

Просмотреть файл

@ -1,51 +0,0 @@
# Runs all examples.
# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55
# All examples fail to build for `armv7-apple-ios`.
if [[ ${TARGET} == "armv7-apple-ios" ]]; then
exit 0
fi
# FIXME: travis exceeds 50 minutes on these targets
# Skipping the examples is an attempt at preventing travis from timing-out
# The Android names must match the real Rust target triples
# (`arm-linux-androideabi`, `aarch64-linux-android`), otherwise this guard
# never triggers for them.
if [[ ${TARGET} == "arm-linux-androideabi" ]] || [[ ${TARGET} == "aarch64-linux-android" ]] \
    || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then
    exit 0
fi
if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then
exit 0
fi
cp -r examples/aobench target/aobench
cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features
cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit
cp -r examples/dot_product target/dot_product
cargo_test --manifest-path=target/dot_product/Cargo.toml --release
cp -r examples/fannkuch_redux target/fannkuch_redux
cargo_test --manifest-path=target/fannkuch_redux/Cargo.toml --release
# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56
if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
cp -r examples/mandelbrot target/mandelbrot
cargo_test --manifest-path=target/mandelbrot/Cargo.toml --release
fi
cp -r examples/matrix_inverse target/matrix_inverse
cargo_test --manifest-path=target/matrix_inverse/Cargo.toml --release
cp -r examples/nbody target/nbody
cargo_test --manifest-path=target/nbody/Cargo.toml --release
cp -r examples/spectral_norm target/spectral_norm
cargo_test --manifest-path=target/spectral_norm/Cargo.toml --release
if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
cp -r examples/stencil target/stencil
cargo_test --manifest-path=target/stencil/Cargo.toml --release
fi
cp -r examples/triangle_xform target/triangle_xform
cargo_test --manifest-path=target/triangle_xform/Cargo.toml --release

Просмотреть файл

@ -1,45 +0,0 @@
use std::env;
use std::process::Command;
use std::path::{Path, PathBuf};
/// Runs one test binary on the Android device or emulator reachable via `adb`.
///
/// After `--quiet` arguments are dropped, exactly one argument must remain:
/// the path of the test binary. The binary is pushed to `/data/local/tmp`,
/// executed through `adb shell`, and its output is echoed.
///
/// # Panics
///
/// Panics if the argument count is wrong, if any `adb` invocation cannot be
/// started or exits unsuccessfully, if the output contains no line starting
/// with `test result`, or if any such line does not report
/// `test result: ok` with `0 failed`.
fn main() {
    let args = env::args_os()
        .skip(1)
        .filter(|arg| arg != "--quiet")
        .collect::<Vec<_>>();
    assert_eq!(args.len(), 1);

    let test = PathBuf::from(&args[0]);
    let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());

    let status = Command::new("adb")
        .arg("wait-for-device")
        .status()
        .expect("failed to run: adb wait-for-device");
    assert!(status.success());

    let status = Command::new("adb")
        .arg("push")
        .arg(&test)
        .arg(&dst)
        .status()
        .expect("failed to run: adb push");
    assert!(status.success());

    let output = Command::new("adb")
        .arg("shell")
        .arg(&dst)
        .output()
        .expect("failed to run: adb shell");

    println!("status: {}\nstdout ---\n{}\nstderr ---\n{}",
             output.status,
             String::from_utf8_lossy(&output.stdout),
             String::from_utf8_lossy(&output.stderr));

    // Check the status of the `adb shell` run itself (not the earlier push),
    // and only after the output has been printed so failures stay diagnosable.
    assert!(output.status.success());

    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut results = stdout
        .lines()
        .filter(|l| l.starts_with("test result"))
        .peekable();

    // `all` is vacuously true on an empty iterator, so a run that printed no
    // result line at all (e.g. a crash) must be rejected explicitly.
    let found_result = results.peek().is_some();
    let all_ok = results.all(|l| l.contains("test result: ok") && l.contains("0 failed"));
    if !found_result || !all_ok {
        panic!("failed to find successful test run");
    }
}

Просмотреть файл

@ -1,7 +0,0 @@
#!/usr/bin/env bash
set -ex
# Get latest ISPC binary for the target and put it in the path
git clone https://github.com/gnzlbg/ispc-binaries
cp ispc-binaries/ispc-${TARGET} ispc

Просмотреть файл

@ -1,24 +0,0 @@
#!/bin/sh
# Runs a program as `init` inside a qemu-system VM and reports its result.
#
# $1 - architecture suffix of the emulator binary (qemu-system-$1)
# $2 - path of the program to run
#
# The program is copied into the unpacked initrd tree under /qemu/init, the
# tree is repacked into /qemu/initrd.gz, and the kernel in /qemu is booted with
# `init=/prog`. The script fails if the captured output contains "FAILED".
set -e
arch=$1
prog=$2
cd /qemu/init
# Quoted so that paths containing whitespace are copied correctly.
cp -f "$prog" prog
find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz
cd ..
# The emulator is limited to 30 seconds; its exit status (including a timeout)
# is ignored, since only the captured output decides success.
timeout 30s "qemu-system-$arch" \
    -m 1024 \
    -nographic \
    -kernel kernel \
    -initrd initrd.gz \
    -append init=/prog > output || true
# remove kernel messages
tr -d '\r' < output | grep -Ev '^\['
# if the output contains a failure, return error
! grep FAILED output > /dev/null

67
third_party/rust/packed_simd/contributing.md поставляемый
Просмотреть файл

@ -1,67 +0,0 @@
# Contributing to `packed_simd`
Welcome! If you are reading this document, it means you are interested in contributing
to the `packed_simd` crate.
## Reporting issues
All issues with this crate are tracked using GitHub's [Issue Tracker].
You can use issues to bring bugs to the attention of the maintainers, to discuss
certain problems encountered with the crate, or to request new features (although
feature requests should be limited to things mentioned in the [RFC]).
One thing to keep in mind is to always use the **latest** nightly toolchain when
working on this crate. Due to the nature of this project, we use a lot of unstable
features, meaning breakage happens often.
[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues
[RFC]: https://github.com/rust-lang/rfcs/pull/2366
### LLVM issues
The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation,
and quite a few LLVM bugs have been discovered during the development of this project.
If you encounter issues with incorrect/suboptimal codegen, which you do not encounter
when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/),
it is likely the issue is with LLVM, or this crate's interaction with it.
You should first open an issue **in this repo** to help us track the problem, and we
will help determine what is the exact cause of the problem.
If LLVM is indeed the cause, the issue will be reported upstream to the
[LLVM bugtracker](https://bugs.llvm.org/).
## Submitting Pull Requests
New code is submitted to the crate using GitHub's [pull request] mechanism.
You should first fork this repository, make your changes (preferably in a new
branch), then use GitHub's web UI to create a new PR.
[pull request]: https://help.github.com/articles/about-pull-requests/
### Examples
The `examples` directory contains code showcasing SIMD code written with this crate,
usually in comparison to scalar or ISPC code. If you have a project / idea which
uses SIMD, we'd love to add it to the examples list.
Every example should include a small `README`, describing the example code's purpose.
If your example could potentially work as a benchmark, then add a `benchmark.sh`
script to allow running the example benchmark code in CI. See an existing example's
[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample.
Don't forget to update the crate's top-level `README` with a link to your example.
### Perf guide
The objective of the [performance guide][perf-guide] is to be a comprehensive
resource detailing the process of optimizing Rust code with SIMD support.
If you believe a certain section could be reworded, or if you have any tips & tricks
related to SIMD which you'd like to share, please open a PR.
[mdBook] is used to manage the formatting of the guide as a book.
[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
[mdBook]: https://github.com/rust-lang-nursery/mdBook

Просмотреть файл

@ -1,12 +0,0 @@
[book]
authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"]
multilingual = false
src = "src"
title = "Rust SIMD Performance Guide"
description = "This book describes how to write performant SIMD code in Rust."
[build]
create-missing = false
[output.html]
additional-css = ["./src/ascii.css"]

Просмотреть файл

@ -1,21 +0,0 @@
# Summary
[Introduction](./introduction.md)
- [Floating-point Math](./float-math/fp.md)
- [Short-vector Math Library](./float-math/svml.md)
- [Approximate functions](./float-math/approx.md)
- [Fused multiply-accumulate](./float-math/fma.md)
- [Target features](./target-feature/features.md)
- [Using `RUSTFLAGS`](./target-feature/rustflags.md)
- [Using the `target_feature` attribute](./target-feature/attribute.md)
- [Interaction with inlining](./target-feature/inlining.md)
- [Detecting features at runtime](./target-feature/runtime.md)
- [Bounds checking](./bound_checks.md)
- [Vertical and horizontal operations](./vert-hor-ops.md)
- [Performance profiling](./prof/profiling.md)
- [Profiling on Linux](./prof/linux.md)
- [Using machine code analyzers](./prof/mca.md)

Просмотреть файл

@ -1,4 +0,0 @@
code {
/* "Source Code Pro" breaks ASCII art */
font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace;
}

Просмотреть файл

@ -1,22 +0,0 @@
# Bounds checking
Reading and writing packed vectors to/from slices is checked by default.
Independently of the configuration options used, the safe functions:
* `Simd<[T; N]>::from_slice_aligned(& s[..])`
* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])`
always check that:
* the slice is big enough to hold the vector
* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T;
N]>` (this alignment is often much larger than that of `T`).
There are `_unaligned` versions that use unaligned load and stores, as well as
`unsafe` `_unchecked` that do not perform any checks iff `debug-assertions =
false` / `debug = false`. That is, the `_unchecked` methods do still assert size
and alignment in debug builds and could also do so in release builds depending
on the configuration options.
These assertions do often significantly impact performance and you should be
aware of them.

Просмотреть файл

@ -1,8 +0,0 @@
# Approximate functions
<!-- TODO:
Explain that they exist, that they are often _much_ faster, how to use them,
that people should check whether the error is good enough for their
applications. Explain that this error is currently unstable and might change.
-->

Просмотреть файл

@ -1,6 +0,0 @@
# Fused Multiply Add
<!-- TODO:
Explain that this is a compound operation, infinite precision, difference
between `mul_add` and `mul_adde`, that LLVM cannot do this by itself, etc.
-->

Просмотреть файл

@ -1,3 +0,0 @@
# Floating-point math
This chapter contains information pertaining to working with floating-point numbers.

Просмотреть файл

@ -1,7 +0,0 @@
# Short Vector Math Library
<!-- TODO:
Explain how short-vector math is performed by default (just scalarized libm calls).
Explain how to enable `sleef`, etc.
-->

Просмотреть файл

@ -1,26 +0,0 @@
# Introduction
## What is SIMD
<!-- TODO:
describe what SIMD is, which algorithms can benefit from it,
give usage examples
-->
## History of SIMD in Rust
<!-- TODO:
discuss history of unstable std::simd,
stabilization of std::arch, etc.
-->
## Discover packed_simd
<!-- TODO: describe scope of this project -->
Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately,
not trivial. There are many pitfalls that one should be aware of, and some idioms
that help avoid those pitfalls.
This book attempts to document these best practices and provides practical examples
on how to apply the tips to _your_ code.

Просмотреть файл

@ -1,107 +0,0 @@
# Performance profiling on Linux
## Using `perf`
[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler
for Linux, featuring support for various hardware Performance Monitoring Units,
as well as integration with the kernel's performance events framework.
We will only look at how the `perf` command can be used to profile SIMD code.
Full system profiling is outside of the scope of this book.
### Recording
The first step is to record a program's execution during an average workload.
It helps if you can isolate the parts of your program which have performance
issues, and set up a benchmark which can be easily (re)run.
Build the benchmark binary in release mode, after having enabled debug info:
```sh
$ cargo build --release
Finished release [optimized + debuginfo] target(s) in 0.02s
```
Then use the `perf record` subcommand:
```sh
$ perf record --call-graph=dwarf ./target/release/my-program
[ perf record: Woken up 10 times to write data ]
[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ]
```
Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use
`--call-graph=lbr` if you have a processor with support for Last Branch Record
(i.e. Intel Haswell and newer).
`perf` will, by default, record the count of CPU cycles it takes to execute
various parts of your program. You can use the `-e` command line option
to enable other performance events, such as `cache-misses`. Use `perf list`
to get a list of all hardware counters supported by your CPU.
### Viewing the report
The next step is getting a bird's eye view of the program's execution.
`perf` provides a `ncurses`-based interface which will get you started.
Use `perf report` to open a visualization of your program's performance:
```sh
perf report --hierarchy -M intel
```
`--hierarchy` will display a tree-like structure of where your program spent
most of its time. `-M intel` enables disassembly output with Intel syntax, which
is subjectively more readable than the default AT&T syntax.
Here is the output from profiling the `nbody` benchmark:
```
- 100,00% nbody
- 94,18% nbody
+ 93,48% [.] nbody_lib::simd::advance
+ 0,70% [.] nbody_lib::run
+ 5,06% libc-2.28.so
```
If you move with the arrow keys to any node in the tree, you can then press `a`
to have `perf` _annotate_ that node. This means it will:
- disassemble the function
- associate every instruction with the percentage of time which was spent executing it
- interleave the disassembly with the source code,
assuming it found the debug symbols
(you can use `s` to toggle this behaviour)
`perf` will, by default, open the instruction which it identified as being the
hottest spot in the function:
```
0,76 │ movapd xmm2,xmm0
0,38 │ movhlps xmm2,xmm0
│ addpd xmm2,xmm0
│ unpcklpd xmm1,xmm2
12,50 │ sqrtpd xmm0,xmm1
1,52 │ mulpd xmm0,xmm1
```
In this case, `sqrtpd` will be highlighted in red, since that's the instruction
which the CPU spends most of its time executing.
## Using Valgrind
Valgrind is a set of tools which initially helped C/C++ programmers find unsafe
memory accesses in their code. Nowadays the project also has
- a heap profiler called `massif`
- a cache utilization profiler called `cachegrind`
- a call-graph performance profiler called `callgrind`
<!--
TODO: explain valgrind's dynamic binary translation, warn about massive
slowdown, talk about `kcachegrind` for a GUI
-->

Просмотреть файл

@ -1,100 +0,0 @@
# Machine code analysis tools
## The microarchitecture of modern CPUs
While you might have heard of Instruction Set Architectures, such as `x86` or
`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_),
refers to the internal details of an actual family of CPUs, such as Intel's
_Haswell_ or AMD's _Jaguar_.
Replacing scalar code with SIMD code will improve performance on all CPUs
supporting the required vector extensions.
However, due to microarchitectural differences, the actual speed-up at
runtime might vary.
**Example**: a simple example arises when optimizing for AMD K8 CPUs.
The assembly generated for an empty function should look like this:
```asm
nop
ret
```
The `nop` is used to align the `ret` instruction for better performance.
However, the compiler will actually generate the following code:
```asm
repz ret
```
The `repz` instruction will repeat the following instruction until a certain
condition. Of course, in this situation, the function will simply immediately
return, and the `ret` instruction is still aligned.
However, AMD K8's branch predictor performs better with the latter code.
For those looking to absolutely maximize performance for a certain target µ-arch,
you will have to read some CPU manuals, or ask the compiler to do it for you
with `-C target-cpu`.
### Summary of CPU internals
Modern processors are able to execute instructions out-of-order for better performance,
by utilizing tricks such as [branch prediction], [instruction pipelining],
or [superscalar execution].
[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor
[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining
[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor
SIMD instructions are also subject to these optimizations, meaning it can get pretty
difficult to determine where the slowdown happens.
For example, if the profiler reports a store operation is slow, one of two things
could be happening:
- the store is limited by the CPU's memory bandwidth, which is actually an ideal
scenario, all things considered;
- memory bandwidth is nowhere near its peak, but the value to be stored is at the
end of a long chain of operations, and this store is where the profiler
encountered the pipeline stall;
Since most profilers are simple tools which don't understand the subtleties of
instruction scheduling, you may need a dedicated machine code analysis tool to
tell these two cases apart.
## Analyzing the machine code
Certain tools have knowledge of internal CPU microarchitecture, i.e. they know
- how many physical [register files] a CPU actually has
- what is the latency / throughput of an instruction
- what [µ-ops] are generated for a set of instructions
and many other architectural details.
[register files]: https://en.wikipedia.org/wiki/Register_file
[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation
These tools are therefore able to provide accurate information as to why some
instructions are inefficient, and where the bottleneck is.
The disadvantage is that the output of these tools requires advanced knowledge
of the target architecture to understand, i.e. they **cannot** point out what
the cause of the issue is explicitly.
## Intel's Architecture Code Analyzer (IACA)
[IACA] is a free tool offered by Intel for analyzing the performance of various
computational kernels.
Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches.
[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer
## llvm-mca
<!--
TODO: once LLVM 7 gets released, write a chapter on using llvm-mca
with SIMD disassembly.
-->

Просмотреть файл

@ -1,14 +0,0 @@
# Performance profiling
While the rest of the book provides practical advice on how to improve the performance
of SIMD code, this chapter is dedicated to [**performance profiling**][profiling].
Profiling consists of recording a program's execution in order to identify program
hotspots.
**Important**: most profilers require debug information in order to accurately
link the program hotspots back to the corresponding source code lines. Rust will
disable debug info generation by default for optimized builds, but you can change
that [in your `Cargo.toml`][cargo-ref].
[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming)
[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections

Просмотреть файл

@ -1,5 +0,0 @@
# The `target_feature` attribute
<!-- TODO:
Explain the `#[target_feature]` attribute
-->

Просмотреть файл

@ -1,13 +0,0 @@
# Enabling target features
Not all processors of a certain architecture will have SIMD processing units,
and using a SIMD instruction which is not supported will trigger undefined behavior.
To allow building safe, portable programs, the Rust compiler will **not**, by default,
generate any sort of vector instructions, unless it can statically determine
they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed.
The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of
which features are enabled by default on various platforms, refer to the target
specifications [in the compiler's source code][targets].
[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec

Просмотреть файл

@ -1,5 +0,0 @@
# Inlining
<!-- TODO:
Explain how the `#[target_feature]` attribute interacts with inlining
-->

Просмотреть файл

@ -1,31 +0,0 @@
# Target features in practice
Using `RUSTFLAGS` will allow the crate being compiled, as well as all its
transitive dependencies to use certain target features.
A technique used to avoid undefined behavior at runtime is to compile and
ship multiple binaries, each compiled with a certain set of features.
This might not be feasible in some cases, and can quickly get out of hand
as more and more vector extensions are added to an architecture.
Rust can be more flexible: you can build a single binary/library which automatically
picks the best supported vector instructions depending on the host machine.
The trick consists of monomorphizing parts of the code during building, and then
using run-time feature detection to select the right code path when running.
<!-- TODO
Explain how to create efficient functions that dispatch to different
implementations at run-time without issues (e.g. using `#[inline(always)]` for
the impls, wrapping in `#[target_feature]`, and then wrapping those in a function
that does run-time feature detection).
-->
**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing
SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause
performance issues.
The solution is to compile all code, even the code written with 128-bit vectors,
with the AVX target feature enabled. This will cause the compiler to prefix the
generated instructions with the [VEX] prefix.
[VEX]: https://en.wikipedia.org/wiki/VEX_prefix

Просмотреть файл

@ -1,5 +0,0 @@
# Detecting host features at runtime
<!-- TODO:
Explain cost (how it works).
-->

Просмотреть файл

@ -1,77 +0,0 @@
# Using RUSTFLAGS
One of the easiest ways to benefit from SIMD is to allow the compiler
to generate code using certain vector instruction extensions.
The environment variable `RUSTFLAGS` can be used to pass options for code
generation to the Rust compiler. These flags will affect **all** compiled crates.
There are two flags which can be used to enable specific vector extensions:
## target-feature
- Syntax: `-C target-feature=<features>`
- Provides the compiler with a comma-separated set of instruction extensions
to enable.
**Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions
for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and
[Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions).
- To list target triples for all targets supported by Rust, use:
```sh
rustc --print target-list
```
- To list all supported target features for a certain target triple, use:
```sh
rustc --target=${TRIPLE} --print target-features
```
- Note that all CPU features are independent, and will have to be enabled individually.
**Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though
all CPUs which support AVX2 also support FMA. To enable both, one has to use
`-C target-feature=+avx2,+fma`
- Some features also depend on other features, which need to be enabled for the
target instructions to be generated.
**Example**: Unless `v7` is specified as the target CPU (see below), to enable
NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`.
## target-cpu
- Syntax: `-C target-cpu=<cpu>`
- Sets the identifier of a CPU family / model for which to build and optimize the code.
**Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'`
- To list all supported target CPUs for a certain target triple, use:
```sh
rustc --target=${TRIPLE} --print target-cpus
```
**Example**:
```sh
rustc --target=i686-pc-windows-msvc --print target-cpus
```
- The compiler will translate this into a list of target features. Therefore,
individual feature checks (`#[cfg(target_feature = "...")]`) will still
work properly.
- It will cause the code generator to optimize the generated code for that
specific CPU model.
- Using `native` as the CPU model will cause Rust to generate and optimize code
for the CPU running the compiler. It is useful when building programs which you
plan to only use locally. This should never be used when the generated programs
are meant to be run on other computers, such as when packaging for distribution
or cross-compiling.

Просмотреть файл

@ -1,76 +0,0 @@
# Vertical and horizontal operations
In SIMD terminology, each vector has a certain "width" (number of lanes).
A vector processor is able to perform two kinds of operations on a vector:
- Vertical operations:
operate on two vectors of the same width, result has same width
**Example**: vertical addition of two `f32x4` vectors
%0 == | 2 | -3.5 | 0 | 7 |
+ + + +
%1 == | 4 | 1.5 | -1 | 0 |
= = = =
%0 + %1 == | 6 | -2 | -1 | 7 |
- Horizontal operations:
reduce the elements of two vectors in some way,
the result's elements combine information from the two original ones
**Example**: horizontal addition of two `u64x2` vectors
%0 == | 1 | 3 |
└─+───┘
└───────┐
%1 == | 4 | -1 | │
└─+──┘ │
└───┐ │
│ │
┌─────│───┘
▼ ▼
%0 + %1 == | 4 | 3 |
## Performance consideration of horizontal operations
The result of vertical operations, like vector negation: `-a`, for a given lane,
does not depend on the result of the operation for the other lanes. The result
of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on
the value of all vector lanes.
In virtually all architectures vertical operations are fast, while horizontal
operations are, by comparison, very slow.
Consider the following two functions for computing the sum of all `f32` values
in a slice:
```rust
// Sums all elements of `x`, keeping the running total in an `f32x4` vector.
// Panics if `x.len()` is not a multiple of 4.
fn fast_sum(x: &[f32]) -> f32 {
assert!(x.len() % 4 == 0);
let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.]
for i in (0..x.len()).step_by(4) {
// Vertical operation: the loaded vector is added lane-by-lane to `sum`.
sum += f32x4::from_slice_unaligned(&x[i..]);
}
// The only horizontal reduction, performed once after the loop.
sum.sum()
}
// Sums all elements of `x`, keeping the running total in a scalar `f32`.
// Panics if `x.len()` is not a multiple of 4.
fn slow_sum(x: &[f32]) -> f32 {
assert!(x.len() % 4 == 0);
let mut sum: f32 = 0.;
for i in (0..x.len()).step_by(4) {
// Horizontal operation on every iteration: each loaded vector is reduced
// to a scalar with `.sum()` before being accumulated.
sum += f32x4::from_slice_unaligned(&x[i..]).sum();
}
sum
}
```
The inner loop over the slice is where the bulk of the work actually happens.
There, the `fast_sum` function performs vertical operations into a vector, doing
a single horizontal reduction at the end, while the `slow_sum` function performs
horizontal vector operations inside of the loop.
On all widely-used architectures, `fast_sum` is a large constant factor faster
than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On
the particular machine tested there the algorithm using the horizontal vector
addition is 2.7x slower than the one using vertical vector operations!

1
third_party/rust/packed_simd/rust-toolchain поставляемый
Просмотреть файл

@ -1 +0,0 @@
nightly

5
third_party/rust/packed_simd/rustfmt.toml поставляемый
Просмотреть файл

@ -1,5 +0,0 @@
max_width = 110
use_small_heuristics = "Max"
wrap_comments = true
edition = "2018"
error_on_line_overflow = true

309
third_party/rust/packed_simd/src/api.rs поставляемый
Просмотреть файл

@ -1,309 +0,0 @@
//! Implements the Simd<[T; N]> APIs
#[macro_use]
mod bitmask;
pub(crate) mod cast;
#[macro_use]
mod cmp;
#[macro_use]
mod default;
#[macro_use]
mod fmt;
#[macro_use]
mod from;
#[macro_use]
mod hash;
#[macro_use]
mod math;
#[macro_use]
mod minimal;
#[macro_use]
mod ops;
#[macro_use]
mod ptr;
#[macro_use]
mod reductions;
#[macro_use]
mod select;
#[macro_use]
mod shuffle;
#[macro_use]
mod shuffle1_dyn;
#[macro_use]
mod slice;
#[macro_use]
mod swap_bytes;
#[macro_use]
mod bit_manip;
#[cfg(feature = "into_bits")]
pub(crate) mod into_bits;
// Expands to the full set of API implementations for one integer vector type
// (presumably the signed ones: the expansion includes `impl_ops_vector_neg!`
// and uses `-1` as the "set" value in the bitmask test).
//
// Arguments, in pattern order:
// - `[$elem_ty; $elem_n]`: element type and lane count of the vector.
// - `$tuple_id`: name of the vector type being implemented.
// - `$mask_ty`: mask type forwarded to `impl_cmp_vertical!` and `test_select!`.
// - `$ielem_ty`, `$ibitmask_ty`: forwarded to the minimal/reduction and
//   bitmask sub-macros respectively.
// - `$test_tt`: token forwarded to every sub-macro (presumably selects whether
//   tests are generated -- confirm against `test_if!`).
// - `$elem_ids`: identifiers forwarded to `impl_minimal_iuf!`.
// - `From: ...`: vector types forwarded to `impl_from_vectors!`.
// - `$doc`: attributes forwarded to `impl_minimal_iuf!`.
macro_rules! impl_i {
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
| $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
| $($elem_ids),* | $(#[$doc])*);
// arithmetic, bitwise, shift and rotate operators
impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
);
impl_ops_scalar_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
);
impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_int_min_max!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt
);
// reductions
impl_reduction_integer_arithmetic!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
);
impl_reduction_min_max!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
);
impl_reduction_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
| (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
);
// formatting
impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
// conversions, defaults, hashing, slices and bit manipulation
impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
impl_from_vectors!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
);
impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
// comparisons
impl_cmp_partial_eq!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)
);
impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
impl_cmp_vertical!(
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
);
impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
impl_bitmask!($tuple_id | $ibitmask_ty | (-1, 0) | $test_tt);
// test-only expansions
test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
}
}
// Expands to the full set of API implementations for one integer vector type
// (presumably the unsigned ones: unlike `impl_i!` there is no
// `impl_ops_vector_neg!`, and the bitmask test uses `$ielem_ty::max_value()`
// as the "set" value).
//
// The argument list has the same shape as `impl_i!`:
// `[$elem_ty; $elem_n]: $tuple_id, $mask_ty | $ielem_ty, $ibitmask_ty
//  | $test_tt | $elem_ids,* | From: $from_vec_ty,* | #[doc]*`.
macro_rules! impl_u {
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
| $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
| $($elem_ids),* | $(#[$doc])*);
// arithmetic, bitwise, shift and rotate operators
impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
);
impl_ops_scalar_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
);
impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_int_min_max!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt
);
// reductions
impl_reduction_integer_arithmetic!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
);
impl_reduction_min_max!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
);
impl_reduction_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
| (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
);
// formatting
impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
// conversions, defaults, hashing, slices and bit manipulation
impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
impl_from_vectors!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
);
impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
// comparisons
impl_cmp_partial_eq!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0)
);
impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
impl_cmp_vertical!(
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
);
impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
impl_bitmask!($tuple_id | $ibitmask_ty | ($ielem_ty::max_value(), 0) |
$test_tt);
// test-only expansions
test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
}
}
// Expands to the full set of API implementations for one floating-point
// vector type (the expansion uses float literals and the `impl_math_float_*`
// family).
//
// Compared with `impl_i!` / `impl_u!` the pattern takes no `$ibitmask_ty`,
// and the expansion contains no bitwise, shift, hash, `Eq`/`Ord` or bitmask
// implementations.
macro_rules! impl_f {
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
| $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
| $($elem_ids),* | $(#[$doc])*);
// arithmetic operators and reductions
impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_ops_vector_float_min_max!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt
);
impl_reduction_float_arithmetic!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_reduction_min_max!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
);
// formatting, conversions, defaults, equality and slices
impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.));
impl_from_vectors!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
);
impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_cmp_partial_eq!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.)
);
impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_float_consts!([$elem_ty; $elem_n]: $tuple_id);
impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty);
// floating-point math
impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_math_float_tanh!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
// lane-wise comparisons producing `$mask_ty`
impl_cmp_vertical!(
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.)
| $test_tt
);
// test-only expansions
test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt);
test_reduction_float_min_max!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt
);
test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
}
}
// Expands to the full set of API implementations for one mask vector type.
//
// Differences from the integer/float macros that are visible here:
// - the pattern has no `$mask_ty`; `impl_cmp_vertical!` receives `$tuple_id`
//   as its own mask type, with the flag `true` instead of `false`;
// - `impl_reduction_bitwise!`, `impl_fmt_debug!` and `impl_default!` are
//   instantiated with `bool` as the element type rather than `$elem_ty`;
// - this is the only macro in the family that expands `impl_select!`.
macro_rules! impl_m {
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident
| $ielem_ty:ident, $ibitmask_ty:ident
| $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),*
| $(#[$doc:meta])*) => {
impl_minimal_mask!(
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
| $($elem_ids),* | $(#[$doc])*
);
// bitwise operators and reductions over `true` / `false` lanes
impl_ops_vector_mask_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
);
impl_ops_scalar_mask_bitwise!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
);
impl_reduction_bitwise!(
[bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt
| (|x|{ x != 0 }) | (true, false)
);
impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
// formatting, conversions and defaults
impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt);
impl_from_array!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt
| (crate::$elem_ty::new(true), true)
);
impl_from_vectors!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
);
impl_default!([bool; $elem_n]: $tuple_id | $test_tt);
// comparisons and selection
impl_cmp_partial_eq!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
);
impl_cmp_eq!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
);
impl_cmp_vertical!(
[$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false)
| $test_tt
);
impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_cmp_ord!(
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true)
);
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
impl_bitmask!($tuple_id | $ibitmask_ty | (true, false) | $test_tt);
// test-only expansions
test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
}
}
// Expands to the API implementations for one pointer vector type
// (presumably vectors of `*const T`, going by the macro name -- confirm at the
// call sites).
//
// `impl_minimal_p!` is instantiated with the `ref_` token and the test values
// `(1 as $elem_ty, 0 as $elem_ty)`; only `impl_ptr_read!` is expanded, there
// is no write support.
// The `From: ...` list is accepted by the pattern but not used in the
// expansion.
macro_rules! impl_const_p {
([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
$usize_ty:ident, $isize_ty:ident
| $test_tt:tt | $($elem_ids:ident),*
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
impl_minimal_p!(
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
| ref_ | $test_tt | $($elem_ids),*
| (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
);
impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
}
}
// Expands to the API implementations for one pointer vector type
// (presumably vectors of `*mut T`, going by the macro name -- confirm at the
// call sites).
//
// Same shape as `impl_const_p!`, except that `impl_minimal_p!` receives the
// `ref_mut_` token and `impl_ptr_write!` is expanded in addition to
// `impl_ptr_read!`.
// The `From: ...` list is accepted by the pattern but not used in the
// expansion.
macro_rules! impl_mut_p {
([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
$usize_ty:ident, $isize_ty:ident
| $test_tt:tt | $($elem_ids:ident),*
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
impl_minimal_p!(
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
| ref_mut_ | $test_tt | $($elem_ids),*
| (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
);
impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
}
}

Просмотреть файл

@ -1,129 +0,0 @@
//! Bit manipulations.
// Implements the lane-wise bit-counting methods (`count_ones`, `count_zeros`,
// `leading_zeros`, `trailing_zeros`) for the integer vector type `$id`, whose
// lanes have type `$elem_ty`, and generates tests that compare every vector
// result against the scalar method applied lane by lane.
//
// `$elem_count` is accepted for call-site uniformity but is not used in the
// expansion; `$test_tt` is forwarded to `test_if!`.
macro_rules! impl_bit_manip {
    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
        impl $id {
            /// Returns the number of ones in the binary representation of
            /// the lanes of `self`.
            #[inline]
            pub fn count_ones(self) -> Self {
                super::codegen::bit_manip::BitManip::ctpop(self)
            }

            /// Returns the number of zeros in the binary representation of
            /// the lanes of `self`.
            #[inline]
            pub fn count_zeros(self) -> Self {
                // Counting the ones of the complement counts the zeros.
                super::codegen::bit_manip::BitManip::ctpop(!self)
            }

            /// Returns the number of leading zeros in the binary
            /// representation of the lanes of `self`.
            #[inline]
            pub fn leading_zeros(self) -> Self {
                super::codegen::bit_manip::BitManip::ctlz(self)
            }

            /// Returns the number of trailing zeros in the binary
            /// representation of the lanes of `self`.
            #[inline]
            pub fn trailing_zeros(self) -> Self {
                super::codegen::bit_manip::BitManip::cttz(self)
            }
        }

        test_if! {
            $test_tt:
            paste::item! {
                #[allow(overflowing_literals)]
                pub mod [<$id _bit_manip>] {
                    use super::*;

                    // Size of one lane, in bytes and in bits.
                    const LANE_BYTES: usize = mem::size_of::<$elem_ty>();
                    const LANE_WIDTH: usize = LANE_BYTES * 8;

                    // Applies the scalar `$func` to every lane of `$x` and
                    // checks that the vector `$func` gives the same result.
                    macro_rules! test_func {
                        ($x:expr, $func:ident) => {{
                            let mut actual = $x;
                            for i in 0..$id::lanes() {
                                actual = actual.replace(
                                    i,
                                    $x.extract(i).$func() as $elem_ty
                                );
                            }
                            let expected = $x.$func();
                            assert_eq!(actual, expected);
                        }};
                    }

                    const BYTES: [u8; 64] = [
                        0, 1, 2, 3, 4, 5, 6, 7,
                        8, 9, 10, 11, 12, 13, 14, 15,
                        16, 17, 18, 19, 20, 21, 22, 23,
                        24, 25, 26, 27, 28, 29, 30, 31,
                        32, 33, 34, 35, 36, 37, 38, 39,
                        40, 41, 42, 43, 44, 45, 46, 47,
                        48, 49, 50, 51, 52, 53, 54, 55,
                        56, 57, 58, 59, 60, 61, 62, 63,
                    ];

                    // Builds a vector whose lanes are the native-endian
                    // reinterpretation of consecutive chunks of `BYTES`.
                    //
                    // This used to reinterpret `BYTES.as_mut_ptr()` as a
                    // `&mut [$elem_ty]`. `BYTES` is a `const`, so that pointer
                    // referred to a temporary copy which was dropped at the
                    // end of the `let` statement (dangling when read), and a
                    // `[u8; 64]` is not guaranteed to be aligned for wider
                    // lane types. Decoding each lane with `from_ne_bytes`
                    // yields the same values without any `unsafe`.
                    fn load_bytes() -> $id {
                        let mut vec = $id::splat(0);
                        for i in 0..$id::lanes() {
                            let mut raw = [0_u8; LANE_BYTES];
                            raw.copy_from_slice(
                                &BYTES[i * LANE_BYTES..(i + 1) * LANE_BYTES]
                            );
                            vec = vec.replace(i, $elem_ty::from_ne_bytes(raw));
                        }
                        vec
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn count_ones() {
                        test_func!($id::splat(0), count_ones);
                        test_func!($id::splat(!0), count_ones);
                        test_func!(load_bytes(), count_ones);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn count_zeros() {
                        test_func!($id::splat(0), count_zeros);
                        test_func!($id::splat(!0), count_zeros);
                        test_func!(load_bytes(), count_zeros);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn leading_zeros() {
                        test_func!($id::splat(0), leading_zeros);
                        test_func!($id::splat(1), leading_zeros);
                        // some implementations use `pshufb` which has unique
                        // behavior when the 8th bit is set.
                        test_func!($id::splat(0b1000_0010), leading_zeros);
                        test_func!($id::splat(!0), leading_zeros);
                        test_func!(
                            $id::splat(1 << (LANE_WIDTH - 1)),
                            leading_zeros
                        );
                        test_func!(load_bytes(), leading_zeros);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn trailing_zeros() {
                        test_func!($id::splat(0), trailing_zeros);
                        test_func!($id::splat(1), trailing_zeros);
                        test_func!($id::splat(0b1000_0010), trailing_zeros);
                        test_func!($id::splat(!0), trailing_zeros);
                        test_func!(
                            $id::splat(1 << (LANE_WIDTH - 1)),
                            trailing_zeros
                        );
                        test_func!(load_bytes(), trailing_zeros);
                    }
                }
            }
        }
    };
}

Просмотреть файл

@ -1,79 +0,0 @@
//! Bitmask API
// Adds the `bitmask` method to the vector type `$id` and generates its test.
//
// `$ibitmask_ty` is the integer type returned by `bitmask`; `$set` and
// `$clear` are lane values used by the test as "set" and "clear" lanes;
// `$test_tt` is forwarded to `test_if!`.
macro_rules! impl_bitmask {
    ($vec_ty:ident | $bits_ty:ident | ($set:expr, $clear:expr)
     | $test_tt:tt) => {
        impl $vec_ty {
            /// Creates a bitmask with the MSB of each vector lane.
            ///
            /// If the vector has less than 8 lanes, the bits that do not
            /// correspond to any vector lanes are cleared.
            #[inline]
            pub fn bitmask(self) -> $bits_ty {
                unsafe { codegen::llvm::simd_bitmask(self.0) }
            }
        }

        test_if! {
            $test_tt:
            paste::item! {
                #[cfg(not(
                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/210
                    target_endian = "big"
                ))]
                pub mod [<$vec_ty _bitmask>] {
                    use super::*;

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn bitmask() {
                        let lanes = $vec_ty::lanes();

                        // No lane set: the bitmask is zero.
                        let none_set = $vec_ty::splat($clear as _);
                        let expected: $bits_ty = 0;
                        assert_eq!(none_set.bitmask(), expected);

                        // Even lanes set: bits 0, 2, 4, ... are expected.
                        let mut even_set = $vec_ty::splat($clear as _);
                        let mut expected: $bits_ty = 0;
                        for lane in (0..lanes).step_by(2) {
                            even_set = even_set.replace(lane, $set as _);
                            expected |= 1 << lane;
                        }
                        assert_eq!(even_set.bitmask(), expected);

                        // Odd lanes set: bits 1, 3, 5, ... are expected.
                        let mut odd_set = $vec_ty::splat($clear as _);
                        let mut expected: $bits_ty = 0;
                        for lane in (1..lanes).step_by(2) {
                            odd_set = odd_set.replace(lane, $set as _);
                            expected |= 1 << lane;
                        }
                        assert_eq!(odd_set.bitmask(), expected);

                        // Every lane set: one bit per lane is expected.
                        let all_set = $vec_ty::splat($set as _);
                        let mut expected: $bits_ty = 0;
                        for lane in 0..lanes {
                            expected |= 1 << lane;
                        }
                        assert_eq!(all_set.bitmask(), expected);
                    }
                }
            }
        }
    };
}

108
third_party/rust/packed_simd/src/api/cast.rs поставляемый
Просмотреть файл

@ -1,108 +0,0 @@
//! Implementation of `FromCast` and `Cast`.
#![allow(clippy::module_name_repetitions)]
/// Numeric cast from `T` to `Self`.
///
/// > Note: This is a temporary workaround until the conversion traits
/// > specified in [RFC2484] are implemented.
///
/// Numeric cast between vectors with the same number of lanes, such that:
///
/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
///   -> `u32xN`) is a **no-op**,
///
/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
///   `u8xN`) will **truncate**,
///
/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
///   `u32xN`) will:
///     * **zero-extend** if the source is unsigned, or
///     * **sign-extend** if the source is signed,
///
/// * casting from a float to an integer will **round the float towards zero**,
///
/// * casting from an integer to float will produce the floating point
///   representation of the integer, **rounding to nearest, ties to even**,
///
/// * casting from an `f32` to an `f64` is perfect and lossless,
///
/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
///
/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
pub trait FromCast<T>: crate::marker::Sized {
/// Numeric cast from `T` to `Self`.
fn from_cast(_: T) -> Self;
}
/// Numeric cast from `Self` to `T`.
///
/// > Note: This is a temporary workaround until the conversion traits
/// > specified in [RFC2484] are implemented.
///
/// Numeric cast between vectors with the same number of lanes, such that:
///
/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
///   -> `u32xN`) is a **no-op**,
///
/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
///   `u8xN`) will **truncate**,
///
/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
///   `u32xN`) will:
///     * **zero-extend** if the source is unsigned, or
///     * **sign-extend** if the source is signed,
///
/// * casting from a float to an integer will **round the float towards zero**,
///
/// * casting from an integer to float will produce the floating point
///   representation of the integer, **rounding to nearest, ties to even**,
///
/// * casting from an `f32` to an `f64` is perfect and lossless,
///
/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
///
/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
pub trait Cast<T>: crate::marker::Sized {
/// Numeric cast from `self` to `T`.
fn cast(self) -> T;
}
/// Blanket implementation: whenever `U: FromCast<T>`, `T` gets `Cast<U>` by
/// delegating to `U::from_cast`.
impl<T, U> Cast<U> for T
where
    U: FromCast<T>,
{
    #[inline]
    fn cast(self) -> U {
        <U as FromCast<T>>::from_cast(self)
    }
}
/// Every type casts to itself: `from_cast` returns its argument unchanged
/// (and, through the blanket `Cast` impl, so does `cast`).
impl<T> FromCast<T> for T {
    #[inline]
    fn from_cast(value: T) -> T {
        value
    }
}
#[macro_use]
mod macros;
mod v16;
pub use self::v16::*;
mod v32;
pub use self::v32::*;
mod v64;
pub use self::v64::*;
mod v128;
pub use self::v128::*;
mod v256;
pub use self::v256::*;
mod v512;
pub use self::v512::*;

Просмотреть файл

@ -1,82 +0,0 @@
//! Macros implementing `FromCast`
// Implements `FromCast<$src_ty>` for the vector type `$dst_ty` by applying
// `simd_cast` to the wrapped value, and generates a test checking that both
// types have the same number of lanes.
macro_rules! impl_from_cast_ {
    ($dst_ty:ident[$test_tt:tt]: $src_ty:ident) => {
        impl crate::api::cast::FromCast<$src_ty> for $dst_ty {
            #[inline]
            fn from_cast(src: $src_ty) -> Self {
                // Both vector types must have the same number of lanes.
                debug_assert_eq!($src_ty::lanes(), $dst_ty::lanes());
                let converted = unsafe { crate::llvm::simd_cast(src.0) };
                Simd(converted)
            }
        }

        test_if!{
            $test_tt:
            paste::item! {
                pub mod [<$dst_ty _from_cast_ $src_ty>] {
                    use super::*;

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn test() {
                        assert_eq!($dst_ty::lanes(), $src_ty::lanes());
                    }
                }
            }
        }
    };
}
// Implements `FromCast<S>` for `$dst_ty` for every source type `S` in the
// comma-separated list, one `impl_from_cast_!` expansion per source type.
macro_rules! impl_from_cast {
    ($dst_ty:ident[$test_tt:tt]: $($src_ty:ident),*) => {
        $(impl_from_cast_!($dst_ty[$test_tt]: $src_ty);)*
    }
}
// Implements `FromCast<$src_ty>` for the mask vector type `$mask_ty`: a lane
// of the result is `true` where the source lane differs from the source
// type's default value and `false` elsewhere. Also generates a test checking
// the lane counts and that a default source vector casts to an all-`false`
// mask.
macro_rules! impl_from_cast_mask_ {
    ($mask_ty:ident[$test_tt:tt]: $src_ty:ident) => {
        impl crate::api::cast::FromCast<$src_ty> for $mask_ty {
            #[inline]
            fn from_cast(src: $src_ty) -> Self {
                debug_assert_eq!($src_ty::lanes(), $mask_ty::lanes());
                let differs_from_default = src.ne($src_ty::default());
                differs_from_default
                    .select($mask_ty::splat(true), $mask_ty::splat(false))
            }
        }

        test_if!{
            $test_tt:
            paste::item! {
                pub mod [<$mask_ty _from_cast_ $src_ty>] {
                    use super::*;

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn test() {
                        assert_eq!($mask_ty::lanes(), $src_ty::lanes());

                        let default_src = $src_ty::default();
                        let mask: $mask_ty = default_src.cast();
                        assert!(mask.none());
                    }
                }
            }
        }
    };
}
// Implements `FromCast<S>` for the mask type `$mask_ty` for every source type
// `S` in the comma-separated list, one `impl_from_cast_mask_!` expansion per
// source type.
macro_rules! impl_from_cast_mask {
    ($mask_ty:ident[$test_tt:tt]: $($src_ty:ident),*) => {
        $(impl_from_cast_mask_!($mask_ty[$test_tt]: $src_ty);)*
    }
}
// Mirror image of `impl_from_cast!`: for every type `D` in the list,
// implements `FromCast<$src_ty>` for `D`, i.e. makes `$src_ty` castable
// *into* each listed type.
#[allow(unused)]
macro_rules! impl_into_cast {
    ($src_ty:ident[$test_tt:tt]: $($dst_ty:ident),*) => {
        $(impl_from_cast_!($dst_ty[$test_tt]: $src_ty);)*
    }
}

Просмотреть файл

@ -1,302 +0,0 @@
//! `FromCast` and `IntoCast` implementations for portable 128-bit wide vectors
#[rustfmt::skip]
use crate::*;
// 8-bit x 16 lanes: casts into `i8x16` / `u8x16` / `m8x16` from the listed
// 16-lane vector types. The mask target uses `impl_from_cast_mask!`.
impl_from_cast!(i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
impl_from_cast!(u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
impl_from_cast_mask!(m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
// 16-bit x 8 lanes: casts into `i16x8` / `u16x8` / `m16x8` from the listed
// 8-lane vector types. Each list omits the target type itself (the reflexive
// cast is provided by the blanket `impl<T> FromCast<T> for T`).
impl_from_cast!(
i16x8[test_v128]: i8x8,
u8x8,
m8x8,
u16x8,
m16x8,
i32x8,
u32x8,
f32x8,
m32x8,
i64x8,
u64x8,
f64x8,
m64x8,
isizex8,
usizex8,
msizex8
);
impl_from_cast!(
u16x8[test_v128]: i8x8,
u8x8,
m8x8,
i16x8,
m16x8,
i32x8,
u32x8,
f32x8,
m32x8,
i64x8,
u64x8,
f64x8,
m64x8,
isizex8,
usizex8,
msizex8
);
impl_from_cast_mask!(
m16x8[test_v128]: i8x8,
u8x8,
m8x8,
i16x8,
u16x8,
i32x8,
u32x8,
f32x8,
m32x8,
i64x8,
u64x8,
f64x8,
m64x8,
isizex8,
usizex8,
msizex8
);
// 32-bit x 4 lanes: casts into `i32x4` / `u32x4` / `f32x4` / `m32x4` from the
// listed 4-lane vector types. Each list omits the target type itself.
impl_from_cast!(
i32x4[test_v128]: i8x4,
u8x4,
m8x4,
i16x4,
u16x4,
m16x4,
u32x4,
f32x4,
m32x4,
i64x4,
u64x4,
f64x4,
m64x4,
i128x4,
u128x4,
m128x4,
isizex4,
usizex4,
msizex4
);
impl_from_cast!(
u32x4[test_v128]: i8x4,
u8x4,
m8x4,
i16x4,
u16x4,
m16x4,
i32x4,
f32x4,
m32x4,
i64x4,
u64x4,
f64x4,
m64x4,
i128x4,
u128x4,
m128x4,
isizex4,
usizex4,
msizex4
);
impl_from_cast!(
f32x4[test_v128]: i8x4,
u8x4,
m8x4,
i16x4,
u16x4,
m16x4,
i32x4,
u32x4,
m32x4,
i64x4,
u64x4,
f64x4,
m64x4,
i128x4,
u128x4,
m128x4,
isizex4,
usizex4,
msizex4
);
impl_from_cast_mask!(
m32x4[test_v128]: i8x4,
u8x4,
m8x4,
i16x4,
u16x4,
m16x4,
i32x4,
u32x4,
f32x4,
i64x4,
u64x4,
f64x4,
m64x4,
i128x4,
u128x4,
m128x4,
isizex4,
usizex4,
msizex4
);
// 64-bit x 2 lanes: casts into `i64x2` / `u64x2` / `f64x2` / `m64x2` from the
// listed 2-lane vector types. Each list omits the target type itself.
impl_from_cast!(
i64x2[test_v128]: i8x2,
u8x2,
m8x2,
i16x2,
u16x2,
m16x2,
i32x2,
u32x2,
f32x2,
m32x2,
u64x2,
f64x2,
m64x2,
i128x2,
u128x2,
m128x2,
isizex2,
usizex2,
msizex2
);
impl_from_cast!(
u64x2[test_v128]: i8x2,
u8x2,
m8x2,
i16x2,
u16x2,
m16x2,
i32x2,
u32x2,
f32x2,
m32x2,
i64x2,
f64x2,
m64x2,
i128x2,
u128x2,
m128x2,
isizex2,
usizex2,
msizex2
);
impl_from_cast!(
f64x2[test_v128]: i8x2,
u8x2,
m8x2,
i16x2,
u16x2,
m16x2,
i32x2,
u32x2,
f32x2,
m32x2,
i64x2,
u64x2,
m64x2,
i128x2,
u128x2,
m128x2,
isizex2,
usizex2,
msizex2
);
impl_from_cast_mask!(
m64x2[test_v128]: i8x2,
u8x2,
m8x2,
i16x2,
u16x2,
m16x2,
i32x2,
u32x2,
f32x2,
m32x2,
i64x2,
u64x2,
f64x2,
i128x2,
u128x2,
m128x2,
isizex2,
usizex2,
msizex2
);
// Pointer-sized x 2 lanes: casts into `isizex2` / `usizex2` / `msizex2` from
// the listed 2-lane vector types. Each list omits the target type itself.
impl_from_cast!(
isizex2[test_v128]: i8x2,
u8x2,
m8x2,
i16x2,
u16x2,
m16x2,
i32x2,
u32x2,
f32x2,
m32x2,
i64x2,
u64x2,
f64x2,
m64x2,
i128x2,
u128x2,
m128x2,
usizex2,
msizex2
);
impl_from_cast!(
usizex2[test_v128]: i8x2,
u8x2,
m8x2,
i16x2,
u16x2,
m16x2,
i32x2,
u32x2,
f32x2,
m32x2,
i64x2,
u64x2,
f64x2,
m64x2,
i128x2,
u128x2,
m128x2,
isizex2,
msizex2
);
impl_from_cast_mask!(
msizex2[test_v128]: i8x2,
u8x2,
m8x2,
i16x2,
u16x2,
m16x2,
i32x2,
u32x2,
f32x2,
m32x2,
i64x2,
u64x2,
f64x2,
m64x2,
i128x2,
u128x2,
m128x2,
isizex2,
usizex2
);
// FIXME[test_v128]: 128-bit single element vectors into_cast impls
// NOTE(review): this comment originally read "64-bit"; the types below are
// one-lane 128-bit vectors, so that was presumably a copy-paste slip — confirm.
// NOTE(review): `m128x1` is routed through `impl_from_cast!` here, whereas the
// other mask types in this file use `impl_from_cast_mask!`; this may be what
// the FIXME is tracking — verify against the macro definitions.
impl_from_cast!(i128x1[test_v128]: u128x1, m128x1);
impl_from_cast!(u128x1[test_v128]: i128x1, m128x1);
impl_from_cast!(m128x1[test_v128]: i128x1, u128x1);

Просмотреть файл

@ -1,68 +0,0 @@
//! `FromCast` and `IntoCast` implementations for portable 16-bit wide vectors
#[rustfmt::skip]
use crate::*;
// 16-bit vectors with two 8-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 2-lane type — confirm in the macro.

// `i8x2` paired with the other 2-lane vector types.
impl_from_cast!(
    i8x2[test_v16]:
    u8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// `u8x2` paired with the other 2-lane vector types.
impl_from_cast!(
    u8x2[test_v16]:
    i8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// The mask type `m8x2` goes through the mask-specific macro.
impl_from_cast_mask!(
    m8x2[test_v16]:
    i8x2, u8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

Просмотреть файл

@ -1,298 +0,0 @@
//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors
#[rustfmt::skip]
use crate::*;
// 256-bit vectors with 8-bit lanes (32 of them); only 8- and 16-bit
// 32-lane types appear in these lists.
impl_from_cast!(
    i8x32[test_v256]:
    u8x32, m8x32,
    i16x32, u16x32, m16x32
);
impl_from_cast!(
    u8x32[test_v256]:
    i8x32, m8x32,
    i16x32, u16x32, m16x32
);
impl_from_cast_mask!(
    m8x32[test_v256]:
    i8x32, u8x32,
    i16x32, u16x32, m16x32
);

// 256-bit vectors with 16-bit lanes (16 of them); the lists stop at the
// 32-bit 16-lane types.
impl_from_cast!(
    i16x16[test_v256]:
    i8x16, u8x16, m8x16,
    u16x16, m16x16,
    i32x16, u32x16, f32x16, m32x16
);
impl_from_cast!(
    u16x16[test_v256]:
    i8x16, u8x16, m8x16,
    i16x16, m16x16,
    i32x16, u32x16, f32x16, m32x16
);
impl_from_cast_mask!(
    m16x16[test_v256]:
    i8x16, u8x16, m8x16,
    i16x16, u16x16,
    i32x16, u32x16, f32x16, m32x16
);
// 256-bit vectors with eight 32-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 8-lane type — confirm in the macro.

// `i32x8` paired with the other 8-lane vector types.
impl_from_cast!(
    i32x8[test_v256]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);

// `u32x8` paired with the other 8-lane vector types.
impl_from_cast!(
    u32x8[test_v256]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);

// `f32x8` paired with the other 8-lane vector types.
impl_from_cast!(
    f32x8[test_v256]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);

// The mask type `m32x8` goes through the mask-specific macro.
impl_from_cast_mask!(
    m32x8[test_v256]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);
// 256-bit vectors with four 64-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 4-lane type — confirm in the macro.

// `i64x4` paired with the other 4-lane vector types.
impl_from_cast!(
    i64x4[test_v256]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// `u64x4` paired with the other 4-lane vector types.
impl_from_cast!(
    u64x4[test_v256]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// `f64x4` paired with the other 4-lane vector types.
impl_from_cast!(
    f64x4[test_v256]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// The mask type `m64x4` goes through the mask-specific macro.
impl_from_cast_mask!(
    m64x4[test_v256]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);
// 256-bit vectors with two 128-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 2-lane type — confirm in the macro.

// `i128x2` paired with the other 2-lane vector types.
impl_from_cast!(
    i128x2[test_v256]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// `u128x2` paired with the other 2-lane vector types.
impl_from_cast!(
    u128x2[test_v256]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, m128x2,
    isizex2, usizex2, msizex2
);

// The mask type `m128x2` goes through the mask-specific macro.
// `m64x2` is listed before `f64x2` here, unlike the sibling lists; the
// original argument order is kept as-is.
impl_from_cast_mask!(
    m128x2[test_v256]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, m64x2, f64x2,
    i128x2, u128x2,
    isizex2, usizex2, msizex2
);
// Four-lane pointer-sized vectors, tested under `test_v256`.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 4-lane type — confirm in the macro.

// `isizex4` paired with the other 4-lane vector types.
impl_from_cast!(
    isizex4[test_v256]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    usizex4, msizex4
);

// `usizex4` paired with the other 4-lane vector types.
impl_from_cast!(
    usizex4[test_v256]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, msizex4
);

// The mask type `msizex4` goes through the mask-specific macro.
impl_from_cast_mask!(
    msizex4[test_v256]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4
);

Просмотреть файл

@ -1,132 +0,0 @@
//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors
#[rustfmt::skip]
use crate::*;
// 32-bit vectors with four 8-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 4-lane type — confirm in the macro.

// `i8x4` paired with the other 4-lane vector types.
impl_from_cast!(
    i8x4[test_v32]:
    u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// `u8x4` paired with the other 4-lane vector types.
impl_from_cast!(
    u8x4[test_v32]:
    i8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// The mask type `m8x4` goes through the mask-specific macro.
impl_from_cast_mask!(
    m8x4[test_v32]:
    i8x4, u8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);
// 32-bit vectors with two 16-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 2-lane type — confirm in the macro.

// `i16x2` paired with the other 2-lane vector types.
impl_from_cast!(
    i16x2[test_v32]:
    i8x2, u8x2, m8x2,
    u16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// `u16x2` paired with the other 2-lane vector types.
impl_from_cast!(
    u16x2[test_v32]:
    i8x2, u8x2, m8x2,
    i16x2, m16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// The mask type `m16x2` goes through the mask-specific macro.
impl_from_cast_mask!(
    m16x2[test_v32]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2,
    i32x2, u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

Просмотреть файл

@ -1,209 +0,0 @@
//! `FromCast` and `IntoCast` implementations for portable 512-bit wide vectors
#[rustfmt::skip]
use crate::*;
// 512-bit vectors with 8-bit lanes (64 of them); only the other 8-bit
// 64-lane types appear in these lists.
impl_from_cast!(
    i8x64[test_v512]:
    u8x64, m8x64
);
impl_from_cast!(
    u8x64[test_v512]:
    i8x64, m8x64
);
impl_from_cast_mask!(
    m8x64[test_v512]:
    i8x64, u8x64
);

// 512-bit vectors with 16-bit lanes (32 of them).
impl_from_cast!(
    i16x32[test_v512]:
    i8x32, u8x32, m8x32,
    u16x32, m16x32
);
impl_from_cast!(
    u16x32[test_v512]:
    i8x32, u8x32, m8x32,
    i16x32, m16x32
);
impl_from_cast_mask!(
    m16x32[test_v512]:
    i8x32, u8x32, m8x32,
    i16x32, u16x32
);

// 512-bit vectors with 32-bit lanes (16 of them).
impl_from_cast!(
    i32x16[test_v512]:
    i8x16, u8x16, m8x16,
    i16x16, u16x16, m16x16,
    u32x16, f32x16, m32x16
);
impl_from_cast!(
    u32x16[test_v512]:
    i8x16, u8x16, m8x16,
    i16x16, u16x16, m16x16,
    i32x16, f32x16, m32x16
);
impl_from_cast!(
    f32x16[test_v512]:
    i8x16, u8x16, m8x16,
    i16x16, u16x16, m16x16,
    i32x16, u32x16, m32x16
);
impl_from_cast_mask!(
    m32x16[test_v512]:
    i8x16, u8x16, m8x16,
    i16x16, u16x16, m16x16,
    i32x16, u32x16, f32x16
);
// 512-bit vectors with eight 64-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 8-lane type — confirm in the macro.

// `i64x8` paired with the other 8-lane vector types.
impl_from_cast!(
    i64x8[test_v512]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);

// `u64x8` paired with the other 8-lane vector types.
impl_from_cast!(
    u64x8[test_v512]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);

// `f64x8` paired with the other 8-lane vector types.
impl_from_cast!(
    f64x8[test_v512]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, m64x8,
    isizex8, usizex8, msizex8
);

// The mask type `m64x8` goes through the mask-specific macro.
impl_from_cast_mask!(
    m64x8[test_v512]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8,
    isizex8, usizex8, msizex8
);
// 512-bit vectors with four 128-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 4-lane type — confirm in the macro.

// `i128x4` paired with the other 4-lane vector types.
impl_from_cast!(
    i128x4[test_v512]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// `u128x4` paired with the other 4-lane vector types.
impl_from_cast!(
    u128x4[test_v512]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, m128x4,
    isizex4, usizex4, msizex4
);

// The mask type `m128x4` goes through the mask-specific macro.
// `m64x4` is listed before `f64x4` here, unlike the sibling lists; the
// original argument order is kept as-is.
impl_from_cast_mask!(
    m128x4[test_v512]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, m64x4, f64x4,
    i128x4, u128x4,
    isizex4, usizex4, msizex4
);
// Eight-lane pointer-sized vectors, tested under `test_v512`.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 8-lane type — confirm in the macro.

// `isizex8` paired with the other 8-lane vector types.
impl_from_cast!(
    isizex8[test_v512]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    usizex8, msizex8
);

// `usizex8` paired with the other 8-lane vector types.
impl_from_cast!(
    usizex8[test_v512]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, msizex8
);

// The mask type `msizex8` goes through the mask-specific macro.
impl_from_cast_mask!(
    msizex8[test_v512]:
    i8x8, u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8
);

Просмотреть файл

@ -1,208 +0,0 @@
//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors
#[rustfmt::skip]
use crate::*;
// 64-bit vectors with eight 8-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 8-lane type — confirm in the macro.

// `i8x8` paired with the other 8-lane vector types.
impl_from_cast!(
    i8x8[test_v64]:
    u8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);

// `u8x8` paired with the other 8-lane vector types.
impl_from_cast!(
    u8x8[test_v64]:
    i8x8, m8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);

// The mask type `m8x8` goes through the mask-specific macro.
impl_from_cast_mask!(
    m8x8[test_v64]:
    i8x8, u8x8,
    i16x8, u16x8, m16x8,
    i32x8, u32x8, f32x8, m32x8,
    i64x8, u64x8, f64x8, m64x8,
    isizex8, usizex8, msizex8
);
// 64-bit vectors with four 16-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 4-lane type — confirm in the macro.

// `i16x4` paired with the other 4-lane vector types.
impl_from_cast!(
    i16x4[test_v64]:
    i8x4, u8x4, m8x4,
    u16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// `u16x4` paired with the other 4-lane vector types.
impl_from_cast!(
    u16x4[test_v64]:
    i8x4, u8x4, m8x4,
    i16x4, m16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);

// The mask type `m16x4` goes through the mask-specific macro.
impl_from_cast_mask!(
    m16x4[test_v64]:
    i8x4, u8x4, m8x4,
    i16x4, u16x4,
    i32x4, u32x4, f32x4, m32x4,
    i64x4, u64x4, f64x4, m64x4,
    i128x4, u128x4, m128x4,
    isizex4, usizex4, msizex4
);
// 64-bit vectors with two 32-bit lanes.
// The macros are defined elsewhere; presumably each call wires up the casts
// between the leading type and every listed 2-lane type — confirm in the macro.

// `i32x2` paired with the other 2-lane vector types.
impl_from_cast!(
    i32x2[test_v64]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2, m16x2,
    u32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// `u32x2` paired with the other 2-lane vector types.
impl_from_cast!(
    u32x2[test_v64]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, f32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// `f32x2` paired with the other 2-lane vector types.
impl_from_cast!(
    f32x2[test_v64]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, m32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

// The mask type `m32x2` goes through the mask-specific macro.
impl_from_cast_mask!(
    m32x2[test_v64]:
    i8x2, u8x2, m8x2,
    i16x2, u16x2, m16x2,
    i32x2, u32x2, f32x2,
    i64x2, u64x2, f64x2, m64x2,
    i128x2, u128x2, m128x2,
    isizex2, usizex2, msizex2
);

16
third_party/rust/packed_simd/src/api/cmp.rs поставляемый
Просмотреть файл

@ -1,16 +0,0 @@
//! Implement cmp traits for vector types
// Comparison-related submodules. Each declaration carries `#[macro_use]` so
// that any `macro_rules!` macros defined inside the submodule stay in scope
// for code that comes after the declaration in the parent module.
#[macro_use]
mod partial_eq;
#[macro_use]
mod eq;
#[macro_use]
mod partial_ord;
#[macro_use]
mod ord;
#[macro_use]
mod vertical;

Просмотреть файл

@ -1,27 +0,0 @@
//! Implements `Eq` for vector types.
/// Marks a vector type as `Eq` and emits a smoke test for that impl.
///
/// Expands to empty `Eq` impls for `$id` and for
/// `LexicographicallyOrdered<$id>`, followed by a test module whose name is
/// `$id` with `_cmp_eq` appended (assembled by `paste::item!`). The module is
/// wrapped in `test_if!` together with `$test_tt`; `test_if!` is defined
/// elsewhere and presumably decides whether the test is compiled.
///
/// `$elem_ty`, `$elem_count` and `$true` are accepted by the matcher but are
/// not used in this expansion.
macro_rules! impl_cmp_eq {
    (
        [$elem_ty:ident; $elem_count:expr]:
        $id:ident | $test_tt:tt |
        ($true:expr, $false:expr)
    ) => {
        impl crate::cmp::Eq for $id {}
        impl crate::cmp::Eq for LexicographicallyOrdered<$id> {}

        test_if! {
            $test_tt:
            paste::item! {
                pub mod [<$id _cmp_eq>] {
                    use super::*;

                    // Plain `#[test]` everywhere except wasm32, which uses
                    // `wasm_bindgen_test` instead.
                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn eq() {
                        // Only compiles when the argument's type is `Eq`;
                        // there is nothing to check at run time.
                        fn requires_eq<T: crate::cmp::Eq>(_: T) {}
                        requires_eq($id::splat($false));
                    }
                }
            }
        }
    };
}

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше