зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1882209 - Update encoding_rs to 0.8.34 to deal with rustc changes. r=glandium,supply-chain-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D207167
This commit is contained in:
Родитель
34dd45ff4c
Коммит
5b199dcf62
|
@ -91,6 +91,15 @@ version = "1.0.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46"
|
||||
|
||||
[[package]]
|
||||
name = "any_all_workaround"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88fea40735f2cc320a5133ce772d39c571bd6c9b0d4c1a326926eecdd5af2e86"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.69"
|
||||
|
@ -1574,12 +1583,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.33"
|
||||
version = "0.8.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
|
||||
checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
|
||||
dependencies = [
|
||||
"any_all_workaround",
|
||||
"cfg-if 1.0.0",
|
||||
"packed_simd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4320,16 +4329,6 @@ dependencies = [
|
|||
"oxilangtag",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packed_simd"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.1"
|
||||
|
|
|
@ -265,7 +265,7 @@ endif
|
|||
ifndef RUSTC_BOOTSTRAP
|
||||
RUSTC_BOOTSTRAP := mozglue_static,qcms
|
||||
ifdef MOZ_RUST_SIMD
|
||||
RUSTC_BOOTSTRAP := $(RUSTC_BOOTSTRAP),encoding_rs,packed_simd
|
||||
RUSTC_BOOTSTRAP := $(RUSTC_BOOTSTRAP),encoding_rs,any_all_workaround
|
||||
endif
|
||||
export RUSTC_BOOTSTRAP
|
||||
endif
|
||||
|
|
|
@ -588,6 +588,12 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
|
|||
criteria = "safe-to-deploy"
|
||||
delta = "0.1.4 -> 0.1.5"
|
||||
|
||||
[[audits.any_all_workaround]]
|
||||
who = "Henri Sivonen <hsivonen@hsivonen.fi>"
|
||||
criteria = "safe-to-deploy"
|
||||
version = "0.1.0"
|
||||
notes = "The little code that is in this crate I reviewed and modified from packed_simd (which has previously been vendored in full instead of just this small part)."
|
||||
|
||||
[[audits.anyhow]]
|
||||
who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
|
|
|
@ -190,6 +190,13 @@ user-id = 4484
|
|||
user-login = "hsivonen"
|
||||
user-name = "Henri Sivonen"
|
||||
|
||||
[[publisher.encoding_rs]]
|
||||
version = "0.8.34"
|
||||
when = "2024-04-10"
|
||||
user-id = 4484
|
||||
user-login = "hsivonen"
|
||||
user-name = "Henri Sivonen"
|
||||
|
||||
[[publisher.errno]]
|
||||
version = "0.3.8"
|
||||
when = "2023-11-28"
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{"files":{"Cargo.toml":"c38be4bc8ef1c4df398b3eae589681d1bbb54a2577c71d592e12db0af757c472","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"abebbd2620f915c70a873dd8221d99eadd8d017b7b194c22f3e0051f1fde193f","src/lib.rs":"e8a36b888f0f20accd4e7bfb2db9196e42b4be2d1014cb675981543d1372c610"},"package":"88fea40735f2cc320a5133ce772d39c571bd6c9b0d4c1a326926eecdd5af2e86"}
|
|
@ -0,0 +1,25 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2021"
|
||||
name = "any_all_workaround"
|
||||
version = "0.1.0"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "Workaround for bad LLVM codegen for boolean reductions on 32-bit ARM"
|
||||
homepage = "https://docs.rs/any_all_workaround/"
|
||||
documentation = "https://docs.rs/any_all_workaround/"
|
||||
readme = "README.md"
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/hsivonen/any_all_workaround"
|
||||
|
||||
[dependencies.cfg-if]
|
||||
version = "1.0"
|
|
@ -0,0 +1,13 @@
|
|||
# any_all_workaround
|
||||
|
||||
This is a workaround for bad codegen ([Rust bug](https://github.com/rust-lang/portable-simd/issues/146), [LLVM bug](https://github.com/llvm/llvm-project/issues/50466)) for the `any()` and `all()` reductions for NEON-backed SIMD vectors on 32-bit ARM. On other platforms these delegate to `any()` and `all()` in `core::simd`.
|
||||
|
||||
The plan is to abandon this crate once the LLVM bug is fixed or `core::simd` works around the LLVM bug.
|
||||
|
||||
The code is forked from the [`packed_simd` crate](https://raw.githubusercontent.com/hsivonen/packed_simd/d938e39bee9bc5c222f5f2f2a0df9e53b5ce36ae/src/codegen/reductions/mask/arm.rs).
|
||||
|
||||
This crate requires Nightly Rust as it depends on the `portable_simd` feature.
|
||||
|
||||
# License
|
||||
|
||||
`MIT OR Apache-2.0`, since that's how `packed_simd` is licensed.
|
|
@ -0,0 +1,99 @@
|
|||
// This code began as a fork of
|
||||
// https://raw.githubusercontent.com/rust-lang/packed_simd/d938e39bee9bc5c222f5f2f2a0df9e53b5ce36ae/src/codegen/reductions/mask/arm.rs
|
||||
// which didn't have a license header on the file, but Cargo.toml said "MIT OR Apache-2.0".
|
||||
// See LICENSE-MIT and LICENSE-APACHE.
|
||||
|
||||
#![no_std]
|
||||
#![feature(portable_simd)]
|
||||
#![cfg_attr(
|
||||
all(
|
||||
target_arch = "arm",
|
||||
target_endian = "little",
|
||||
target_feature = "neon",
|
||||
target_feature = "v7"
|
||||
),
|
||||
feature(stdarch_arm_neon_intrinsics)
|
||||
)]
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use core::simd::mask16x8;
|
||||
use core::simd::mask32x4;
|
||||
use core::simd::mask8x16;
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(all(target_arch = "arm", target_endian = "little", target_feature = "neon", target_feature = "v7"))] {
|
||||
use core::simd::mask8x8;
|
||||
use core::simd::mask16x4;
|
||||
use core::simd::mask32x2;
|
||||
macro_rules! arm_128_v7_neon_impl {
|
||||
($all:ident, $any:ident, $id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
|
||||
#[inline]
|
||||
pub fn $all(s: $id) -> bool {
|
||||
use core::arch::arm::$vpmin;
|
||||
use core::mem::transmute;
|
||||
unsafe {
|
||||
union U {
|
||||
halves: ($half, $half),
|
||||
vec: $id,
|
||||
}
|
||||
let halves = U { vec: s }.halves;
|
||||
let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1)));
|
||||
h.all()
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
pub fn $any(s: $id) -> bool {
|
||||
use core::arch::arm::$vpmax;
|
||||
use core::mem::transmute;
|
||||
unsafe {
|
||||
union U {
|
||||
halves: ($half, $half),
|
||||
vec: $id,
|
||||
}
|
||||
let halves = U { vec: s }.halves;
|
||||
let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1)));
|
||||
h.any()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
macro_rules! arm_128_v7_neon_impl {
|
||||
($all:ident, $any:ident, $id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
|
||||
#[inline(always)]
|
||||
pub fn $all(s: $id) -> bool {
|
||||
s.all()
|
||||
}
|
||||
#[inline(always)]
|
||||
pub fn $any(s: $id) -> bool {
|
||||
s.any()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
arm_128_v7_neon_impl!(
|
||||
all_mask8x16,
|
||||
any_mask8x16,
|
||||
mask8x16,
|
||||
mask8x8,
|
||||
vpmin_u8,
|
||||
vpmax_u8
|
||||
);
|
||||
arm_128_v7_neon_impl!(
|
||||
all_mask16x8,
|
||||
any_mask16x8,
|
||||
mask16x8,
|
||||
mask16x4,
|
||||
vpmin_u16,
|
||||
vpmax_u16
|
||||
);
|
||||
arm_128_v7_neon_impl!(
|
||||
all_mask32x4,
|
||||
any_mask32x4,
|
||||
mask32x4,
|
||||
mask32x2,
|
||||
vpmin_u32,
|
||||
vpmax_u32
|
||||
);
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -11,8 +11,9 @@
|
|||
|
||||
[package]
|
||||
edition = "2018"
|
||||
rust-version = "1.36"
|
||||
name = "encoding_rs"
|
||||
version = "0.8.33"
|
||||
version = "0.8.34"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "A Gecko-oriented implementation of the Encoding Standard"
|
||||
homepage = "https://docs.rs/encoding_rs/"
|
||||
|
@ -36,13 +37,13 @@ repository = "https://github.com/hsivonen/encoding_rs"
|
|||
[profile.release]
|
||||
lto = true
|
||||
|
||||
[dependencies.any_all_workaround]
|
||||
version = "0.1.0"
|
||||
optional = true
|
||||
|
||||
[dependencies.cfg-if]
|
||||
version = "1.0"
|
||||
|
||||
[dependencies.packed_simd]
|
||||
version = "0.3.9"
|
||||
optional = true
|
||||
|
||||
[dependencies.serde]
|
||||
version = "1.0"
|
||||
optional = true
|
||||
|
@ -74,10 +75,4 @@ fast-legacy-encode = [
|
|||
less-slow-big5-hanzi-encode = []
|
||||
less-slow-gb-hanzi-encode = []
|
||||
less-slow-kanji-encode = []
|
||||
simd-accel = [
|
||||
"packed_simd",
|
||||
"packed_simd/into_bits",
|
||||
]
|
||||
|
||||
[badges.travis-ci]
|
||||
repository = "hsivonen/encoding_rs"
|
||||
simd-accel = ["any_all_workaround"]
|
||||
|
|
|
@ -167,13 +167,15 @@ There are currently these optional cargo features:
|
|||
|
||||
### `simd-accel`
|
||||
|
||||
Enables SIMD acceleration using the nightly-dependent `packed_simd` crate.
|
||||
Enables SIMD acceleration using the nightly-dependent `portable_simd` standard
|
||||
library feature.
|
||||
|
||||
This is an opt-in feature, because enabling this feature _opts out_ of Rust's
|
||||
guarantees of future compilers compiling old code (aka. "stability story").
|
||||
|
||||
Currently, this has not been tested to be an improvement except for these
|
||||
targets:
|
||||
targets and enabling the `simd-accel` feature is expected to break the build
|
||||
on other targets:
|
||||
|
||||
* x86_64
|
||||
* i686
|
||||
|
@ -185,22 +187,6 @@ above, and you are prepared _to have to revise your configuration when updating
|
|||
Rust_, you should enable this feature. Otherwise, please _do not_ enable this
|
||||
feature.
|
||||
|
||||
_Note!_ If you are compiling for a target that does not have 128-bit SIMD
|
||||
enabled as part of the target definition and you are enabling 128-bit SIMD
|
||||
using `-C target_feature`, you need to enable the `core_arch` Cargo feature
|
||||
for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of
|
||||
using the standard-library copy of `core::arch`, because the `core::arch`
|
||||
module of the pre-compiled standard library has been compiled with the
|
||||
assumption that the CPU doesn't have 128-bit SIMD. At present this applies
|
||||
mainly to 32-bit ARM targets whose first component does not include the
|
||||
substring `neon`.
|
||||
|
||||
The encoding_rs side of things has not been properly set up for POWER,
|
||||
PowerPC, MIPS, etc., SIMD at this time, so even if you were to follow
|
||||
the advice from the previous paragraph, you probably shouldn't use
|
||||
the `simd-accel` option on the less mainstream architectures at this
|
||||
time.
|
||||
|
||||
Used by Firefox.
|
||||
|
||||
### `serde`
|
||||
|
@ -381,8 +367,9 @@ as semver-breaking, because this crate depends on `cfg-if`, which doesn't
|
|||
appear to treat MSRV changes as semver-breaking, so it would be useless for
|
||||
this crate to treat MSRV changes as semver-breaking.
|
||||
|
||||
As of 2021-02-04, MSRV appears to be Rust 1.36.0 for using the crate and
|
||||
As of 2024-04-04, MSRV appears to be Rust 1.36.0 for using the crate and
|
||||
1.42.0 for doc tests to pass without errors about the global allocator.
|
||||
With the `simd-accel` feature, the MSRV is even higher.
|
||||
|
||||
## Compatibility with rust-encoding
|
||||
|
||||
|
@ -446,10 +433,17 @@ To regenerate the generated code:
|
|||
- [x] Add actually fast CJK encode options.
|
||||
- [ ] ~Investigate [Bob Steagall's lookup table acceleration for UTF-8](https://github.com/BobSteagall/CppNow2018/blob/master/FastConversionFromUTF-8/Fast%20Conversion%20From%20UTF-8%20with%20C%2B%2B%2C%20DFAs%2C%20and%20SSE%20Intrinsics%20-%20Bob%20Steagall%20-%20C%2B%2BNow%202018.pdf).~
|
||||
- [x] Provide a build mode that works without `alloc` (with lesser API surface).
|
||||
- [ ] Migrate to `std::simd` once it is stable and declare 1.0.
|
||||
- [x] Migrate to `std::simd` ~once it is stable and declare 1.0.~
|
||||
- [ ] Migrate `unsafe` slice access by larger types than `u8`/`u16` to `align_to`.
|
||||
|
||||
## Release Notes
|
||||
|
||||
### 0.8.34
|
||||
|
||||
* Use the `portable_simd` nightly feature of the standard library instead of the `packed_simd` crate. Only affects the `simd-accel` optional nightly feature.
|
||||
* Internal documentation improvements and minor code improvements around `unsafe`.
|
||||
* Added `rust-version` to `Cargo.toml`.
|
||||
|
||||
### 0.8.33
|
||||
|
||||
* Use `packed_simd` instead of `packed_simd_2` again now that updates are back under the `packed_simd` name. Only affects the `simd-accel` optional nightly feature.
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -34,7 +34,7 @@ use crate::simd_funcs::*;
|
|||
all(target_endian = "little", target_feature = "neon")
|
||||
)
|
||||
))]
|
||||
use packed_simd::u16x8;
|
||||
use core::simd::u16x8;
|
||||
|
||||
use super::DecoderResult;
|
||||
use super::EncoderResult;
|
||||
|
@ -90,19 +90,23 @@ impl Endian for LittleEndian {
|
|||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
struct UnalignedU16Slice {
|
||||
// Safety invariant: ptr must be valid for reading 2*len bytes
|
||||
ptr: *const u8,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl UnalignedU16Slice {
|
||||
/// Safety: ptr must be valid for reading 2*len bytes
|
||||
#[inline(always)]
|
||||
pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
|
||||
// Safety: field invariant passed up to caller here
|
||||
UnalignedU16Slice { ptr, len }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn trim_last(&mut self) {
|
||||
assert!(self.len > 0);
|
||||
// Safety: invariant upheld here: a slice is still valid with a shorter len
|
||||
self.len -= 1;
|
||||
}
|
||||
|
||||
|
@ -113,7 +117,9 @@ impl UnalignedU16Slice {
|
|||
assert!(i < self.len);
|
||||
unsafe {
|
||||
let mut u: MaybeUninit<u16> = MaybeUninit::uninit();
|
||||
// Safety: i is at most len - 1, which works here
|
||||
::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2);
|
||||
// Safety: valid read above lets us do this
|
||||
u.assume_init()
|
||||
}
|
||||
}
|
||||
|
@ -121,8 +127,13 @@ impl UnalignedU16Slice {
|
|||
#[cfg(feature = "simd-accel")]
|
||||
#[inline(always)]
|
||||
pub fn simd_at(&self, i: usize) -> u16x8 {
|
||||
// Safety: i/len are on the scale of u16s, each one corresponds to 2 u8s
|
||||
assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
|
||||
let byte_index = i * 2;
|
||||
// Safety: load16_unaligned needs SIMD_STRIDE_SIZE=16 u8 elements to read,
|
||||
// or 16/2 = 8 u16 elements to read.
|
||||
// We have checked that we have at least that many above.
|
||||
|
||||
unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
|
||||
}
|
||||
|
||||
|
@ -136,6 +147,7 @@ impl UnalignedU16Slice {
|
|||
// XXX the return value should be restricted not to
|
||||
// outlive self.
|
||||
assert!(from <= self.len);
|
||||
// Safety: This upholds the same invariant: `from` is in bounds and we're returning a shorter slice
|
||||
unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
|
||||
}
|
||||
|
||||
|
@ -144,6 +156,8 @@ impl UnalignedU16Slice {
|
|||
pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
|
||||
assert!(self.len <= other.len());
|
||||
let mut offset = 0;
|
||||
// Safety: SIMD_STRIDE_SIZE is measured in bytes, whereas len is in u16s. We check we can
|
||||
// munch SIMD_STRIDE_SIZE / 2 u16s which means we can write SIMD_STRIDE_SIZE u8s
|
||||
if SIMD_STRIDE_SIZE / 2 <= self.len {
|
||||
let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
|
||||
loop {
|
||||
|
@ -151,6 +165,7 @@ impl UnalignedU16Slice {
|
|||
if E::OPPOSITE_ENDIAN {
|
||||
simd = simd_byte_swap(simd);
|
||||
}
|
||||
// Safety: we have enough space on the other side to write this
|
||||
unsafe {
|
||||
store8_unaligned(other.as_mut_ptr().add(offset), simd);
|
||||
}
|
||||
|
@ -158,6 +173,7 @@ impl UnalignedU16Slice {
|
|||
break;
|
||||
}
|
||||
offset += SIMD_STRIDE_SIZE / 2;
|
||||
// Safety: This ensures we still have space for writing SIMD_STRIDE_SIZE u8s
|
||||
if offset > len_minus_stride {
|
||||
break;
|
||||
}
|
||||
|
@ -236,6 +252,7 @@ fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
|
|||
) -> CopyAsciiResult<usize, (u16, usize)> {
|
||||
let len = ::core::cmp::min(src.len(), dst.len());
|
||||
let mut offset = 0;
|
||||
// Safety: This check ensures we are able to read/write at least SIMD_STRIDE_SIZE elements
|
||||
if SIMD_STRIDE_SIZE <= len {
|
||||
let len_minus_stride = len - SIMD_STRIDE_SIZE;
|
||||
loop {
|
||||
|
@ -249,10 +266,13 @@ fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
|
|||
break;
|
||||
}
|
||||
let packed = simd_pack(first, second);
|
||||
// Safety: We are able to write SIMD_STRIDE_SIZE elements in this iteration
|
||||
unsafe {
|
||||
store16_unaligned(dst.as_mut_ptr().add(offset), packed);
|
||||
}
|
||||
offset += SIMD_STRIDE_SIZE;
|
||||
// Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which ensures that we can write at least SIMD_STRIDE_SIZE elements
|
||||
// in the next iteration
|
||||
if offset > len_minus_stride {
|
||||
break;
|
||||
}
|
||||
|
@ -637,7 +657,7 @@ impl<'a> Utf16Destination<'a> {
|
|||
self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
|
||||
}
|
||||
#[inline(always)]
|
||||
pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
|
||||
fn write_surrogate_pair(&mut self, high: u16, low: u16) {
|
||||
self.write_code_unit(high);
|
||||
self.write_code_unit(low);
|
||||
}
|
||||
|
@ -646,6 +666,7 @@ impl<'a> Utf16Destination<'a> {
|
|||
self.write_bmp_excl_ascii(combined);
|
||||
self.write_bmp_excl_ascii(combining);
|
||||
}
|
||||
// Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
|
||||
#[inline(always)]
|
||||
pub fn copy_ascii_from_check_space_bmp<'b>(
|
||||
&'b mut self,
|
||||
|
@ -659,6 +680,8 @@ impl<'a> Utf16Destination<'a> {
|
|||
} else {
|
||||
(DecoderResult::InputEmpty, src_remaining.len())
|
||||
};
|
||||
// Safety: This function is documented as needing valid pointers for src/dest and len, which
|
||||
// is true since we've passed the minimum length of the two
|
||||
match unsafe {
|
||||
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
||||
} {
|
||||
|
@ -667,16 +690,20 @@ impl<'a> Utf16Destination<'a> {
|
|||
self.pos += length;
|
||||
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
|
||||
}
|
||||
// Safety: the function is documented as returning bytes >=0x80 in the Some
|
||||
Some((non_ascii, consumed)) => {
|
||||
source.pos += consumed;
|
||||
self.pos += consumed;
|
||||
source.pos += 1; // +1 for non_ascii
|
||||
// Safety: non-ascii bubbled out here
|
||||
non_ascii
|
||||
}
|
||||
}
|
||||
};
|
||||
// Safety: non-ascii returned here
|
||||
CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
|
||||
}
|
||||
// Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
|
||||
#[inline(always)]
|
||||
pub fn copy_ascii_from_check_space_astral<'b>(
|
||||
&'b mut self,
|
||||
|
@ -691,6 +718,8 @@ impl<'a> Utf16Destination<'a> {
|
|||
} else {
|
||||
(DecoderResult::InputEmpty, src_remaining.len())
|
||||
};
|
||||
// Safety: This function is documented as needing valid pointers for src/dest and len, which
|
||||
// is true since we've passed the minimum length of the two
|
||||
match unsafe {
|
||||
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
||||
} {
|
||||
|
@ -699,11 +728,13 @@ impl<'a> Utf16Destination<'a> {
|
|||
self.pos += length;
|
||||
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
|
||||
}
|
||||
// Safety: the function is documented as returning bytes >=0x80 in the Some
|
||||
Some((non_ascii, consumed)) => {
|
||||
source.pos += consumed;
|
||||
self.pos += consumed;
|
||||
if self.pos + 1 < dst_len {
|
||||
source.pos += 1; // +1 for non_ascii
|
||||
// Safety: non-ascii bubbled out here
|
||||
non_ascii
|
||||
} else {
|
||||
return CopyAsciiResult::Stop((
|
||||
|
@ -715,6 +746,7 @@ impl<'a> Utf16Destination<'a> {
|
|||
}
|
||||
}
|
||||
};
|
||||
// Safety: non-ascii returned here
|
||||
CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
|
||||
}
|
||||
#[inline(always)]
|
||||
|
|
|
@ -689,7 +689,7 @@
|
|||
//! for discussion about the UTF-16 family.
|
||||
|
||||
#![no_std]
|
||||
#![cfg_attr(feature = "simd-accel", feature(core_intrinsics))]
|
||||
#![cfg_attr(feature = "simd-accel", feature(core_intrinsics, portable_simd))]
|
||||
|
||||
#[cfg(feature = "alloc")]
|
||||
#[cfg_attr(test, macro_use)]
|
||||
|
@ -699,17 +699,6 @@ extern crate core;
|
|||
#[macro_use]
|
||||
extern crate cfg_if;
|
||||
|
||||
#[cfg(all(
|
||||
feature = "simd-accel",
|
||||
any(
|
||||
target_feature = "sse2",
|
||||
all(target_endian = "little", target_arch = "aarch64"),
|
||||
all(target_endian = "little", target_feature = "neon")
|
||||
)
|
||||
))]
|
||||
#[macro_use(shuffle)]
|
||||
extern crate packed_simd;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
extern crate serde;
|
||||
|
||||
|
|
|
@ -116,6 +116,11 @@ macro_rules! by_unit_check_alu {
|
|||
}
|
||||
let len_minus_stride = len - ALU_ALIGNMENT / unit_size;
|
||||
if offset + (4 * (ALU_ALIGNMENT / unit_size)) <= len {
|
||||
// Safety: the above check lets us perform 4 consecutive reads of
|
||||
// length ALU_ALIGNMENT / unit_size. ALU_ALIGNMENT is the size of usize, and unit_size
|
||||
// is the size of the `src` pointer, so this is equal to performing four usize reads.
|
||||
//
|
||||
// This invariant is upheld on all loop iterations
|
||||
let len_minus_unroll = len - (4 * (ALU_ALIGNMENT / unit_size));
|
||||
loop {
|
||||
let unroll_accu = unsafe { *(src.add(offset) as *const usize) }
|
||||
|
@ -134,12 +139,14 @@ macro_rules! by_unit_check_alu {
|
|||
return false;
|
||||
}
|
||||
offset += 4 * (ALU_ALIGNMENT / unit_size);
|
||||
// Safety: this check lets us continue to perform the 4 reads earlier
|
||||
if offset > len_minus_unroll {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
while offset <= len_minus_stride {
|
||||
// Safety: the above check lets us perform one usize read.
|
||||
accu |= unsafe { *(src.add(offset) as *const usize) };
|
||||
offset += ALU_ALIGNMENT / unit_size;
|
||||
}
|
||||
|
@ -189,6 +196,11 @@ macro_rules! by_unit_check_simd {
|
|||
}
|
||||
let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
|
||||
if offset + (4 * (SIMD_STRIDE_SIZE / unit_size)) <= len {
|
||||
// Safety: the above check lets us perform 4 consecutive reads of
|
||||
// length SIMD_STRIDE_SIZE / unit_size. SIMD_STRIDE_SIZE is the size of $simd_ty, and unit_size
|
||||
// is the size of the `src` pointer, so this is equal to performing four $simd_ty reads.
|
||||
//
|
||||
// This invariant is upheld on all loop iterations
|
||||
let len_minus_unroll = len - (4 * (SIMD_STRIDE_SIZE / unit_size));
|
||||
loop {
|
||||
let unroll_accu = unsafe { *(src.add(offset) as *const $simd_ty) }
|
||||
|
@ -208,6 +220,7 @@ macro_rules! by_unit_check_simd {
|
|||
return false;
|
||||
}
|
||||
offset += 4 * (SIMD_STRIDE_SIZE / unit_size);
|
||||
// Safety: this check lets us continue to perform the 4 reads earlier
|
||||
if offset > len_minus_unroll {
|
||||
break;
|
||||
}
|
||||
|
@ -215,6 +228,7 @@ macro_rules! by_unit_check_simd {
|
|||
}
|
||||
let mut simd_accu = $splat;
|
||||
while offset <= len_minus_stride {
|
||||
// Safety: the above check lets us perform one $simd_ty read.
|
||||
simd_accu = simd_accu | unsafe { *(src.add(offset) as *const $simd_ty) };
|
||||
offset += SIMD_STRIDE_SIZE / unit_size;
|
||||
}
|
||||
|
@ -234,8 +248,8 @@ macro_rules! by_unit_check_simd {
|
|||
cfg_if! {
|
||||
if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
|
||||
use crate::simd_funcs::*;
|
||||
use packed_simd::u8x16;
|
||||
use packed_simd::u16x8;
|
||||
use core::simd::u8x16;
|
||||
use core::simd::u16x8;
|
||||
|
||||
const SIMD_ALIGNMENT: usize = 16;
|
||||
|
||||
|
|
|
@ -7,55 +7,74 @@
|
|||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use packed_simd::u16x8;
|
||||
use packed_simd::u8x16;
|
||||
use packed_simd::IntoBits;
|
||||
use any_all_workaround::all_mask16x8;
|
||||
use any_all_workaround::all_mask8x16;
|
||||
use any_all_workaround::any_mask16x8;
|
||||
use any_all_workaround::any_mask8x16;
|
||||
use core::simd::cmp::SimdPartialEq;
|
||||
use core::simd::cmp::SimdPartialOrd;
|
||||
use core::simd::mask16x8;
|
||||
use core::simd::mask8x16;
|
||||
use core::simd::simd_swizzle;
|
||||
use core::simd::u16x8;
|
||||
use core::simd::u8x16;
|
||||
use core::simd::ToBytes;
|
||||
|
||||
// TODO: Migrate unaligned access to stdlib code if/when the RFC
|
||||
// https://github.com/rust-lang/rfcs/pull/1725 is implemented.
|
||||
|
||||
/// Safety invariant: ptr must be valid for an unaligned read of 16 bytes
|
||||
#[inline(always)]
|
||||
pub unsafe fn load16_unaligned(ptr: *const u8) -> u8x16 {
|
||||
let mut simd = ::core::mem::uninitialized();
|
||||
::core::ptr::copy_nonoverlapping(ptr, &mut simd as *mut u8x16 as *mut u8, 16);
|
||||
simd
|
||||
let mut simd = ::core::mem::MaybeUninit::<u8x16>::uninit();
|
||||
::core::ptr::copy_nonoverlapping(ptr, simd.as_mut_ptr() as *mut u8, 16);
|
||||
// Safety: copied 16 bytes of initialized memory into this, it is now initialized
|
||||
simd.assume_init()
|
||||
}
|
||||
|
||||
/// Safety invariant: ptr must be valid for an aligned-for-u8x16 read of 16 bytes
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub unsafe fn load16_aligned(ptr: *const u8) -> u8x16 {
|
||||
*(ptr as *const u8x16)
|
||||
}
|
||||
|
||||
/// Safety invariant: ptr must be valid for an unaligned store of 16 bytes
|
||||
#[inline(always)]
|
||||
pub unsafe fn store16_unaligned(ptr: *mut u8, s: u8x16) {
|
||||
::core::ptr::copy_nonoverlapping(&s as *const u8x16 as *const u8, ptr, 16);
|
||||
}
|
||||
|
||||
/// Safety invariant: ptr must be valid for an aligned-for-u8x16 store of 16 bytes
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub unsafe fn store16_aligned(ptr: *mut u8, s: u8x16) {
|
||||
*(ptr as *mut u8x16) = s;
|
||||
}
|
||||
|
||||
/// Safety invariant: ptr must be valid for an unaligned read of 16 bytes
|
||||
#[inline(always)]
|
||||
pub unsafe fn load8_unaligned(ptr: *const u16) -> u16x8 {
|
||||
let mut simd = ::core::mem::uninitialized();
|
||||
::core::ptr::copy_nonoverlapping(ptr as *const u8, &mut simd as *mut u16x8 as *mut u8, 16);
|
||||
simd
|
||||
let mut simd = ::core::mem::MaybeUninit::<u16x8>::uninit();
|
||||
::core::ptr::copy_nonoverlapping(ptr as *const u8, simd.as_mut_ptr() as *mut u8, 16);
|
||||
// Safety: copied 16 bytes of initialized memory into this, it is now initialized
|
||||
simd.assume_init()
|
||||
}
|
||||
|
||||
/// Safety invariant: ptr must be valid for an aligned-for-u16x8 read of 16 bytes
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub unsafe fn load8_aligned(ptr: *const u16) -> u16x8 {
|
||||
*(ptr as *const u16x8)
|
||||
}
|
||||
|
||||
/// Safety invariant: ptr must be valid for an unaligned store of 16 bytes
|
||||
#[inline(always)]
|
||||
pub unsafe fn store8_unaligned(ptr: *mut u16, s: u16x8) {
|
||||
::core::ptr::copy_nonoverlapping(&s as *const u16x8 as *const u8, ptr as *mut u8, 16);
|
||||
}
|
||||
|
||||
/// Safety invariant: ptr must be valid for an aligned-for-u16x8 store of 16 bytes
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) {
|
||||
|
@ -100,7 +119,7 @@ pub fn simd_byte_swap(s: u16x8) -> u16x8 {
|
|||
|
||||
#[inline(always)]
|
||||
pub fn to_u16_lanes(s: u8x16) -> u16x8 {
|
||||
s.into_bits()
|
||||
u16x8::from_ne_bytes(s)
|
||||
}
|
||||
|
||||
cfg_if! {
|
||||
|
@ -108,10 +127,11 @@ cfg_if! {
|
|||
|
||||
// Expose low-level mask instead of higher-level conclusion,
|
||||
// because the non-ASCII case would perform less well otherwise.
|
||||
// Safety-usable invariant: This returned value is whether each high bit is set
|
||||
#[inline(always)]
|
||||
pub fn mask_ascii(s: u8x16) -> i32 {
|
||||
unsafe {
|
||||
_mm_movemask_epi8(s.into_bits())
|
||||
_mm_movemask_epi8(s.into())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,14 +145,16 @@ cfg_if! {
|
|||
#[inline(always)]
|
||||
pub fn simd_is_ascii(s: u8x16) -> bool {
|
||||
unsafe {
|
||||
_mm_movemask_epi8(s.into_bits()) == 0
|
||||
// Safety: We have cfg()d the correct platform
|
||||
_mm_movemask_epi8(s.into()) == 0
|
||||
}
|
||||
}
|
||||
} else if #[cfg(target_arch = "aarch64")]{
|
||||
#[inline(always)]
|
||||
pub fn simd_is_ascii(s: u8x16) -> bool {
|
||||
unsafe {
|
||||
vmaxvq_u8(s.into_bits()) < 0x80
|
||||
// Safety: We have cfg()d the correct platform
|
||||
vmaxvq_u8(s.into()) < 0x80
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -141,7 +163,7 @@ cfg_if! {
|
|||
// This optimizes better on ARM than
|
||||
// the lt formulation.
|
||||
let highest_ascii = u8x16::splat(0x7F);
|
||||
!s.gt(highest_ascii).any()
|
||||
!any_mask8x16(s.simd_gt(highest_ascii))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -154,20 +176,21 @@ cfg_if! {
|
|||
return true;
|
||||
}
|
||||
let above_str_latin1 = u8x16::splat(0xC4);
|
||||
s.lt(above_str_latin1).all()
|
||||
s.simd_lt(above_str_latin1).all()
|
||||
}
|
||||
} else if #[cfg(target_arch = "aarch64")]{
|
||||
#[inline(always)]
|
||||
pub fn simd_is_str_latin1(s: u8x16) -> bool {
|
||||
unsafe {
|
||||
vmaxvq_u8(s.into_bits()) < 0xC4
|
||||
// Safety: We have cfg()d the correct platform
|
||||
vmaxvq_u8(s.into()) < 0xC4
|
||||
}
|
||||
}
|
||||
} else {
|
||||
#[inline(always)]
|
||||
pub fn simd_is_str_latin1(s: u8x16) -> bool {
|
||||
let above_str_latin1 = u8x16::splat(0xC4);
|
||||
s.lt(above_str_latin1).all()
|
||||
all_mask8x16(s.simd_lt(above_str_latin1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -177,21 +200,23 @@ cfg_if! {
|
|||
#[inline(always)]
|
||||
pub fn simd_is_basic_latin(s: u16x8) -> bool {
|
||||
unsafe {
|
||||
vmaxvq_u16(s.into_bits()) < 0x80
|
||||
// Safety: We have cfg()d the correct platform
|
||||
vmaxvq_u16(s.into()) < 0x80
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn simd_is_latin1(s: u16x8) -> bool {
|
||||
unsafe {
|
||||
vmaxvq_u16(s.into_bits()) < 0x100
|
||||
// Safety: We have cfg()d the correct platform
|
||||
vmaxvq_u16(s.into()) < 0x100
|
||||
}
|
||||
}
|
||||
} else {
|
||||
#[inline(always)]
|
||||
pub fn simd_is_basic_latin(s: u16x8) -> bool {
|
||||
let above_ascii = u16x8::splat(0x80);
|
||||
s.lt(above_ascii).all()
|
||||
all_mask16x8(s.simd_lt(above_ascii))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
|
@ -200,7 +225,7 @@ cfg_if! {
|
|||
// seems faster in this case while the above
|
||||
// function is better the other way round...
|
||||
let highest_latin1 = u16x8::splat(0xFF);
|
||||
!s.gt(highest_latin1).any()
|
||||
!any_mask16x8(s.simd_gt(highest_latin1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -209,7 +234,7 @@ cfg_if! {
|
|||
pub fn contains_surrogates(s: u16x8) -> bool {
|
||||
let mask = u16x8::splat(0xF800);
|
||||
let surrogate_bits = u16x8::splat(0xD800);
|
||||
(s & mask).eq(surrogate_bits).any()
|
||||
any_mask16x8((s & mask).simd_eq(surrogate_bits))
|
||||
}
|
||||
|
||||
cfg_if! {
|
||||
|
@ -217,7 +242,8 @@ cfg_if! {
|
|||
macro_rules! aarch64_return_false_if_below_hebrew {
|
||||
($s:ident) => ({
|
||||
unsafe {
|
||||
if vmaxvq_u16($s.into_bits()) < 0x0590 {
|
||||
// Safety: We have cfg()d the correct platform
|
||||
if vmaxvq_u16($s.into()) < 0x0590 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -234,7 +260,7 @@ cfg_if! {
|
|||
|
||||
macro_rules! non_aarch64_return_false_if_all {
|
||||
($s:ident) => ({
|
||||
if $s.all() {
|
||||
if all_mask16x8($s) {
|
||||
return false;
|
||||
}
|
||||
})
|
||||
|
@ -245,7 +271,7 @@ cfg_if! {
|
|||
macro_rules! in_range16x8 {
|
||||
($s:ident, $start:expr, $end:expr) => {{
|
||||
// SIMD sub is wrapping
|
||||
($s - u16x8::splat($start)).lt(u16x8::splat($end - $start))
|
||||
($s - u16x8::splat($start)).simd_lt(u16x8::splat($end - $start))
|
||||
}};
|
||||
}
|
||||
|
||||
|
@ -259,43 +285,44 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
|
|||
|
||||
aarch64_return_false_if_below_hebrew!(s);
|
||||
|
||||
let below_hebrew = s.lt(u16x8::splat(0x0590));
|
||||
let below_hebrew = s.simd_lt(u16x8::splat(0x0590));
|
||||
|
||||
non_aarch64_return_false_if_all!(below_hebrew);
|
||||
|
||||
if (below_hebrew | in_range16x8!(s, 0x0900, 0x200F) | in_range16x8!(s, 0x2068, 0xD802)).all() {
|
||||
if all_mask16x8(
|
||||
below_hebrew | in_range16x8!(s, 0x0900, 0x200F) | in_range16x8!(s, 0x2068, 0xD802),
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Quick refutation failed. Let's do the full check.
|
||||
|
||||
(in_range16x8!(s, 0x0590, 0x0900)
|
||||
| in_range16x8!(s, 0xFB1D, 0xFE00)
|
||||
| in_range16x8!(s, 0xFE70, 0xFEFF)
|
||||
| in_range16x8!(s, 0xD802, 0xD804)
|
||||
| in_range16x8!(s, 0xD83A, 0xD83C)
|
||||
| s.eq(u16x8::splat(0x200F))
|
||||
| s.eq(u16x8::splat(0x202B))
|
||||
| s.eq(u16x8::splat(0x202E))
|
||||
| s.eq(u16x8::splat(0x2067)))
|
||||
.any()
|
||||
any_mask16x8(
|
||||
(in_range16x8!(s, 0x0590, 0x0900)
|
||||
| in_range16x8!(s, 0xFB1D, 0xFE00)
|
||||
| in_range16x8!(s, 0xFE70, 0xFEFF)
|
||||
| in_range16x8!(s, 0xD802, 0xD804)
|
||||
| in_range16x8!(s, 0xD83A, 0xD83C)
|
||||
| s.simd_eq(u16x8::splat(0x200F))
|
||||
| s.simd_eq(u16x8::splat(0x202B))
|
||||
| s.simd_eq(u16x8::splat(0x202E))
|
||||
| s.simd_eq(u16x8::splat(0x2067))),
|
||||
)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) {
|
||||
unsafe {
|
||||
let first: u8x16 = shuffle!(
|
||||
s,
|
||||
u8x16::splat(0),
|
||||
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
|
||||
);
|
||||
let second: u8x16 = shuffle!(
|
||||
s,
|
||||
u8x16::splat(0),
|
||||
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
|
||||
);
|
||||
(first.into_bits(), second.into_bits())
|
||||
}
|
||||
let first: u8x16 = simd_swizzle!(
|
||||
s,
|
||||
u8x16::splat(0),
|
||||
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
|
||||
);
|
||||
let second: u8x16 = simd_swizzle!(
|
||||
s,
|
||||
u8x16::splat(0),
|
||||
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
|
||||
);
|
||||
(u16x8::from_ne_bytes(first), u16x8::from_ne_bytes(second))
|
||||
}
|
||||
|
||||
cfg_if! {
|
||||
|
@ -303,21 +330,20 @@ cfg_if! {
|
|||
#[inline(always)]
|
||||
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
|
||||
unsafe {
|
||||
_mm_packus_epi16(a.into_bits(), b.into_bits()).into_bits()
|
||||
// Safety: We have cfg()d the correct platform
|
||||
_mm_packus_epi16(a.into(), b.into()).into()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
#[inline(always)]
|
||||
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
|
||||
unsafe {
|
||||
let first: u8x16 = a.into_bits();
|
||||
let second: u8x16 = b.into_bits();
|
||||
shuffle!(
|
||||
first,
|
||||
second,
|
||||
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
|
||||
)
|
||||
}
|
||||
let first: u8x16 = a.to_ne_bytes();
|
||||
let second: u8x16 = b.to_ne_bytes();
|
||||
simd_swizzle!(
|
||||
first,
|
||||
second,
|
||||
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,6 +53,9 @@ impl SingleByteDecoder {
|
|||
// statically omit the bound check when accessing
|
||||
// `[u16; 128]` with an index
|
||||
// `non_ascii as usize - 0x80usize`.
|
||||
//
|
||||
// Safety: `non_ascii` is a u8 byte >=0x80, from the invariants
|
||||
// on Utf8Destination::copy_ascii_from_check_space_bmp()
|
||||
let mapped =
|
||||
unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
|
||||
// let mapped = self.table[non_ascii as usize - 0x80usize];
|
||||
|
@ -151,9 +154,12 @@ impl SingleByteDecoder {
|
|||
} else {
|
||||
(DecoderResult::InputEmpty, src.len())
|
||||
};
|
||||
// Safety invariant: converted <= length. Quite often we have `converted < length`
|
||||
// which will be separately marked.
|
||||
let mut converted = 0usize;
|
||||
'outermost: loop {
|
||||
match unsafe {
|
||||
// Safety: length is the minimum length, `src/dst + x` will always be valid for reads/writes of `len - x`
|
||||
ascii_to_basic_latin(
|
||||
src.as_ptr().add(converted),
|
||||
dst.as_mut_ptr().add(converted),
|
||||
|
@ -164,6 +170,12 @@ impl SingleByteDecoder {
|
|||
return (pending, length, length);
|
||||
}
|
||||
Some((mut non_ascii, consumed)) => {
|
||||
// Safety invariant: `converted <= length` upheld, since this can only consume
|
||||
// up to `length - converted` bytes.
|
||||
//
|
||||
// Furthermore, in this context,
|
||||
// we can assume `converted < length` since this branch is only ever hit when
|
||||
// ascii_to_basic_latin fails to consume the entire slice
|
||||
converted += consumed;
|
||||
'middle: loop {
|
||||
// `converted` doesn't count the reading of `non_ascii` yet.
|
||||
|
@ -172,6 +184,9 @@ impl SingleByteDecoder {
|
|||
// statically omit the bound check when accessing
|
||||
// `[u16; 128]` with an index
|
||||
// `non_ascii as usize - 0x80usize`.
|
||||
//
|
||||
// Safety: We can rely on `non_ascii` being between `0x80` and `0xFF` due to
|
||||
// the invariants of `ascii_to_basic_latin()`, and our table has enough space for that.
|
||||
let mapped =
|
||||
unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
|
||||
// let mapped = self.table[non_ascii as usize - 0x80usize];
|
||||
|
@ -183,9 +198,10 @@ impl SingleByteDecoder {
|
|||
);
|
||||
}
|
||||
unsafe {
|
||||
// The bound check has already been performed
|
||||
// Safety: As mentioned above, `converted < length`
|
||||
*(dst.get_unchecked_mut(converted)) = mapped;
|
||||
}
|
||||
// Safety: `converted <= length` upheld, since `converted < length` before this
|
||||
converted += 1;
|
||||
// Next, handle ASCII punctuation and non-ASCII without
|
||||
// going back to ASCII acceleration. Non-ASCII scripts
|
||||
|
@ -198,7 +214,10 @@ impl SingleByteDecoder {
|
|||
if converted == length {
|
||||
return (pending, length, length);
|
||||
}
|
||||
// Safety: We are back to `converted < length` because of the == above
|
||||
// and can perform this check.
|
||||
let mut b = unsafe { *(src.get_unchecked(converted)) };
|
||||
// Safety: `converted < length` is upheld for this loop
|
||||
'innermost: loop {
|
||||
if b > 127 {
|
||||
non_ascii = b;
|
||||
|
@ -208,15 +227,20 @@ impl SingleByteDecoder {
|
|||
// byte unconditionally instead of trying to unread it
|
||||
// to make it part of the next SIMD stride.
|
||||
unsafe {
|
||||
// Safety: `converted < length` is true for this loop
|
||||
*(dst.get_unchecked_mut(converted)) = u16::from(b);
|
||||
}
|
||||
// Safety: We are now at `converted <= length`. We should *not* `continue`
|
||||
// the loop without reverifying
|
||||
converted += 1;
|
||||
if b < 60 {
|
||||
// We've got punctuation
|
||||
if converted == length {
|
||||
return (pending, length, length);
|
||||
}
|
||||
// Safety: we're back to `converted < length` because of the == above
|
||||
b = unsafe { *(src.get_unchecked(converted)) };
|
||||
// Safety: The loop continues as `converted < length`
|
||||
continue 'innermost;
|
||||
}
|
||||
// We've got markup or ASCII text
|
||||
|
@ -234,6 +258,8 @@ impl SingleByteDecoder {
|
|||
loop {
|
||||
if let Some((non_ascii, offset)) = validate_ascii(bytes) {
|
||||
total += offset;
|
||||
// Safety: We can rely on `non_ascii` being between `0x80` and `0xFF` due to
|
||||
// the invariants of `validate_ascii()`, and our table has enough space for that.
|
||||
let mapped = unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
|
||||
if mapped != u16::from(non_ascii) {
|
||||
return total;
|
||||
|
@ -384,9 +410,12 @@ impl SingleByteEncoder {
|
|||
} else {
|
||||
(EncoderResult::InputEmpty, src.len())
|
||||
};
|
||||
// Safety invariant: converted <= length. Quite often we have `converted < length`
|
||||
// which will be separately marked.
|
||||
let mut converted = 0usize;
|
||||
'outermost: loop {
|
||||
match unsafe {
|
||||
// Safety: length is the minimum length, `src/dst + x` will always be valid for reads/writes of `len - x`
|
||||
basic_latin_to_ascii(
|
||||
src.as_ptr().add(converted),
|
||||
dst.as_mut_ptr().add(converted),
|
||||
|
@ -397,15 +426,23 @@ impl SingleByteEncoder {
|
|||
return (pending, length, length);
|
||||
}
|
||||
Some((mut non_ascii, consumed)) => {
|
||||
// Safety invariant: `converted <= length` upheld, since this can only consume
|
||||
// up to `length - converted` bytes.
|
||||
//
|
||||
// Furthermore, in this context,
|
||||
// we can assume `converted < length` since this branch is only ever hit when
|
||||
// basic_latin_to_ascii fails to consume the entire slice
|
||||
converted += consumed;
|
||||
'middle: loop {
|
||||
// `converted` doesn't count the reading of `non_ascii` yet.
|
||||
match self.encode_u16(non_ascii) {
|
||||
Some(byte) => {
|
||||
unsafe {
|
||||
// Safety: we're allowed this access since `converted < length`
|
||||
*(dst.get_unchecked_mut(converted)) = byte;
|
||||
}
|
||||
converted += 1;
|
||||
// `converted <= length` now
|
||||
}
|
||||
None => {
|
||||
// At this point, we need to know if we
|
||||
|
@ -421,6 +458,8 @@ impl SingleByteEncoder {
|
|||
converted,
|
||||
);
|
||||
}
|
||||
// Safety: converted < length from outside the match, and `converted + 1 != length`,
|
||||
// So `converted + 1 < length` as well. We're in bounds
|
||||
let second =
|
||||
u32::from(unsafe { *src.get_unchecked(converted + 1) });
|
||||
if second & 0xFC00u32 != 0xDC00u32 {
|
||||
|
@ -432,6 +471,18 @@ impl SingleByteEncoder {
|
|||
}
|
||||
// The next code unit is a low surrogate.
|
||||
let astral: char = unsafe {
|
||||
// Safety: We can rely on non_ascii being 0xD800-0xDBFF since the high bits are 0xD800
|
||||
// Then, (non_ascii << 10 - 0xD800 << 10) becomes between (0 to 0x3FF) << 10, which is between
|
||||
// 0 to 0xFFC00. Adding the 0x10000 gives a range of 0x10000 to 0x10FC00. Subtracting the 0xDC00
|
||||
// gives 0x2400 to 0x102000
|
||||
// The second term is between 0xDC00 and 0xDFFF from the check above. This gives a maximum
|
||||
// possible range of (0x2400 + 0xDC00) to (0x102000 + 0xDFFF) which is 0x10000 to 0x10FFFF.
|
||||
// This is in range.
|
||||
//
|
||||
// From a Unicode principles perspective this can also be verified as we have checked that `non_ascii` is a high surrogate
|
||||
// (0xD800..=0xDBFF), and that `second` is a low surrogate (`0xDC00..=0xDFFF`), and we are applying the reverse of the UTF-16 transformation
|
||||
// algorithm <https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF>, by applying the high surrogate - 0xD800 to the
|
||||
// high ten bits, and the low surrogate - 0xDc00 to the low ten bits, and then adding 0x10000
|
||||
::core::char::from_u32_unchecked(
|
||||
(u32::from(non_ascii) << 10) + second
|
||||
- (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
|
||||
|
@ -456,6 +507,7 @@ impl SingleByteEncoder {
|
|||
converted + 1, // +1 `for non_ascii`
|
||||
converted,
|
||||
);
|
||||
// Safety: This branch diverges, so no need to uphold invariants on `converted`
|
||||
}
|
||||
}
|
||||
// Next, handle ASCII punctuation and non-ASCII without
|
||||
|
@ -469,8 +521,12 @@ impl SingleByteEncoder {
|
|||
if converted == length {
|
||||
return (pending, length, length);
|
||||
}
|
||||
// Safety: we're back to `converted < length` due to the == above and can perform
|
||||
// the unchecked read
|
||||
let mut unit = unsafe { *(src.get_unchecked(converted)) };
|
||||
'innermost: loop {
|
||||
// Safety: This loop always begins with `converted < length`, see
|
||||
// the invariant outside and the comment on the continue below
|
||||
if unit > 127 {
|
||||
non_ascii = unit;
|
||||
continue 'middle;
|
||||
|
@ -479,19 +535,25 @@ impl SingleByteEncoder {
|
|||
// byte unconditionally instead of trying to unread it
|
||||
// to make it part of the next SIMD stride.
|
||||
unsafe {
|
||||
// Safety: Can rely on converted < length
|
||||
*(dst.get_unchecked_mut(converted)) = unit as u8;
|
||||
}
|
||||
converted += 1;
|
||||
// `converted <= length` here
|
||||
if unit < 60 {
|
||||
// We've got punctuation
|
||||
if converted == length {
|
||||
return (pending, length, length);
|
||||
}
|
||||
// Safety: `converted < length` due to the == above. The read is safe.
|
||||
unit = unsafe { *(src.get_unchecked(converted)) };
|
||||
// Safety: This only happens if `converted < length`, maintaining it
|
||||
continue 'innermost;
|
||||
}
|
||||
// We've got markup or ASCII text
|
||||
continue 'outermost;
|
||||
// Safety: All other routes to here diverge so the continue is the only
|
||||
// way to run the innermost loop.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,12 +14,13 @@ use crate::variant::*;
|
|||
cfg_if! {
|
||||
if #[cfg(feature = "simd-accel")] {
|
||||
use simd_funcs::*;
|
||||
use packed_simd::u16x8;
|
||||
use core::simd::u16x8;
|
||||
use core::simd::cmp::SimdPartialOrd;
|
||||
|
||||
#[inline(always)]
|
||||
fn shift_upper(unpacked: u16x8) -> u16x8 {
|
||||
let highest_ascii = u16x8::splat(0x7F);
|
||||
unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
|
||||
unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
|
||||
} else {
|
||||
}
|
||||
}
|
||||
|
@ -116,10 +117,15 @@ impl UserDefinedDecoder {
|
|||
let simd_iterations = length >> 4;
|
||||
let src_ptr = src.as_ptr();
|
||||
let dst_ptr = dst.as_mut_ptr();
|
||||
// Safety: This is `for i in 0..length / 16`
|
||||
for i in 0..simd_iterations {
|
||||
// Safety: This is in bounds: length is the minimum valid length for both src/dst
|
||||
// and i ranges to length/16, so multiplying by 16 will always be `< length` and can do
|
||||
// a 16 byte read
|
||||
let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) };
|
||||
let (first, second) = simd_unpack(input);
|
||||
unsafe {
|
||||
// Safety: same as above, but this is two consecutive stores of one `u16x8` (eight `u16`s) each
|
||||
store8_unaligned(dst_ptr.add(i * 16), shift_upper(first));
|
||||
store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second));
|
||||
}
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -1,83 +0,0 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "packed_simd"
|
||||
version = "0.3.9"
|
||||
build = "build.rs"
|
||||
description = "Portable Packed SIMD vectors"
|
||||
homepage = "https://github.com/rust-lang/packed_simd"
|
||||
documentation = "https://docs.rs/crate/packed_simd/"
|
||||
readme = "README.md"
|
||||
keywords = [
|
||||
"simd",
|
||||
"vector",
|
||||
"portability",
|
||||
]
|
||||
categories = [
|
||||
"hardware-support",
|
||||
"concurrency",
|
||||
"no-std",
|
||||
"data-structures",
|
||||
]
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/rust-lang/packed_simd"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
features = ["into_bits"]
|
||||
rustdoc-args = [
|
||||
"--cfg",
|
||||
"doc_cfg",
|
||||
]
|
||||
|
||||
[dependencies.cfg-if]
|
||||
version = "1.0.0"
|
||||
|
||||
[dependencies.core_arch]
|
||||
version = "0.1.5"
|
||||
optional = true
|
||||
|
||||
[dependencies.num-traits]
|
||||
version = "0.2.14"
|
||||
features = ["libm"]
|
||||
default-features = false
|
||||
|
||||
[dev-dependencies.arrayvec]
|
||||
version = "^0.5"
|
||||
default-features = false
|
||||
|
||||
[dev-dependencies.paste]
|
||||
version = "^1"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
into_bits = []
|
||||
libcore_neon = []
|
||||
|
||||
[target."cfg(target_arch = \"x86_64\")".dependencies.sleef-sys]
|
||||
version = "0.1.2"
|
||||
optional = true
|
||||
|
||||
[target.wasm32-unknown-unknown.dev-dependencies.wasm-bindgen]
|
||||
version = "=0.2.87"
|
||||
|
||||
[target.wasm32-unknown-unknown.dev-dependencies.wasm-bindgen-test]
|
||||
version = "=0.3.37"
|
||||
|
||||
[badges.is-it-maintained-issue-resolution]
|
||||
repository = "rust-lang/packed_simd"
|
||||
|
||||
[badges.is-it-maintained-open-issues]
|
||||
repository = "rust-lang/packed_simd"
|
||||
|
||||
[badges.maintenance]
|
||||
status = "experimental"
|
|
@ -1,144 +0,0 @@
|
|||
# `Simd<[T; N]>`
|
||||
|
||||
## Implementation of [Rust RFC #2366: `std::simd`][rfc2366]
|
||||
|
||||
[![Latest Version]][crates.io] [![docs]][master_docs]
|
||||
|
||||
**WARNING**: this crate only supports the most recent nightly Rust toolchain
|
||||
and will be superseded by [`#![feature(portable_simd)]`](https://github.com/rust-lang/portable-simd).
|
||||
|
||||
## Documentation
|
||||
|
||||
* [API docs (`master` branch)][master_docs]
|
||||
* [Performance guide][perf_guide]
|
||||
* [API docs (`docs.rs`)][docs.rs]
|
||||
* [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale,
|
||||
discussion, etc.
|
||||
|
||||
## Examples
|
||||
|
||||
Most of the examples come with both a scalar and a vectorized implementation.
|
||||
|
||||
* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench)
|
||||
* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux)
|
||||
* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse)
|
||||
* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot)
|
||||
* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody)
|
||||
* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing)
|
||||
* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm)
|
||||
* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform)
|
||||
* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil)
|
||||
* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product)
|
||||
|
||||
## Cargo features
|
||||
|
||||
* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait
|
||||
implementations for the vector types. These allow reinterpreting the bits of a
|
||||
vector type as those of another vector type safely by just using the
|
||||
`.into_bits()` method.
|
||||
|
||||
## Performance
|
||||
|
||||
The following [ISPC] examples are also part of `packed_simd`'s
|
||||
[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/)
|
||||
directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s
|
||||
Single-Program-Multiple-Data (SPMD) programming model. The performance results
|
||||
on different hardware is shown in the `readme.md` of each example. The following
|
||||
table summarizes the performance ranges, where `+` means speed-up and `-`
|
||||
slowdown:
|
||||
|
||||
* `aobench`: `[-1.02x, +1.53x]`,
|
||||
* `stencil`: `[+1.06x, +1.72x]`,
|
||||
* `mandelbrot`: `[-1.74x, +1.2x]`,
|
||||
* `options_pricing`:
|
||||
* `black_scholes`: `+1.0x`
|
||||
* `binomial_put`: `+1.4x`
|
||||
|
||||
While SPMD is not the intended use case for `packed_simd`, it is possible to
|
||||
combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming
|
||||
model in Rust. Writing performant code is not as straightforward as with
|
||||
[ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one
|
||||
can easily match and often out-perform [ISPC]'s "default performance".
|
||||
|
||||
## Platform support
|
||||
|
||||
The following table describes the supported platforms: `build` shows whether
|
||||
the library compiles without issues for a given target, while `run` shows
|
||||
whether the test suite passes for a given target.
|
||||
|
||||
| **Linux** | **build** | **run** |
|
||||
|---------------------------------------|-----------|---------|
|
||||
| `i586-unknown-linux-gnu` | ✓ | ✗ |
|
||||
| `i686-unknown-linux-gnu` | ✓ | ✗ |
|
||||
| `x86_64-unknown-linux-gnu` | ✓ | ✓ |
|
||||
| `arm-unknown-linux-gnueabihf` | ✓ | ✓ |
|
||||
| `armv7-unknown-linux-gnueabi` | ✓ | ✓ |
|
||||
| `aarch64-unknown-linux-gnu` | ✓ | ✓ |
|
||||
| `powerpc-unknown-linux-gnu` | ✓ | ✗ |
|
||||
| `powerpc64-unknown-linux-gnu` | ✓ | ✗ |
|
||||
| `powerpc64le-unknown-linux-gnu` | ✓ | ✓ |
|
||||
| `s390x-unknown-linux-gnu` | ✓ | ✗ |
|
||||
| `sparc64-unknown-linux-gnu` | ✓ | ✗ |
|
||||
| `thumbv7neon-unknown-linux-gnueabihf` | ✓ | ✓ |
|
||||
| **MacOSX** | **build** | **run** |
|
||||
| `x86_64-apple-darwin` | ✓ | ✓ |
|
||||
| **Android** | **build** | **run** |
|
||||
| `x86_64-linux-android` | ✓ | ✓ |
|
||||
| `armv7-linux-androideabi` | ✓ | ✗ |
|
||||
| `aarch64-linux-android` | ✓ | ✗ |
|
||||
| `thumbv7neon-linux-androideabi` | ✓ | ✗ |
|
||||
| **iOS** | **build** | **run** |
|
||||
| `x86_64-apple-ios` | ✗ | ✗ |
|
||||
| `aarch64-apple-ios` | ✗ | ✗ |
|
||||
|
||||
|
||||
## Machine code verification
|
||||
|
||||
The
|
||||
[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify)
|
||||
crate tests disassembles the portable packed vector APIs at run-time and
|
||||
compares the generated machine code against the desired one to make sure that
|
||||
this crate remains efficient.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under either of
|
||||
|
||||
* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
|
||||
([LICENSE-APACHE](LICENSE-APACHE))
|
||||
|
||||
* [MIT License](http://opensource.org/licenses/MIT)
|
||||
([LICENSE-MIT](LICENSE-MIT))
|
||||
|
||||
at your option.
|
||||
|
||||
## Contributing
|
||||
|
||||
We welcome all people who want to contribute.
|
||||
Please see the [contributing instructions] for more information.
|
||||
|
||||
Contributions in any form (issues, pull requests, etc.) to this project
|
||||
must adhere to Rust's [Code of Conduct].
|
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted
|
||||
for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be
|
||||
dual licensed as above, without any additional terms or conditions.
|
||||
|
||||
[travis]: https://travis-ci.com/rust-lang/packed_simd
|
||||
[Travis-CI Status]: https://travis-ci.com/rust-lang/packed_simd.svg?branch=master
|
||||
[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd
|
||||
[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true
|
||||
[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg
|
||||
[crates.io]: https://crates.io/crates/packed_simd
|
||||
[docs]: https://docs.rs/packed_simd/badge.svg
|
||||
[docs.rs]: https://docs.rs/packed_simd
|
||||
[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/
|
||||
[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
|
||||
[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366
|
||||
[ISPC]: https://ispc.github.io/
|
||||
[rayon]: https://crates.io/crates/rayon
|
||||
[boost_license]: https://www.boost.org/LICENSE_1_0.txt
|
||||
[SLEEF]: https://sleef.org/
|
||||
[sleef_sys]: https://crates.io/crates/sleef-sys
|
||||
[contributing instructions]: contributing.md
|
||||
[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html
|
|
@ -1,3 +0,0 @@
|
|||
status = [
|
||||
"continuous-integration/travis-ci/push"
|
||||
]
|
|
@ -1,6 +0,0 @@
|
|||
fn main() {
|
||||
let target = std::env::var("TARGET").expect("TARGET environment variable not defined");
|
||||
if target.contains("neon") {
|
||||
println!("cargo:rustc-cfg=libcore_neon");
|
||||
}
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Performs an operation on all targets
|
||||
|
||||
set -ex
|
||||
|
||||
: "${1?The all.sh script requires one argument.}"
|
||||
|
||||
op=$1
|
||||
|
||||
cargo_clean() {
|
||||
cargo clean
|
||||
}
|
||||
|
||||
cargo_check_fmt() {
|
||||
cargo fmt --all -- --check
|
||||
}
|
||||
|
||||
cargo_fmt() {
|
||||
cargo fmt --all
|
||||
}
|
||||
|
||||
cargo_clippy() {
|
||||
cargo clippy --all -- -D clippy::perf
|
||||
}
|
||||
|
||||
CMD="-1"
|
||||
|
||||
case $op in
|
||||
clean*)
|
||||
CMD=cargo_clean
|
||||
;;
|
||||
check_fmt*)
|
||||
CMD=cargo_check_fmt
|
||||
;;
|
||||
fmt*)
|
||||
CMD=cargo_fmt
|
||||
;;
|
||||
clippy)
|
||||
CMD=cargo_clippy
|
||||
;;
|
||||
*)
|
||||
echo "Unknown operation: \"${op}\""
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Operation is: ${CMD}"
|
||||
|
||||
# On src/
|
||||
$CMD
|
||||
|
||||
# Check examples/
|
||||
for dir in examples/*/
|
||||
do
|
||||
dir=${dir%*/}
|
||||
(
|
||||
cd "${dir%*/}"
|
||||
$CMD
|
||||
)
|
||||
done
|
||||
|
||||
(
|
||||
cd verify/verify
|
||||
$CMD
|
||||
)
|
||||
|
||||
(
|
||||
cd micro_benchmarks
|
||||
$CMD
|
||||
)
|
|
@ -1,21 +0,0 @@
|
|||
#!/usr/bin/env sh
|
||||
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
set -ex
|
||||
|
||||
ANDROID_NDK_URL=https://dl.google.com/android/repository
|
||||
ANDROID_NDK_ARCHIVE=android-ndk-r25b-linux.zip
|
||||
|
||||
curl -fO "$ANDROID_NDK_URL/$ANDROID_NDK_ARCHIVE"
|
||||
unzip -q $ANDROID_NDK_ARCHIVE
|
||||
rm $ANDROID_NDK_ARCHIVE
|
||||
mv android-ndk-* ndk
|
||||
rm -rf android-ndk-*
|
|
@ -1,60 +0,0 @@
|
|||
#!/usr/bin/env sh
|
||||
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
set -ex
|
||||
|
||||
# Prep the SDK and emulator
|
||||
#
|
||||
# Note that the update process requires that we accept a bunch of licenses, and
|
||||
# we can't just pipe `yes` into it for some reason, so we take the same strategy
|
||||
# located in https://github.com/appunite/docker by just wrapping it in a script
|
||||
# which apparently magically accepts the licenses.
|
||||
|
||||
mkdir sdk
|
||||
curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O
|
||||
unzip -d sdk sdk-tools-linux-3859397.zip
|
||||
|
||||
case "$1" in
|
||||
arm | armv7)
|
||||
abi=armeabi-v7a
|
||||
;;
|
||||
|
||||
aarch64)
|
||||
abi=arm64-v8a
|
||||
;;
|
||||
|
||||
i686)
|
||||
abi=x86
|
||||
;;
|
||||
|
||||
x86_64)
|
||||
abi=x86_64
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "invalid arch: $1"
|
||||
exit 1
|
||||
;;
|
||||
esac;
|
||||
|
||||
# --no_https avoids
|
||||
# javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found
|
||||
yes | ./sdk/tools/bin/sdkmanager --licenses --no_https
|
||||
yes | ./sdk/tools/bin/sdkmanager --no_https \
|
||||
"emulator" \
|
||||
"platform-tools" \
|
||||
"platforms;android-24" \
|
||||
"system-images;android-24;default;$abi"
|
||||
|
||||
echo "no" |
|
||||
./sdk/tools/bin/avdmanager create avd \
|
||||
--name "${1}" \
|
||||
--package "system-images;android-24;default;$abi"
|
|
@ -1,56 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright 2017 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
set -ex
|
||||
|
||||
URL=https://dl.google.com/android/repository/sys-img/android
|
||||
|
||||
main() {
|
||||
local arch="${1}"
|
||||
local name="${2}"
|
||||
local dest=/system
|
||||
local td
|
||||
td="$(mktemp -d)"
|
||||
|
||||
apt-get install --no-install-recommends e2tools
|
||||
|
||||
pushd "${td}"
|
||||
curl --retry 5 -O "${URL}/${name}"
|
||||
unzip -q "${name}"
|
||||
|
||||
local system
|
||||
system="$(find . -name system.img)"
|
||||
mkdir -p ${dest}/{bin,lib,lib64}
|
||||
|
||||
# Extract android linker and libraries to /system
|
||||
# This allows android executables to be run directly (or with qemu)
|
||||
if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then
|
||||
e2cp -p "${system}:/bin/linker64" "${dest}/bin/"
|
||||
e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/"
|
||||
e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/"
|
||||
e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/"
|
||||
else
|
||||
e2cp -p "${system}:/bin/linker" "${dest}/bin/"
|
||||
e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/"
|
||||
e2cp -p "${system}:/lib/libc.so" "${dest}/lib/"
|
||||
e2cp -p "${system}:/lib/libm.so" "${dest}/lib/"
|
||||
fi
|
||||
|
||||
# clean up
|
||||
apt-get purge --auto-remove -y e2tools
|
||||
|
||||
popd
|
||||
|
||||
rm -rf "${td}"
|
||||
}
|
||||
|
||||
main "${@}"
|
|
@ -1,32 +0,0 @@
|
|||
#!/usr/bin/env bash
#
# Runs all benchmarks. Controlled by the following environment variables:
#
# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc)
# NORUN={1} - only builds the benchmarks

set -ex

if [[ ${NORUN} != 1 ]]; then
    # Most benchmarks require hyperfine; require it upfront.
    hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; }
fi

# If the ispc benchmark feature is enabled, ispc must be in the path of the
# benchmarks.
if echo "$FEATURES" | grep -q "ispc"; then
    hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; }
fi

# An example with a benchmark.sh is a benchmark:
for dir in examples/*/
do
    # Strip the trailing slash left by the glob (once is enough), and quote
    # the path so directories containing whitespace still work.
    dir=${dir%/}
    cd "${dir}"
    if [ -f "benchmark.sh" ]; then
        ./benchmark.sh
    fi
    cd -
done
|
||||
|
|
@ -1,176 +0,0 @@
|
|||
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// This is a script to deploy and execute a binary on an iOS simulator.
|
||||
// The primary use of this is to be able to run unit tests on the simulator and
|
||||
// retrieve the results.
|
||||
//
|
||||
// To do this through Cargo instead, use Dinghy
|
||||
// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy
|
||||
// test.
|
||||
|
||||
use std::env;
|
||||
use std::fs::{self, File};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
use std::process::Command;
|
||||
|
||||
/// Unwraps a `Result`: evaluates to the `Ok` payload, or panics with the
/// stringified expression followed by the error when it is `Err`.
macro_rules! t {
    ($e:expr) => {
        match $e {
            Ok(value) => value,
            Err(err) => panic!("{} failed with: {}", stringify!($e), err),
        }
    };
}
|
||||
|
||||
// Step one: Wrap as an app
|
||||
fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) {
|
||||
println!("Packaging simulator app");
|
||||
drop(fs::remove_dir_all("ios_simulator_app"));
|
||||
t!(fs::create_dir("ios_simulator_app"));
|
||||
t!(fs::copy(test_binary_path,
|
||||
Path::new("ios_simulator_app").join(crate_name)));
|
||||
|
||||
let mut f = t!(File::create("ios_simulator_app/Info.plist"));
|
||||
t!(f.write_all(format!(r#"
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC
|
||||
"-//Apple//DTD PLIST 1.0//EN"
|
||||
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleExecutable</key>
|
||||
<string>{}</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>com.rust.unittests</string>
|
||||
</dict>
|
||||
</plist>
|
||||
"#, crate_name).as_bytes()));
|
||||
}
|
||||
|
||||
// Step two: Start the iOS simulator
|
||||
fn start_simulator() {
|
||||
println!("Looking for iOS simulator");
|
||||
let output = t!(Command::new("xcrun").arg("simctl").arg("list").output());
|
||||
assert!(output.status.success());
|
||||
let mut simulator_exists = false;
|
||||
let mut simulator_booted = false;
|
||||
let mut found_rust_sim = false;
|
||||
let stdout = t!(String::from_utf8(output.stdout));
|
||||
for line in stdout.lines() {
|
||||
if line.contains("rust_ios") {
|
||||
if found_rust_sim {
|
||||
panic!("Duplicate rust_ios simulators found. Please \
|
||||
double-check xcrun simctl list.");
|
||||
}
|
||||
simulator_exists = true;
|
||||
simulator_booted = line.contains("(Booted)");
|
||||
found_rust_sim = true;
|
||||
}
|
||||
}
|
||||
|
||||
if simulator_exists == false {
|
||||
println!("Creating iOS simulator");
|
||||
Command::new("xcrun")
|
||||
.arg("simctl")
|
||||
.arg("create")
|
||||
.arg("rust_ios")
|
||||
.arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE")
|
||||
.arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2")
|
||||
.check_status();
|
||||
} else if simulator_booted == true {
|
||||
println!("Shutting down already-booted simulator");
|
||||
Command::new("xcrun")
|
||||
.arg("simctl")
|
||||
.arg("shutdown")
|
||||
.arg("rust_ios")
|
||||
.check_status();
|
||||
}
|
||||
|
||||
println!("Starting iOS simulator");
|
||||
// We can't uninstall the app (if present) as that will hang if the
|
||||
// simulator isn't completely booted; just erase the simulator instead.
|
||||
Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status();
|
||||
Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status();
|
||||
}
|
||||
|
||||
// Step three: Install the app
|
||||
fn install_app_to_simulator() {
|
||||
println!("Installing app to simulator");
|
||||
Command::new("xcrun")
|
||||
.arg("simctl")
|
||||
.arg("install")
|
||||
.arg("booted")
|
||||
.arg("ios_simulator_app/")
|
||||
.check_status();
|
||||
}
|
||||
|
||||
// Step four: Run the app
|
||||
fn run_app_on_simulator() {
|
||||
println!("Running app");
|
||||
let output = t!(Command::new("xcrun")
|
||||
.arg("simctl")
|
||||
.arg("launch")
|
||||
.arg("--console")
|
||||
.arg("booted")
|
||||
.arg("com.rust.unittests")
|
||||
.output());
|
||||
|
||||
println!("stdout --\n{}\n", String::from_utf8_lossy(&output.stdout));
|
||||
println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr));
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let failed = stdout.lines()
|
||||
.find(|l| l.contains("FAILED"))
|
||||
.map(|l| l.contains("FAILED"))
|
||||
.unwrap_or(false);
|
||||
|
||||
let passed = stdout.lines()
|
||||
.find(|l| l.contains("test result: ok"))
|
||||
.map(|l| l.contains("test result: ok"))
|
||||
.unwrap_or(false);
|
||||
|
||||
println!("Shutting down simulator");
|
||||
Command::new("xcrun")
|
||||
.arg("simctl")
|
||||
.arg("shutdown")
|
||||
.arg("rust_ios")
|
||||
.check_status();
|
||||
if !(passed && !failed) {
|
||||
panic!("tests didn't pass");
|
||||
}
|
||||
}
|
||||
|
||||
trait CheckStatus {
|
||||
fn check_status(&mut self);
|
||||
}
|
||||
|
||||
impl CheckStatus for Command {
|
||||
fn check_status(&mut self) {
|
||||
println!("\trunning: {:?}", self);
|
||||
assert!(t!(self.status()).success());
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() != 2 {
|
||||
println!("Usage: {} <executable>", args[0]);
|
||||
process::exit(-1);
|
||||
}
|
||||
|
||||
let test_binary_path = Path::new(&args[1]);
|
||||
let crate_name = test_binary_path.file_name().unwrap();
|
||||
|
||||
package_as_simulator_app(crate_name.to_str().unwrap(), test_binary_path);
|
||||
start_simulator();
|
||||
install_app_to_simulator();
|
||||
run_app_on_simulator();
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
FROM ubuntu:16.04
|
||||
|
||||
RUN dpkg --add-architecture i386 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
file \
|
||||
make \
|
||||
curl \
|
||||
ca-certificates \
|
||||
python \
|
||||
unzip \
|
||||
expect \
|
||||
openjdk-9-jre \
|
||||
libstdc++6:i386 \
|
||||
libpulse0 \
|
||||
gcc \
|
||||
libc6-dev
|
||||
|
||||
WORKDIR /android/
|
||||
COPY android* /android/
|
||||
|
||||
ENV ANDROID_ARCH=aarch64
|
||||
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
||||
|
||||
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
||||
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
||||
RUN mv /root/.android /tmp
|
||||
RUN chmod 777 -R /tmp/.android
|
||||
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
||||
|
||||
ENV PATH=$PATH:/rust/bin \
|
||||
CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \
|
||||
CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \
|
||||
OBJDUMP=aarch64-linux-android-objdump \
|
||||
HOME=/tmp
|
||||
|
||||
ADD runtest-android.rs /tmp/runtest.rs
|
||||
ENTRYPOINT [ \
|
||||
"bash", \
|
||||
"-c", \
|
||||
# set SHELL so android can detect a 64bits system, see
|
||||
# http://stackoverflow.com/a/41789144
|
||||
"SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \
|
||||
rustc /tmp/runtest.rs -o /tmp/runtest && \
|
||||
exec \"$@\"", \
|
||||
"--" \
|
||||
]
|
|
@ -1,14 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-aarch64-linux-gnu \
|
||||
libc6-dev-arm64-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
|
||||
OBJDUMP=aarch64-linux-gnu-objdump
|
|
@ -1,15 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
libc6-armel-cross \
|
||||
libc6-dev-armel-cross \
|
||||
binutils-arm-linux-gnueabi \
|
||||
gcc-arm-linux-gnueabi \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \
|
||||
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \
|
||||
OBJDUMP=arm-linux-gnueabi-objdump
|
|
@ -1,13 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
|
@ -1,47 +0,0 @@
|
|||
FROM ubuntu:16.04
|
||||
|
||||
RUN dpkg --add-architecture i386 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
file \
|
||||
make \
|
||||
curl \
|
||||
ca-certificates \
|
||||
python \
|
||||
unzip \
|
||||
expect \
|
||||
openjdk-9-jre \
|
||||
libstdc++6:i386 \
|
||||
libpulse0 \
|
||||
gcc \
|
||||
libc6-dev
|
||||
|
||||
WORKDIR /android/
|
||||
COPY android* /android/
|
||||
|
||||
ENV ANDROID_ARCH=arm
|
||||
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
||||
|
||||
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
||||
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
||||
RUN mv /root/.android /tmp
|
||||
RUN chmod 777 -R /tmp/.android
|
||||
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
||||
|
||||
ENV PATH=$PATH:/rust/bin \
|
||||
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
|
||||
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
|
||||
OBJDUMP=arm-linux-androideabi-objdump \
|
||||
HOME=/tmp
|
||||
|
||||
ADD runtest-android.rs /tmp/runtest.rs
|
||||
ENTRYPOINT [ \
|
||||
"bash", \
|
||||
"-c", \
|
||||
# set SHELL so android can detect a 64bits system, see
|
||||
# http://stackoverflow.com/a/41789144
|
||||
"SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
|
||||
rustc /tmp/runtest.rs -o /tmp/runtest && \
|
||||
exec \"$@\"", \
|
||||
"--" \
|
||||
]
|
|
@ -1,13 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
|
@ -1,7 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates
|
|
@ -1,7 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates
|
|
@ -1,13 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips-linux-gnu libc6-dev-mips-cross \
|
||||
qemu-system-mips \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
|
||||
CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
|
||||
OBJDUMP=mips-linux-gnu-objdump
|
|
@ -1,10 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
|
||||
qemu-system-mips64 qemu-user
|
||||
|
||||
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
|
||||
CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
|
||||
OBJDUMP=mips64-linux-gnuabi64-objdump
|
|
@ -1,10 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
|
||||
qemu-system-mips64el
|
||||
|
||||
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
|
||||
CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
|
||||
OBJDUMP=mips64el-linux-gnuabi64-objdump
|
|
@ -1,25 +0,0 @@
|
|||
FROM ubuntu:18.10
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
make \
|
||||
qemu-user \
|
||||
qemu-system-mips \
|
||||
bzip2 \
|
||||
curl \
|
||||
file
|
||||
|
||||
RUN mkdir /toolchain
|
||||
|
||||
# Note that this originally came from:
|
||||
# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
|
||||
RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
|
||||
tar xjf - -C /toolchain --strip-components=2
|
||||
|
||||
ENV PATH=$PATH:/rust/bin:/toolchain/bin \
|
||||
CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
|
||||
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
|
||||
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
|
|
@ -1,13 +0,0 @@
|
|||
FROM ubuntu:22.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
|
||||
qemu-system-ppc \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
|
||||
CC=powerpc-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc-linux-gnu-objdump
|
|
@ -1,17 +0,0 @@
|
|||
FROM ubuntu:22.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-powerpc64-linux-gnu \
|
||||
libc6-dev-ppc64-cross \
|
||||
qemu-user \
|
||||
qemu-system-ppc \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \
|
||||
CC=powerpc64-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc64-linux-gnu-objdump
|
|
@ -1,11 +0,0 @@
|
|||
FROM ubuntu:22.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
|
||||
qemu-system-ppc file make
|
||||
|
||||
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \
|
||||
CC=powerpc64le-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc64le-linux-gnu-objdump
|
|
@ -1,20 +0,0 @@
|
|||
FROM ubuntu:22.04
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
cmake \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
g++-s390x-linux-gnu \
|
||||
libc6-dev-s390x-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
|
||||
CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \
|
||||
CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \
|
||||
CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \
|
||||
OBJDUMP=s390x-linux-gnu-objdump
|
|
@ -1,18 +0,0 @@
|
|||
FROM debian:bookworm
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl ca-certificates \
|
||||
gcc libc6-dev \
|
||||
gcc-sparc64-linux-gnu libc6-dev-sparc64-cross \
|
||||
qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \
|
||||
p7zip-full cpio
|
||||
|
||||
COPY linux-sparc64.sh /
|
||||
RUN bash /linux-sparc64.sh
|
||||
|
||||
COPY test-runner-linux /
|
||||
|
||||
ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \
|
||||
CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \
|
||||
CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \
|
||||
PATH=$PATH:/rust/bin
|
|
@ -1,47 +0,0 @@
|
|||
FROM ubuntu:16.04
|
||||
|
||||
RUN dpkg --add-architecture i386 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
file \
|
||||
make \
|
||||
curl \
|
||||
ca-certificates \
|
||||
python \
|
||||
unzip \
|
||||
expect \
|
||||
openjdk-9-jre \
|
||||
libstdc++6:i386 \
|
||||
libpulse0 \
|
||||
gcc \
|
||||
libc6-dev
|
||||
|
||||
WORKDIR /android/
|
||||
COPY android* /android/
|
||||
|
||||
ENV ANDROID_ARCH=arm
|
||||
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
||||
|
||||
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
||||
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
||||
RUN mv /root/.android /tmp
|
||||
RUN chmod 777 -R /tmp/.android
|
||||
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
||||
|
||||
ENV PATH=$PATH:/rust/bin \
|
||||
CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
|
||||
CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
|
||||
OBJDUMP=arm-linux-androideabi-objdump \
|
||||
HOME=/tmp
|
||||
|
||||
ADD runtest-android.rs /tmp/runtest.rs
|
||||
ENTRYPOINT [ \
|
||||
"bash", \
|
||||
"-c", \
|
||||
# set SHELL so android can detect a 64bits system, see
|
||||
# http://stackoverflow.com/a/41789144
|
||||
"SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
|
||||
rustc /tmp/runtest.rs -o /tmp/runtest && \
|
||||
exec \"$@\"", \
|
||||
"--" \
|
||||
]
|
|
@ -1,13 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
|
@ -1,39 +0,0 @@
|
|||
FROM ubuntu:22.04
|
||||
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
clang \
|
||||
cmake \
|
||||
curl \
|
||||
git \
|
||||
libc6-dev \
|
||||
make \
|
||||
ninja-build \
|
||||
python-is-python3 \
|
||||
xz-utils
|
||||
|
||||
# Install `wasm2wat`
|
||||
RUN git clone --recursive https://github.com/WebAssembly/wabt
|
||||
RUN make -C wabt -j$(nproc)
|
||||
ENV PATH=$PATH:/wabt/bin
|
||||
|
||||
# Install `wasm-bindgen-test-runner`
|
||||
RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.87/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl.tar.gz \
|
||||
| tar xzf -
|
||||
# Keep in sync with the version on Cargo.toml.
|
||||
ENV PATH=$PATH:/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl
|
||||
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
|
||||
|
||||
# Install `node`
|
||||
RUN curl https://nodejs.org/dist/v14.16.0/node-v14.16.0-linux-x64.tar.xz | tar xJf -
|
||||
ENV PATH=$PATH:/node-v14.16.0-linux-x64/bin
|
||||
|
||||
# We use a shim linker that removes `--strip-debug` when passed to LLD. While
|
||||
# this typically results in invalid debug information in release mode it doesn't
|
||||
# result in an invalid names section which is what we're interested in.
|
||||
COPY lld-shim.rs /
|
||||
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim
|
||||
|
||||
# Rustc isn't available until this container starts, so defer compilation of the
|
||||
# shim.
|
||||
ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@"
|
|
@ -1,31 +0,0 @@
|
|||
FROM ubuntu:20.04
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
gcc \
|
||||
libc-dev \
|
||||
python \
|
||||
unzip \
|
||||
file \
|
||||
make
|
||||
|
||||
WORKDIR /android/
|
||||
ENV ANDROID_ARCH=x86_64
|
||||
COPY android-install-ndk.sh /android/
|
||||
RUN sh /android/android-install-ndk.sh
|
||||
|
||||
ENV STDARCH_ASSERT_INSTR_LIMIT=30
|
||||
|
||||
# We do not run x86_64-linux-android tests on an android emulator.
|
||||
# See ci/android-sysimage.sh for informations about how tests are run.
|
||||
COPY android-sysimage.sh /android/
|
||||
RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip
|
||||
|
||||
ENV PATH=$PATH:/rust/bin:/android/ndk/toolchains/llvm/prebuilt/linux-x86_64/bin \
|
||||
CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android21-clang \
|
||||
CC_x86_64_linux_android=x86_64-linux-android21-clang \
|
||||
CXX_x86_64_linux_android=x86_64-linux-android21-clang++ \
|
||||
OBJDUMP=llvm-objdump \
|
||||
HOME=/tmp
|
|
@ -1,16 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates \
|
||||
wget \
|
||||
bzip2 \
|
||||
cmake \
|
||||
libclang-dev \
|
||||
clang
|
||||
|
||||
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2
|
||||
RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2
|
||||
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --"
|
|
@ -1,10 +0,0 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates \
|
||||
cmake \
|
||||
libclang-dev \
|
||||
clang
|
|
@ -1,27 +0,0 @@
|
|||
#!/bin/sh
#
# Builds the API documentation and the performance guide into target/doc and,
# on a non-PR travis build of master, publishes the result to gh-pages.

set -ex

rm -rf target/doc
mkdir -p target/doc

# Build API documentation
cargo doc --features=into_bits

# Build Performance Guide
# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780
# mdbook build perf-guide -d target/doc/perf-guide
cd perf-guide
mdbook build
cd -
cp -r perf-guide/book target/doc/perf-guide

# If we're on travis, not a PR, and on the right branch, publish!
if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then
    python3 -vV
    pip -vV
    python3.9 -vV
    pip install ghp_import --user
    ghp-import -n target/doc
    # Stop tracing before the push: `set -x` would otherwise echo the expanded
    # URL, including the GH_PAGES credential, into the build log.
    set +x
    git push -qf "https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git" gh-pages
fi
|
|
@ -1,18 +0,0 @@
|
|||
# Prepares /qemu for s390x system emulation: fetches the s390-ccw firmware
# image plus a Debian installer kernel and initrd, then unpacks the initrd into
# /qemu/init and adds the cross libgcc_s to it.
set -ex

mkdir -m 777 /qemu
cd /qemu

# --fail: abort on an HTTP error instead of saving the server's error page
# under the expected file name.
curl --fail -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img
curl --fail -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian
curl --fail -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian

mv kernel.debian kernel
mv initrd.debian initrd.gz

mkdir init
cd init
gunzip -c ../initrd.gz | cpio -id
rm ../initrd.gz
cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/
chmod a+w .
|
|
@ -1,17 +0,0 @@
|
|||
# Prepares /qemu for sparc64 system emulation: downloads the Debian netinst
# ISO, extracts its kernel and initrd, then unpacks the initrd into /qemu/init
# and adds the cross libgcc_s to it.
set -ex

mkdir -m 777 /qemu
cd /qemu

# --fail: abort on an HTTP error instead of saving the server's error page as
# the ISO and failing later inside 7z.
curl --fail -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso
7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz
7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64
mv sparc64 kernel
rm debian-9.0-sparc64-NETINST-1.iso

mkdir init
cd init
gunzip -c ../initrd.gz | cpio -id
rm ../initrd.gz
cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/
chmod a+w .
|
|
@ -1,11 +0,0 @@
|
|||
use std::os::unix::prelude::*;
|
||||
use std::process::Command;
|
||||
use std::env;
|
||||
|
||||
/// Linker shim: replaces this process with `rust-lld`, forwarding every
/// command-line argument except `--strip-debug`.
fn main() {
    let mut forwarded = Vec::new();
    for arg in env::args().skip(1) {
        if arg != "--strip-debug" {
            forwarded.push(arg);
        }
    }

    // `exec` only returns when replacing the process failed, and the value it
    // returns is the error.
    let err = Command::new("rust-lld").args(&forwarded).exec();
    panic!("failed to exec: {}", err);
}
|
|
@ -1,17 +0,0 @@
|
|||
#!/usr/bin/env sh
#
# Fails on the first .rs file that has a line of 79 or more characters which
# does not contain "http".

set -x

export success=true

find . -iname '*.rs' | while read -r file; do
    result=$(grep '.\{79\}' "${file}" | grep --invert 'http')
    # Any surviving output means the file has an over-long line.
    if [ -n "${result}" ]; then
        echo "file \"${file}\": $result"
        exit 1
    fi
done
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
# Small script to run tests for a target (or all targets) inside all the
# respective docker images.

set -ex

# Builds the docker image for one target and runs ci/run.sh inside it.
#
# Arguments:
#   $1 - docker target directory under ci/docker/ (falls back to $TARGET).
#        A "-emulated" suffix is stripped before the name is passed to the
#        container as TARGET.
run() {
    # Use the argument rather than the global: when TARGET is unset the loop
    # below calls `run <dir>`, and reading ${TARGET} here would build
    # ci/docker//Dockerfile on every iteration.
    docker_target="${1:-${TARGET}}"
    echo "Building docker container for TARGET=${docker_target} RUSTFLAGS=${RUSTFLAGS}"
    docker build -t packed_simd -f "ci/docker/${docker_target}/Dockerfile" ci/
    mkdir -p target
    target=$(echo "${docker_target}" | sed 's/-emulated//')
    echo "Running docker"
    docker run \
        --user "$(id -u):$(id -g)" \
        --rm \
        --init \
        --volume "$HOME/.cargo:/cargo" \
        --env CARGO_HOME=/cargo \
        --volume "$(rustc --print sysroot):/rust:ro" \
        --env TARGET="$target" \
        --env NORUN \
        --env NOVERIFY \
        --env RUSTFLAGS \
        --volume "$(pwd):/checkout:ro" \
        --volume "$(pwd)/target:/checkout/target" \
        --workdir /checkout \
        --privileged \
        packed_simd \
        bash \
        -c 'PATH=$PATH:/rust/bin exec ci/run.sh'
}

# With no TARGET set, run every target that has a docker directory.
if [ -z "${TARGET}" ]; then
    for d in $(ls ci/docker/); do
        run "$d"
    done
else
    run "${TARGET}"
fi
|
|
@ -1,99 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
: ${TARGET?"The TARGET environment variable must be set."}
|
||||
|
||||
# Tests are all super fast anyway, and they fault often enough on travis that
|
||||
# having only one thread increases debuggability to be worth it.
|
||||
#export RUST_TEST_THREADS=1
|
||||
#export RUST_BACKTRACE=full
|
||||
#export RUST_TEST_NOCAPTURE=1
|
||||
|
||||
# Some appveyor builds run out-of-memory; this attempts to mitigate that:
|
||||
# https://github.com/rust-lang-nursery/packed_simd/issues/39
|
||||
# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1"
|
||||
# export CARGO_BUILD_JOBS=1
|
||||
|
||||
export CARGO_SUBCMD=test
|
||||
if [[ "${NORUN}" == "1" ]]; then
|
||||
export CARGO_SUBCMD=build
|
||||
fi
|
||||
|
||||
if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0"
|
||||
rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest
|
||||
export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest
|
||||
export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest
|
||||
fi
|
||||
|
||||
# The source directory is read-only. Need to copy internal crates to the target
|
||||
# directory for their Cargo.lock to be properly written.
|
||||
mkdir target || true
|
||||
|
||||
rustc --version
|
||||
cargo --version
|
||||
echo "TARGET=${TARGET}"
|
||||
echo "HOST=${HOST}"
|
||||
echo "RUSTFLAGS=${RUSTFLAGS}"
|
||||
echo "NORUN=${NORUN}"
|
||||
echo "NOVERIFY=${NOVERIFY}"
|
||||
echo "CARGO_SUBCMD=${CARGO_SUBCMD}"
|
||||
echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}"
|
||||
echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}"
|
||||
echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}"
|
||||
echo "RUST_BACKTRACE=${RUST_BACKTRACE}"
|
||||
echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}"
|
||||
|
||||
# Runs `cargo ${CARGO_SUBCMD}` for ${TARGET} with any extra arguments,
# capturing combined output in target/output. The output is only printed when
# cargo fails, in which case the function returns 1.
cargo_test() {
    cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}"
    # When tests are actually run on a non-wasm target, ask the test harness
    # to be quiet.
    if [ "${NORUN}" != "1" ] && [ "$TARGET" != "wasm32-unknown-unknown" ]; then
        cmd="$cmd -- --quiet"
    fi
    mkdir target || true
    ${cmd} 2>&1 | tee > target/output
    # PIPESTATUS[0] is cargo's own exit status, not tee's.
    if [[ ${PIPESTATUS[0]} != 0 ]]; then
        cat target/output
        return 1
    fi
}
|
||||
|
||||
# Runs cargo_test three times, each with a different group of vector-width
# `--cfg` flags appended to RUSTFLAGS, then restores the original RUSTFLAGS.
cargo_test_impl() {
    ORIGINAL_RUSTFLAGS=${RUSTFLAGS}
    for width_cfgs in \
        "--cfg test_v16 --cfg test_v32 --cfg test_v64" \
        "--cfg test_v128 --cfg test_v256" \
        "--cfg test_v512"
    do
        RUSTFLAGS="${ORIGINAL_RUSTFLAGS} ${width_cfgs}" cargo_test ${@}
    done
    RUSTFLAGS=${ORIGINAL_RUSTFLAGS}
}
|
||||
|
||||
# Debug run:
|
||||
if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then
|
||||
# Run wasm32-unknown-unknown in release mode only
|
||||
cargo_test_impl
|
||||
fi
|
||||
|
||||
if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then
|
||||
# use sleef on linux and windows x86_64 builds
|
||||
# FIXME: Use `core_arch,sleef-sys` features once they works again
|
||||
cargo_test_impl --release --features=into_bits
|
||||
else
|
||||
# FIXME: Use `core_arch` feature once it works again
|
||||
cargo_test_impl --release --features=into_bits
|
||||
fi
|
||||
|
||||
# Verify code generation
|
||||
if [[ "${NOVERIFY}" != "1" ]]; then
|
||||
cp -r verify/verify target/verify
|
||||
export STDSIMD_ASSERT_INSTR_LIMIT=30
|
||||
if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then
|
||||
export STDSIMD_ASSERT_INSTR_LIMIT=50
|
||||
fi
|
||||
cargo_test --release --manifest-path=target/verify/Cargo.toml
|
||||
fi
|
||||
|
||||
# FIXME: Figure out which examples take too long to run and ignore or adjust those
|
||||
#. ci/run_examples.sh
|
|
@ -1,51 +0,0 @@
|
|||
# Runs all examples.
#
# Each example is copied into target/ and then built/tested from there with
# cargo_test, which must already be defined by the calling script.

case "${TARGET}" in
    armv7-apple-ios)
        # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55
        # All examples fail to build for `armv7-apple-ios`.
        exit 0
        ;;
    arm-linux-androidabi | aarch64-linux-androidabi | sparc64-unknown-linux-gnu)
        # FIXME: travis exceeds 50 minutes on these targets
        # Skipping the examples is an attempt at preventing travis from timing-out
        exit 0
        ;;
    wasm32-unknown-unknown)
        exit 0
        ;;
esac

# aobench is tested twice, with two different feature selections.
cp -r examples/aobench target/aobench
cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features
cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit

# The remaining examples are each tested once, in this order.
for example in dot_product fannkuch_redux mandelbrot matrix_inverse nbody \
               spectral_norm stencil triangle_xform
do
    case "${example}" in
        mandelbrot | stencil)
            # These two are skipped on i586.
            # FIXME (mandelbrot): https://github.com/rust-lang-nursery/packed_simd/issues/56
            if [[ ${TARGET} == "i586-unknown-linux-gnu" ]]; then
                continue
            fi
            ;;
    esac

    cp -r examples/${example} target/${example}
    cargo_test --manifest-path=target/${example}/Cargo.toml --release
done
|
|
@ -1,45 +0,0 @@
|
|||
use std::env;
|
||||
use std::process::Command;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Returns `true` when `stdout` contains at least one line starting with
/// `test result` and every such line reports `test result: ok` with `0 failed`.
///
/// Output without any `test result` line is treated as a failure: nothing in
/// it shows that a test run actually completed.
fn all_test_runs_succeeded(stdout: &str) -> bool {
    let mut summaries = stdout.lines().filter(|l| l.starts_with("test result")).peekable();
    // `Iterator::all` is vacuously true for an empty iterator, so first make
    // sure there is at least one summary line to inspect.
    summaries.peek().is_some()
        && summaries.all(|l| l.contains("test result: ok") && l.contains("0 failed"))
}

/// Runs a single test binary on a device through `adb`.
///
/// Expects exactly one argument (the path of the test binary), ignoring any
/// `--quiet` flags. The binary is pushed to `/data/local/tmp`, executed with
/// `adb shell`, and its status, stdout and stderr are printed.
///
/// # Panics
///
/// Panics if the argument count is wrong, if an `adb` command cannot be
/// started, if waiting for the device or pushing the binary fails, or if the
/// captured stdout does not show a successful test run.
fn main() {
    let args = env::args_os()
        .skip(1)
        .filter(|arg| arg != "--quiet")
        .collect::<Vec<_>>();
    assert_eq!(args.len(), 1);
    let test = PathBuf::from(&args[0]);
    let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());

    let status = Command::new("adb")
        .arg("wait-for-device")
        .status()
        .expect("failed to run: adb wait-for-device");
    assert!(status.success());

    let status = Command::new("adb")
        .arg("push")
        .arg(&test)
        .arg(&dst)
        .status()
        .expect("failed to run: adb push");
    assert!(status.success());

    // The exit status of `adb shell` is not asserted here; success is decided
    // from the `test result` lines in the captured stdout below.
    let output = Command::new("adb")
        .arg("shell")
        .arg(&dst)
        .output()
        .expect("failed to run: adb shell");

    let stdout = String::from_utf8_lossy(&output.stdout);
    println!(
        "status: {}\nstdout ---\n{}\nstderr ---\n{}",
        output.status,
        stdout,
        String::from_utf8_lossy(&output.stderr)
    );

    if !all_test_runs_succeeded(&stdout) {
        panic!("failed to find successful test run");
    }
}
|
|
@ -1,7 +0,0 @@
|
|||
#!/usr/bin/env bash

# Stop at the first failing command and trace every command as it runs.
set -e -x

# Get latest ISPC binary for the target and put it in the path:
# clone the binaries repository, then copy the build matching ${TARGET}
# into the current directory under the name `ispc`.
git clone https://github.com/gnzlbg/ispc-binaries
cp ispc-binaries/ispc-${TARGET} ispc
|
|
@ -1,24 +0,0 @@
|
|||
#!/bin/sh

# Runs a program as `init` inside a QEMU system emulator.
#
# Arguments:
#   $1  suffix of the `qemu-system-*` binary to launch
#   $2  path of the program to copy into the initrd as `/prog`
#
# The emulator output is written to /qemu/output and echoed with kernel
# messages (lines starting with `[`) filtered out. The script exits non-zero
# when that output contains the string FAILED.

set -e

arch=$1
prog=$2

# Build an initrd from /qemu/init with the program installed as `prog`.
cd /qemu/init
cp -f "$prog" prog
find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz
cd ..

# The emulator is stopped after 30 seconds; its exit status (including the
# timeout) is deliberately ignored, the verdict comes from the output below.
timeout 30s "qemu-system-$arch" \
  -m 1024 \
  -nographic \
  -kernel kernel \
  -initrd initrd.gz \
  -append init=/prog > output || true

# remove kernel messages
tr -d '\r' < output | grep -E -v '^\['

# if the output contains a failure, return error
! grep FAILED output > /dev/null
|
|
@ -1,67 +0,0 @@
|
|||
# Contributing to `packed_simd`
|
||||
|
||||
Welcome! If you are reading this document, it means you are interested in contributing
|
||||
to the `packed_simd` crate.
|
||||
|
||||
## Reporting issues
|
||||
|
||||
All issues with this crate are tracked using GitHub's [Issue Tracker].
|
||||
|
||||
You can use issues to bring bugs to the attention of the maintainers, to discuss
|
||||
certain problems encountered with the crate, or to request new features (although
|
||||
feature requests should be limited to things mentioned in the [RFC]).
|
||||
|
||||
One thing to keep in mind is to always use the **latest** nightly toolchain when
|
||||
working on this crate. Due to the nature of this project, we use a lot of unstable
|
||||
features, meaning breakage happens often.
|
||||
|
||||
[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues
|
||||
[RFC]: https://github.com/rust-lang/rfcs/pull/2366
|
||||
|
||||
### LLVM issues
|
||||
|
||||
The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation,
|
||||
and quite a few LLVM bugs have been discovered during the development of this project.
|
||||
|
||||
If you encounter issues with incorrect/suboptimal codegen, which you do not encounter
|
||||
when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/),
|
||||
it is likely the issue is with LLVM, or this crate's interaction with it.
|
||||
|
||||
You should first open an issue **in this repo** to help us track the problem, and we
|
||||
will help determine what is the exact cause of the problem.
|
||||
If LLVM is indeed the cause, the issue will be reported upstream to the
|
||||
[LLVM bugtracker](https://bugs.llvm.org/).
|
||||
|
||||
## Submitting Pull Requests
|
||||
|
||||
New code is submitted to the crate using GitHub's [pull request] mechanism.
|
||||
You should first fork this repository, make your changes (preferably in a new
|
||||
branch), then use GitHub's web UI to create a new PR.
|
||||
|
||||
[pull request]: https://help.github.com/articles/about-pull-requests/
|
||||
|
||||
### Examples
|
||||
|
||||
The `examples` directory contains code showcasing SIMD code written with this crate,
|
||||
usually in comparison to scalar or ISPC code. If you have a project / idea which
|
||||
uses SIMD, we'd love to add it to the examples list.
|
||||
|
||||
Every example should include a small `README`, describing the example code's purpose.
|
||||
If your example could potentially work as a benchmark, then add a `benchmark.sh`
|
||||
script to allow running the example benchmark code in CI. See an existing example's
|
||||
[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample.
|
||||
|
||||
Don't forget to update the crate's top-level `README` with a link to your example.
|
||||
|
||||
### Perf guide
|
||||
|
||||
The objective of the [performance guide][perf-guide] is to be a comprehensive
|
||||
resource detailing the process of optimizing Rust code with SIMD support.
|
||||
|
||||
If you believe a certain section could be reworded, or if you have any tips & tricks
|
||||
related to SIMD which you'd like to share, please open a PR.
|
||||
|
||||
[mdBook] is used to manage the formatting of the guide as a book.
|
||||
|
||||
[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
|
||||
[mdBook]: https://github.com/rust-lang-nursery/mdBook
|
|
@ -1,12 +0,0 @@
|
|||
[book]
|
||||
authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"]
|
||||
multilingual = false
|
||||
src = "src"
|
||||
title = "Rust SIMD Performance Guide"
|
||||
description = "This book describes how to write performant SIMD code in Rust."
|
||||
|
||||
[build]
|
||||
create-missing = false
|
||||
|
||||
[output.html]
|
||||
additional-css = ["./src/ascii.css"]
|
|
@ -1,21 +0,0 @@
|
|||
# Summary
|
||||
|
||||
[Introduction](./introduction.md)
|
||||
|
||||
- [Floating-point Math](./float-math/fp.md)
|
||||
- [Short-vector Math Library](./float-math/svml.md)
|
||||
- [Approximate functions](./float-math/approx.md)
|
||||
- [Fused multiply-accumulate](./float-math/fma.md)
|
||||
|
||||
- [Target features](./target-feature/features.md)
|
||||
- [Using `RUSTFLAGS`](./target-feature/rustflags.md)
|
||||
- [Using the `target_feature` attribute](./target-feature/attribute.md)
|
||||
- [Interaction with inlining](./target-feature/inlining.md)
|
||||
- [Detecting features at runtime](./target-feature/runtime.md)
|
||||
|
||||
- [Bounds checking](./bound_checks.md)
|
||||
- [Vertical and horizontal operations](./vert-hor-ops.md)
|
||||
|
||||
- [Performance profiling](./prof/profiling.md)
|
||||
- [Profiling on Linux](./prof/linux.md)
|
||||
- [Using machine code analyzers](./prof/mca.md)
|
|
@ -1,4 +0,0 @@
|
|||
code {
|
||||
/* "Source Code Pro" breaks ASCII art */
|
||||
font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace;
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
# Bounds checking
|
||||
|
||||
Reading and writing packed vectors to/from slices is checked by default.
|
||||
Independently of the configuration options used, the safe functions:
|
||||
|
||||
* `Simd<[T; N]>::from_slice_aligned(& s[..])`
|
||||
* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])`
|
||||
|
||||
always check that:
|
||||
|
||||
* the slice is big enough to hold the vector
|
||||
* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T;
|
||||
N]>` (this alignment is often much larger than that of `T`).
|
||||
|
||||
There are `_unaligned` versions that use unaligned load and stores, as well as
|
||||
`unsafe` `_unchecked` that do not perform any checks iff `debug-assertions =
|
||||
false` / `debug = false`. That is, the `_unchecked` methods do still assert size
|
||||
and alignment in debug builds and could also do so in release builds depending
|
||||
on the configuration options.
|
||||
|
||||
These assertions do often significantly impact performance and you should be
|
||||
aware of them.
|
|
@ -1,8 +0,0 @@
|
|||
# Approximate functions
|
||||
|
||||
<!-- TODO:
|
||||
|
||||
Explain that they exist, that they are often _much_ faster, how to use them,
|
||||
that people should check whether the error is good enough for their
|
||||
applications. Explain that this error is currently unstable and might change.
|
||||
-->
|
|
@ -1,6 +0,0 @@
|
|||
# Fused Multiply Add
|
||||
|
||||
<!-- TODO:
|
||||
Explain that this is a compound operation, infinite precision, difference
|
||||
between `mul_add` and `mul_adde`, that LLVM cannot do this by itself, etc.
|
||||
-->
|
|
@ -1,3 +0,0 @@
|
|||
# Floating-point math
|
||||
|
||||
This chapter contains information pertaining to working with floating-point numbers.
|
|
@ -1,7 +0,0 @@
|
|||
# Short Vector Math Library
|
||||
|
||||
<!-- TODO:
|
||||
Explain how short-vector math is performed by default (just scalarized libm calls).
|
||||
|
||||
Explain how to enable `sleef`, etc.
|
||||
-->
|
|
@ -1,26 +0,0 @@
|
|||
# Introduction
|
||||
|
||||
## What is SIMD
|
||||
|
||||
<!-- TODO:
|
||||
describe what SIMD is, which algorithms can benefit from it,
|
||||
give usage examples
|
||||
-->
|
||||
|
||||
## History of SIMD in Rust
|
||||
|
||||
<!-- TODO:
|
||||
discuss history of unstable std::simd,
|
||||
stabilization of std::arch, etc.
|
||||
-->
|
||||
|
||||
## Discover packed_simd
|
||||
|
||||
<!-- TODO: describe scope of this project -->
|
||||
|
||||
Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately,
|
||||
not trivial. There are many pitfalls that one should be aware of, and some idioms
|
||||
that help avoid those pitfalls.
|
||||
|
||||
This book attempts to document these best practices and provides practical examples
|
||||
on how to apply the tips to _your_ code.
|
|
@ -1,107 +0,0 @@
|
|||
# Performance profiling on Linux
|
||||
|
||||
## Using `perf`
|
||||
|
||||
[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler
|
||||
for Linux, featuring support for various hardware Performance Monitoring Units,
|
||||
as well as integration with the kernel's performance events framework.
|
||||
|
||||
We will only look at how the `perf` command can be used to profile SIMD code.
|
||||
Full system profiling is outside of the scope of this book.
|
||||
|
||||
### Recording
|
||||
|
||||
The first step is to record a program's execution during an average workload.
|
||||
It helps if you can isolate the parts of your program which have performance
|
||||
issues, and set up a benchmark which can be easily (re)run.
|
||||
|
||||
Build the benchmark binary in release mode, after having enabled debug info:
|
||||
|
||||
```sh
|
||||
$ cargo build --release
|
||||
Finished release [optimized + debuginfo] target(s) in 0.02s
|
||||
```
|
||||
|
||||
Then use the `perf record` subcommand:
|
||||
|
||||
```sh
|
||||
$ perf record --call-graph=dwarf ./target/release/my-program
|
||||
[ perf record: Woken up 10 times to write data ]
|
||||
[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ]
|
||||
```
|
||||
|
||||
Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use
|
||||
`--call-graph=lbr` if you have a processor with support for Last Branch Record
|
||||
(i.e. Intel Haswell and newer).
|
||||
|
||||
`perf` will, by default, record the count of CPU cycles it takes to execute
|
||||
various parts of your program. You can use the `-e` command line option
|
||||
to enable other performance events, such as `cache-misses`. Use `perf list`
|
||||
to get a list of all hardware counters supported by your CPU.
|
||||
|
||||
### Viewing the report
|
||||
|
||||
The next step is getting a bird's eye view of the program's execution.
|
||||
`perf` provides a `ncurses`-based interface which will get you started.
|
||||
|
||||
Use `perf report` to open a visualization of your program's performance:
|
||||
|
||||
```sh
|
||||
perf report --hierarchy -M intel
|
||||
```
|
||||
|
||||
`--hierarchy` will display a tree-like structure of where your program spent
|
||||
most of its time. `-M intel` enables disassembly output with Intel syntax, which
|
||||
is subjectively more readable than the default AT&T syntax.
|
||||
|
||||
Here is the output from profiling the `nbody` benchmark:
|
||||
|
||||
```
|
||||
- 100,00% nbody
|
||||
- 94,18% nbody
|
||||
+ 93,48% [.] nbody_lib::simd::advance
|
||||
+ 0,70% [.] nbody_lib::run
|
||||
+ 5,06% libc-2.28.so
|
||||
```
|
||||
|
||||
If you move with the arrow keys to any node in the tree, you can then press `a`
|
||||
to have `perf` _annotate_ that node. This means it will:
|
||||
|
||||
- disassemble the function
|
||||
|
||||
- associate every instruction with the percentage of time which was spent executing it
|
||||
|
||||
- interleave the disassembly with the source code,
|
||||
assuming it found the debug symbols
|
||||
(you can use `s` to toggle this behaviour)
|
||||
|
||||
`perf` will, by default, open the instruction which it identified as being the
|
||||
hottest spot in the function:
|
||||
|
||||
```
|
||||
0,76 │ movapd xmm2,xmm0
|
||||
0,38 │ movhlps xmm2,xmm0
|
||||
│ addpd xmm2,xmm0
|
||||
│ unpcklpd xmm1,xmm2
|
||||
12,50 │ sqrtpd xmm0,xmm1
|
||||
1,52 │ mulpd xmm0,xmm1
|
||||
```
|
||||
|
||||
In this case, `sqrtpd` will be highlighted in red, since that's the instruction
|
||||
which the CPU spends most of its time executing.
|
||||
|
||||
## Using Valgrind
|
||||
|
||||
Valgrind is a set of tools which initially helped C/C++ programmers find unsafe
|
||||
memory accesses in their code. Nowadays the project also has
|
||||
|
||||
- a heap profiler called `massif`
|
||||
|
||||
- a cache utilization profiler called `cachegrind`
|
||||
|
||||
- a call-graph performance profiler called `callgrind`
|
||||
|
||||
<!--
|
||||
TODO: explain valgrind's dynamic binary translation, warn about massive
|
||||
slowdown, talk about `kcachegrind` for a GUI
|
||||
-->
|
|
@ -1,100 +0,0 @@
|
|||
# Machine code analysis tools
|
||||
|
||||
## The microarchitecture of modern CPUs
|
||||
|
||||
While you might have heard of Instruction Set Architectures, such as `x86` or
|
||||
`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_),
|
||||
refers to the internal details of an actual family of CPUs, such as Intel's
|
||||
_Haswell_ or AMD's _Jaguar_.
|
||||
|
||||
Replacing scalar code with SIMD code will improve performance on all CPUs
|
||||
supporting the required vector extensions.
|
||||
However, due to microarchitectural differences, the actual speed-up at
|
||||
runtime might vary.
|
||||
|
||||
**Example**: a simple example arises when optimizing for AMD K8 CPUs.
|
||||
The assembly generated for an empty function should look like this:
|
||||
|
||||
```asm
|
||||
nop
|
||||
ret
|
||||
```
|
||||
|
||||
The `nop` is used to align the `ret` instruction for better performance.
|
||||
However, the compiler will actually generate the following code:
|
||||
|
||||
```asm
|
||||
repz ret
|
||||
```
|
||||
|
||||
The `repz` instruction will repeat the following instruction until a certain
|
||||
condition. Of course, in this situation, the function will simply immediately
|
||||
return, and the `ret` instruction is still aligned.
|
||||
However, AMD K8's branch predictor performs better with the latter code.
|
||||
|
||||
For those looking to absolutely maximize performance for a certain target µ-arch,
|
||||
you will have to read some CPU manuals, or ask the compiler to do it for you
|
||||
with `-C target-cpu`.
|
||||
|
||||
### Summary of CPU internals
|
||||
|
||||
Modern processors are able to execute instructions out-of-order for better performance,
|
||||
by utilizing tricks such as [branch prediction], [instruction pipelining],
|
||||
or [superscalar execution].
|
||||
|
||||
[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor
|
||||
[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining
|
||||
[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor
|
||||
|
||||
SIMD instructions are also subject to these optimizations, meaning it can get pretty
|
||||
difficult to determine where the slowdown happens.
|
||||
For example, if the profiler reports a store operation is slow, one of two things
|
||||
could be happening:
|
||||
|
||||
- the store is limited by the CPU's memory bandwidth, which is actually an ideal
|
||||
scenario, all things considered;
|
||||
|
||||
- memory bandwidth is nowhere near its peak, but the value to be stored is at the
|
||||
end of a long chain of operations, and this store is where the profiler
|
||||
encountered the pipeline stall;
|
||||
|
||||
Since most profilers are simple tools which don't understand the subtleties of
|
||||
instruction scheduling, you
|
||||
|
||||
## Analyzing the machine code
|
||||
|
||||
Certain tools have knowledge of internal CPU microarchitecture, i.e. they know
|
||||
|
||||
- how many physical [register files] a CPU actually has
|
||||
|
||||
- what is the latency / throughput of an instruction
|
||||
|
||||
- what [µ-ops] are generated for a set of instructions
|
||||
|
||||
and many other architectural details.
|
||||
|
||||
[register files]: https://en.wikipedia.org/wiki/Register_file
|
||||
[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation
|
||||
|
||||
These tools are therefore able to provide accurate information as to why some
|
||||
instructions are inefficient, and where the bottleneck is.
|
||||
|
||||
The disadvantage is that the output of these tools requires advanced knowledge
|
||||
of the target architecture to understand, i.e. they **cannot** point out what
|
||||
the cause of the issue is explicitly.
|
||||
|
||||
## Intel's Architecture Code Analyzer (IACA)
|
||||
|
||||
[IACA] is a free tool offered by Intel for analyzing the performance of various
|
||||
computational kernels.
|
||||
|
||||
Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches.
|
||||
|
||||
[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer
|
||||
|
||||
## llvm-mca
|
||||
|
||||
<!--
|
||||
TODO: once LLVM 7 gets released, write a chapter on using llvm-mca
|
||||
with SIMD disassembly.
|
||||
-->
|
|
@ -1,14 +0,0 @@
|
|||
# Performance profiling
|
||||
|
||||
While the rest of the book provides practical advice on how to improve the performance
|
||||
of SIMD code, this chapter is dedicated to [**performance profiling**][profiling].
|
||||
Profiling consists of recording a program's execution in order to identify program
|
||||
hotspots.
|
||||
|
||||
**Important**: most profilers require debug information in order to accurately
|
||||
link the program hotspots back to the corresponding source code lines. Rust will
|
||||
disable debug info generation by default for optimized builds, but you can change
|
||||
that [in your `Cargo.toml`][cargo-ref].
|
||||
|
||||
[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming)
|
||||
[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections
|
|
@ -1,5 +0,0 @@
|
|||
# The `target_feature` attribute
|
||||
|
||||
<!-- TODO:
|
||||
Explain the `#[target_feature]` attribute
|
||||
-->
|
|
@ -1,13 +0,0 @@
|
|||
# Enabling target features
|
||||
|
||||
Not all processors of a certain architecture will have SIMD processing units,
|
||||
and using a SIMD instruction which is not supported will trigger undefined behavior.
|
||||
|
||||
To allow building safe, portable programs, the Rust compiler will **not**, by default,
|
||||
generate any sort of vector instructions, unless it can statically determine
|
||||
they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed.
|
||||
The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of
|
||||
which features are enabled by default on various platforms, refer to the target
|
||||
specifications [in the compiler's source code][targets].
|
||||
|
||||
[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec
|
|
@ -1,5 +0,0 @@
|
|||
# Inlining
|
||||
|
||||
<!-- TODO:
|
||||
Explain how the `#[target_feature]` attribute interacts with inlining
|
||||
-->
|
|
@ -1,31 +0,0 @@
|
|||
# Target features in practice
|
||||
|
||||
Using `RUSTFLAGS` will allow the crate being compiled, as well as all its
|
||||
transitive dependencies to use certain target features.
|
||||
|
||||
A technique used to avoid undefined behavior at runtime is to compile and
|
||||
ship multiple binaries, each compiled with a certain set of features.
|
||||
This might not be feasible in some cases, and can quickly get out of hand
|
||||
as more and more vector extensions are added to an architecture.
|
||||
|
||||
Rust can be more flexible: you can build a single binary/library which automatically
|
||||
picks the best supported vector instructions depending on the host machine.
|
||||
The trick consists of monomorphizing parts of the code during building, and then
|
||||
using run-time feature detection to select the right code path when running.
|
||||
|
||||
<!-- TODO
|
||||
Explain how to create efficient functions that dispatch to different
|
||||
implementations at run-time without issues (e.g. using `#[inline(always)]` for
|
||||
the impls, wrapping in `#[target_feature]`, and then wrapping those in a function
|
||||
that does run-time feature detection).
|
||||
-->
|
||||
|
||||
**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing
|
||||
SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause
|
||||
performance issues.
|
||||
|
||||
The solution is to compile all code, even the code written with 128-bit vectors,
|
||||
with the AVX target feature enabled. This will cause the compiler to prefix the
|
||||
generated instructions with the [VEX] prefix.
|
||||
|
||||
[VEX]: https://en.wikipedia.org/wiki/VEX_prefix
|
|
@ -1,5 +0,0 @@
|
|||
# Detecting host features at runtime
|
||||
|
||||
<!-- TODO:
|
||||
Explain cost (how it works).
|
||||
-->
|
|
@ -1,77 +0,0 @@
|
|||
# Using RUSTFLAGS
|
||||
|
||||
One of the easiest ways to benefit from SIMD is to allow the compiler
|
||||
to generate code using certain vector instruction extensions.
|
||||
|
||||
The environment variable `RUSTFLAGS` can be used to pass options for code
|
||||
generation to the Rust compiler. These flags will affect **all** compiled crates.
|
||||
|
||||
There are two flags which can be used to enable specific vector extensions:
|
||||
|
||||
## target-feature
|
||||
|
||||
- Syntax: `-C target-feature=<features>`
|
||||
|
||||
- Provides the compiler with a comma-separated set of instruction extensions
|
||||
to enable.
|
||||
|
||||
**Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions
|
||||
for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and
|
||||
[Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions).
|
||||
|
||||
- To list target triples for all targets supported by Rust, use:
|
||||
|
||||
```sh
|
||||
rustc --print target-list
|
||||
```
|
||||
|
||||
- To list all supported target features for a certain target triple, use:
|
||||
|
||||
```sh
|
||||
rustc --target=${TRIPLE} --print target-features
|
||||
```
|
||||
|
||||
- Note that all CPU features are independent, and will have to be enabled individually.
|
||||
|
||||
**Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though
|
||||
all CPUs which support AVX2 also support FMA. To enable both, one has to use
|
||||
`-C target-feature=+avx2,+fma`
|
||||
|
||||
- Some features also depend on other features, which need to be enabled for the
|
||||
target instructions to be generated.
|
||||
|
||||
**Example**: Unless `v7` is specified as the target CPU (see below), to enable
|
||||
NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`.
|
||||
|
||||
## target-cpu
|
||||
|
||||
- Syntax: `-C target-cpu=<cpu>`
|
||||
|
||||
- Sets the identifier of a CPU family / model for which to build and optimize the code.
|
||||
|
||||
**Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'`
|
||||
|
||||
- To list all supported target CPUs for a certain target triple, use:
|
||||
|
||||
```sh
|
||||
rustc --target=${TRIPLE} --print target-cpus
|
||||
```
|
||||
|
||||
**Example**:
|
||||
|
||||
```sh
|
||||
rustc --target=i686-pc-windows-msvc --print target-cpus
|
||||
```
|
||||
|
||||
- The compiler will translate this into a list of target features. Therefore,
|
||||
individual feature checks (`#[cfg(target_feature = "...")]`) will still
|
||||
work properly.
|
||||
|
||||
- It will cause the code generator to optimize the generated code for that
|
||||
specific CPU model.
|
||||
|
||||
- Using `native` as the CPU model will cause Rust to generate and optimize code
|
||||
for the CPU running the compiler. It is useful when building programs which you
|
||||
plan to only use locally. This should never be used when the generated programs
|
||||
are meant to be run on other computers, such as when packaging for distribution
|
||||
or cross-compiling.
|
|
@ -1,76 +0,0 @@
|
|||
# Vertical and horizontal operations
|
||||
|
||||
In SIMD terminology, each vector has a certain "width" (number of lanes).
|
||||
A vector processor is able to perform two kinds of operations on a vector:
|
||||
|
||||
- Vertical operations:
|
||||
operate on two vectors of the same width, result has same width
|
||||
|
||||
**Example**: vertical addition of two `f32x4` vectors
|
||||
|
||||
%0 == | 2 | -3.5 | 0 | 7 |
|
||||
+ + + +
|
||||
%1 == | 4 | 1.5 | -1 | 0 |
|
||||
= = = =
|
||||
%0 + %1 == | 6 | -2 | -1 | 7 |
|
||||
|
||||
- Horizontal operations:
|
||||
reduce the elements of two vectors in some way,
|
||||
the result's elements combine information from the two original ones
|
||||
|
||||
**Example**: horizontal addition of two `u64x2` vectors
|
||||
|
||||
%0 == | 1 | 3 |
|
||||
└─+───┘
|
||||
└───────┐
|
||||
│
|
||||
%1 == | 4 | -1 | │
|
||||
└─+──┘ │
|
||||
└───┐ │
|
||||
│ │
|
||||
┌─────│───┘
|
||||
▼ ▼
|
||||
%0 + %1 == | 4 | 3 |
|
||||
|
||||
## Performance consideration of horizontal operations
|
||||
|
||||
The result of vertical operations, like vector negation: `-a`, for a given lane,
|
||||
does not depend on the result of the operation for the other lanes. The result
|
||||
of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on
|
||||
the value of all vector lanes.
|
||||
|
||||
In virtually all architectures vertical operations are fast, while horizontal
|
||||
operations are, by comparison, very slow.
|
||||
|
||||
Consider the following two functions for computing the sum of all `f32` values
|
||||
in a slice:
|
||||
|
||||
```rust
|
||||
fn fast_sum(x: &[f32]) -> f32 {
|
||||
assert!(x.len() % 4 == 0);
|
||||
let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.]
|
||||
for i in (0..x.len()).step_by(4) {
|
||||
sum += f32x4::from_slice_unaligned(&x[i..]);
|
||||
}
|
||||
sum.sum()
|
||||
}
|
||||
|
||||
fn slow_sum(x: &[f32]) -> f32 {
|
||||
assert!(x.len() % 4 == 0);
|
||||
let mut sum: f32 = 0.;
|
||||
for i in (0..x.len()).step_by(4) {
|
||||
sum += f32x4::from_slice_unaligned(&x[i..]).sum();
|
||||
}
|
||||
sum
|
||||
}
|
||||
```
|
||||
|
||||
The inner loop over the slice is where the bulk of the work actually happens.
|
||||
There, the `fast_sum` function performs vertical operations into a vector, doing
|
||||
a single horizontal reduction at the end, while the `slow_sum` function performs
|
||||
horizontal vector operations inside of the loop.
|
||||
|
||||
On all widely-used architectures, `fast_sum` is a large constant factor faster
|
||||
than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On
|
||||
the particular machine tested there the algorithm using the horizontal vector
|
||||
addition is 2.7x slower than the one using vertical vector operations!
|
|
@ -1 +0,0 @@
|
|||
nightly
|
|
@ -1,5 +0,0 @@
|
|||
max_width = 110
|
||||
use_small_heuristics = "Max"
|
||||
wrap_comments = true
|
||||
edition = "2018"
|
||||
error_on_line_overflow = true
|
|
@ -1,309 +0,0 @@
|
|||
//! Implements the Simd<[T; N]> APIs
|
||||
|
||||
#[macro_use]
|
||||
mod bitmask;
|
||||
pub(crate) mod cast;
|
||||
#[macro_use]
|
||||
mod cmp;
|
||||
#[macro_use]
|
||||
mod default;
|
||||
#[macro_use]
|
||||
mod fmt;
|
||||
#[macro_use]
|
||||
mod from;
|
||||
#[macro_use]
|
||||
mod hash;
|
||||
#[macro_use]
|
||||
mod math;
|
||||
#[macro_use]
|
||||
mod minimal;
|
||||
#[macro_use]
|
||||
mod ops;
|
||||
#[macro_use]
|
||||
mod ptr;
|
||||
#[macro_use]
|
||||
mod reductions;
|
||||
#[macro_use]
|
||||
mod select;
|
||||
#[macro_use]
|
||||
mod shuffle;
|
||||
#[macro_use]
|
||||
mod shuffle1_dyn;
|
||||
#[macro_use]
|
||||
mod slice;
|
||||
#[macro_use]
|
||||
mod swap_bytes;
|
||||
#[macro_use]
|
||||
mod bit_manip;
|
||||
|
||||
#[cfg(feature = "into_bits")]
|
||||
pub(crate) mod into_bits;
|
||||
|
||||
macro_rules! impl_i {
|
||||
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
|
||||
| $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
|
||||
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
||||
impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
| $($elem_ids),* | $(#[$doc])*);
|
||||
impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
||||
);
|
||||
impl_ops_scalar_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
||||
);
|
||||
impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_int_min_max!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt
|
||||
);
|
||||
impl_reduction_integer_arithmetic!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
);
|
||||
impl_reduction_min_max!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
);
|
||||
impl_reduction_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
| (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
|
||||
);
|
||||
impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
|
||||
impl_from_vectors!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
||||
);
|
||||
impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_partial_eq!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)
|
||||
);
|
||||
impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
||||
impl_cmp_vertical!(
|
||||
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
|
||||
);
|
||||
impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
||||
impl_bitmask!($tuple_id | $ibitmask_ty | (-1, 0) | $test_tt);
|
||||
|
||||
test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
|
||||
test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_u {
|
||||
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
|
||||
| $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
|
||||
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
||||
impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
| $($elem_ids),* | $(#[$doc])*);
|
||||
impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
||||
);
|
||||
impl_ops_scalar_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
||||
);
|
||||
impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_int_min_max!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt
|
||||
);
|
||||
impl_reduction_integer_arithmetic!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
);
|
||||
impl_reduction_min_max!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
);
|
||||
impl_reduction_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
| (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
|
||||
);
|
||||
impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
|
||||
impl_from_vectors!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
||||
);
|
||||
impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_partial_eq!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0)
|
||||
);
|
||||
impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
||||
impl_cmp_vertical!(
|
||||
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
|
||||
);
|
||||
impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
||||
impl_bitmask!($tuple_id | $ibitmask_ty | ($ielem_ty::max_value(), 0) |
|
||||
$test_tt);
|
||||
|
||||
test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
|
||||
test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_f {
|
||||
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
|
||||
| $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
|
||||
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
||||
impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
| $($elem_ids),* | $(#[$doc])*);
|
||||
impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_ops_vector_float_min_max!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt
|
||||
);
|
||||
impl_reduction_float_arithmetic!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_reduction_min_max!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
);
|
||||
impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.));
|
||||
impl_from_vectors!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
||||
);
|
||||
impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_partial_eq!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.)
|
||||
);
|
||||
impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
|
||||
impl_float_consts!([$elem_ty; $elem_n]: $tuple_id);
|
||||
impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty);
|
||||
|
||||
// floating-point math
|
||||
impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_math_float_tanh!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_vertical!(
|
||||
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.)
|
||||
| $test_tt
|
||||
);
|
||||
|
||||
test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt);
|
||||
test_reduction_float_min_max!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt
|
||||
);
|
||||
test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_m {
|
||||
([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident
|
||||
| $ielem_ty:ident, $ibitmask_ty:ident
|
||||
| $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),*
|
||||
| $(#[$doc:meta])*) => {
|
||||
impl_minimal_mask!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
| $($elem_ids),* | $(#[$doc])*
|
||||
);
|
||||
impl_ops_vector_mask_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
||||
);
|
||||
impl_ops_scalar_mask_bitwise!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
||||
);
|
||||
impl_reduction_bitwise!(
|
||||
[bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
||||
| (|x|{ x != 0 }) | (true, false)
|
||||
);
|
||||
impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_from_array!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt
|
||||
| (crate::$elem_ty::new(true), true)
|
||||
);
|
||||
impl_from_vectors!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
||||
);
|
||||
impl_default!([bool; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_partial_eq!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
||||
);
|
||||
impl_cmp_eq!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
||||
);
|
||||
impl_cmp_vertical!(
|
||||
[$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false)
|
||||
| $test_tt
|
||||
);
|
||||
impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_cmp_ord!(
|
||||
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true)
|
||||
);
|
||||
impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
impl_bitmask!($tuple_id | $ibitmask_ty | (true, false) | $test_tt);
|
||||
|
||||
test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_const_p {
|
||||
([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
|
||||
$usize_ty:ident, $isize_ty:ident
|
||||
| $test_tt:tt | $($elem_ids:ident),*
|
||||
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
||||
impl_minimal_p!(
|
||||
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
|
||||
| ref_ | $test_tt | $($elem_ids),*
|
||||
| (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
|
||||
);
|
||||
impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_mut_p {
|
||||
([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
|
||||
$usize_ty:ident, $isize_ty:ident
|
||||
| $test_tt:tt | $($elem_ids:ident),*
|
||||
| From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
||||
impl_minimal_p!(
|
||||
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
|
||||
| ref_mut_ | $test_tt | $($elem_ids),*
|
||||
| (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
|
||||
);
|
||||
impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
|
||||
impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
|
||||
}
|
||||
}
|
|
@ -1,129 +0,0 @@
|
|||
//! Bit manipulations.
|
||||
|
||||
macro_rules! impl_bit_manip {
|
||||
([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
||||
impl $id {
|
||||
/// Returns the number of ones in the binary representation of
|
||||
/// the lanes of `self`.
|
||||
#[inline]
|
||||
pub fn count_ones(self) -> Self {
|
||||
super::codegen::bit_manip::BitManip::ctpop(self)
|
||||
}
|
||||
|
||||
/// Returns the number of zeros in the binary representation of
|
||||
/// the lanes of `self`.
|
||||
#[inline]
|
||||
pub fn count_zeros(self) -> Self {
|
||||
super::codegen::bit_manip::BitManip::ctpop(!self)
|
||||
}
|
||||
|
||||
/// Returns the number of leading zeros in the binary
|
||||
/// representation of the lanes of `self`.
|
||||
#[inline]
|
||||
pub fn leading_zeros(self) -> Self {
|
||||
super::codegen::bit_manip::BitManip::ctlz(self)
|
||||
}
|
||||
|
||||
/// Returns the number of trailing zeros in the binary
|
||||
/// representation of the lanes of `self`.
|
||||
#[inline]
|
||||
pub fn trailing_zeros(self) -> Self {
|
||||
super::codegen::bit_manip::BitManip::cttz(self)
|
||||
}
|
||||
}
|
||||
|
||||
test_if! {
|
||||
$test_tt:
|
||||
paste::item! {
|
||||
#[allow(overflowing_literals)]
|
||||
pub mod [<$id _bit_manip>] {
|
||||
#![allow(const_item_mutation)]
|
||||
use super::*;
|
||||
|
||||
const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8;
|
||||
|
||||
macro_rules! test_func {
|
||||
($x:expr, $func:ident) => {{
|
||||
let mut actual = $x;
|
||||
for i in 0..$id::lanes() {
|
||||
actual = actual.replace(
|
||||
i,
|
||||
$x.extract(i).$func() as $elem_ty
|
||||
);
|
||||
}
|
||||
let expected = $x.$func();
|
||||
assert_eq!(actual, expected);
|
||||
}};
|
||||
}
|
||||
|
||||
const BYTES: [u8; 64] = [
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
];
|
||||
|
||||
/// Builds a `$id` whose lanes are the leading bytes of `BYTES`, each lane
/// being `size_of::<$elem_ty>()` consecutive bytes read in native byte
/// order.
///
/// Every lane is assembled from its own byte chunk with `from_ne_bytes`,
/// so no `u8` pointer is reinterpreted as a `$elem_ty` pointer (which is
/// not guaranteed to be suitably aligned for wider lanes) and no reference
/// outlives the data it points into.
fn load_bytes() -> $id {
    const LANE_BYTES: usize = mem::size_of::<$elem_ty>();

    // Work on a local copy so the borrowed chunks have a named owner.
    let bytes = BYTES;
    let mut vec = $id::splat(0);
    for (lane_idx, chunk) in bytes
        .chunks_exact(LANE_BYTES)
        .take($id::lanes())
        .enumerate()
    {
        let mut lane = [0_u8; LANE_BYTES];
        lane.copy_from_slice(chunk);
        vec = vec.replace(lane_idx, <$elem_ty>::from_ne_bytes(lane));
    }
    vec
}
|
||||
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn count_ones() {
|
||||
test_func!($id::splat(0), count_ones);
|
||||
test_func!($id::splat(!0), count_ones);
|
||||
test_func!(load_bytes(), count_ones);
|
||||
}
|
||||
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn count_zeros() {
|
||||
test_func!($id::splat(0), count_zeros);
|
||||
test_func!($id::splat(!0), count_zeros);
|
||||
test_func!(load_bytes(), count_zeros);
|
||||
}
|
||||
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn leading_zeros() {
|
||||
test_func!($id::splat(0), leading_zeros);
|
||||
test_func!($id::splat(1), leading_zeros);
|
||||
// some implementations use `pshufb` which has unique
|
||||
// behavior when the 8th bit is set.
|
||||
test_func!($id::splat(0b1000_0010), leading_zeros);
|
||||
test_func!($id::splat(!0), leading_zeros);
|
||||
test_func!(
|
||||
$id::splat(1 << (LANE_WIDTH - 1)),
|
||||
leading_zeros
|
||||
);
|
||||
test_func!(load_bytes(), leading_zeros);
|
||||
}
|
||||
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn trailing_zeros() {
|
||||
test_func!($id::splat(0), trailing_zeros);
|
||||
test_func!($id::splat(1), trailing_zeros);
|
||||
test_func!($id::splat(0b1000_0010), trailing_zeros);
|
||||
test_func!($id::splat(!0), trailing_zeros);
|
||||
test_func!(
|
||||
$id::splat(1 << (LANE_WIDTH - 1)),
|
||||
trailing_zeros
|
||||
);
|
||||
test_func!(load_bytes(), trailing_zeros);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
|
@ -1,79 +0,0 @@
|
|||
//! Bitmask API
|
||||
|
||||
macro_rules! impl_bitmask {
|
||||
($id:ident | $ibitmask_ty:ident | ($set:expr, $clear:expr)
|
||||
| $test_tt:tt) => {
|
||||
impl $id {
|
||||
/// Creates a bitmask with the MSB of each vector lane.
|
||||
///
|
||||
/// If the vector has fewer than 8 lanes, the bits that do not
|
||||
/// correspond to any vector lanes are cleared.
|
||||
#[inline]
|
||||
pub fn bitmask(self) -> $ibitmask_ty {
|
||||
unsafe { codegen::llvm::simd_bitmask(self.0) }
|
||||
}
|
||||
}
|
||||
|
||||
test_if! {
|
||||
$test_tt:
|
||||
paste::item! {
|
||||
#[cfg(not(
|
||||
// FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/210
|
||||
target_endian = "big"
|
||||
))]
|
||||
pub mod [<$id _bitmask>] {
|
||||
use super::*;
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn bitmask() {
|
||||
// clear all lanes
|
||||
let vec = $id::splat($clear as _);
|
||||
let bitmask: $ibitmask_ty = 0;
|
||||
assert_eq!(vec.bitmask(), bitmask);
|
||||
|
||||
// set even lanes
|
||||
let mut vec = $id::splat($clear as _);
|
||||
for i in 0..$id::lanes() {
|
||||
if i % 2 == 0 {
|
||||
vec = vec.replace(i, $set as _);
|
||||
}
|
||||
}
|
||||
// create bitmask with even lanes set:
|
||||
let mut bitmask: $ibitmask_ty = 0;
|
||||
for i in 0..$id::lanes() {
|
||||
if i % 2 == 0 {
|
||||
bitmask |= 1 << i;
|
||||
}
|
||||
}
|
||||
assert_eq!(vec.bitmask(), bitmask);
|
||||
|
||||
|
||||
// set odd lanes
|
||||
let mut vec = $id::splat($clear as _);
|
||||
for i in 0..$id::lanes() {
|
||||
if i % 2 != 0 {
|
||||
vec = vec.replace(i, $set as _);
|
||||
}
|
||||
}
|
||||
// create bitmask with odd lanes set:
|
||||
let mut bitmask: $ibitmask_ty = 0;
|
||||
for i in 0..$id::lanes() {
|
||||
if i % 2 != 0 {
|
||||
bitmask |= 1 << i;
|
||||
}
|
||||
}
|
||||
assert_eq!(vec.bitmask(), bitmask);
|
||||
|
||||
// set all lanes
|
||||
let vec = $id::splat($set as _);
|
||||
let mut bitmask: $ibitmask_ty = 0;
|
||||
for i in 0..$id::lanes() {
|
||||
bitmask |= 1 << i;
|
||||
}
|
||||
assert_eq!(vec.bitmask(), bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
|
@ -1,108 +0,0 @@
|
|||
//! Implementation of `FromCast` and `Cast`.
|
||||
#![allow(clippy::module_name_repetitions)]
|
||||
|
||||
/// Numeric cast from `T` to `Self`.
|
||||
///
|
||||
/// > Note: This is a temporary workaround until the conversion traits
|
||||
/// > specified in [RFC2484] are implemented.
|
||||
///
|
||||
/// Numeric cast between vectors with the same number of lanes, such that:
|
||||
///
|
||||
/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
|
||||
/// -> `u32xN`) is a **no-op**,
|
||||
///
|
||||
/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
|
||||
/// `u8xN`) will **truncate**,
|
||||
///
|
||||
/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
|
||||
/// `u32xN`) will:
|
||||
/// * **zero-extend** if the source is unsigned, or
|
||||
/// * **sign-extend** if the source is signed,
|
||||
///
|
||||
/// * casting from a float to an integer will **round the float towards zero**,
|
||||
///
|
||||
/// * casting from an integer to float will produce the floating point
|
||||
/// representation of the integer, **rounding to nearest, ties to even**,
|
||||
///
|
||||
/// * casting from an `f32` to an `f64` is perfect and lossless,
|
||||
///
|
||||
/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
|
||||
///
|
||||
/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
|
||||
pub trait FromCast<T>: crate::marker::Sized {
|
||||
/// Numeric cast from `T` to `Self`.
|
||||
fn from_cast(_: T) -> Self;
|
||||
}
|
||||
|
||||
/// Numeric cast from `Self` to `T`.
|
||||
///
|
||||
/// > Note: This is a temporary workaround until the conversion traits
|
||||
/// > specified in [RFC2484] are implemented.
|
||||
///
|
||||
/// Numeric cast between vectors with the same number of lanes, such that:
|
||||
///
|
||||
/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
|
||||
/// -> `u32xN`) is a **no-op**,
|
||||
///
|
||||
/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
|
||||
/// `u8xN`) will **truncate**,
|
||||
///
|
||||
/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
|
||||
/// `u32xN`) will:
|
||||
/// * **zero-extend** if the source is unsigned, or
|
||||
/// * **sign-extend** if the source is signed,
|
||||
///
|
||||
/// * casting from a float to an integer will **round the float towards zero**,
|
||||
///
|
||||
/// * casting from an integer to float will produce the floating point
|
||||
/// representation of the integer, **rounding to nearest, ties to even**,
|
||||
///
|
||||
/// * casting from an `f32` to an `f64` is perfect and lossless,
|
||||
///
|
||||
/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
|
||||
///
|
||||
/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
|
||||
pub trait Cast<T>: crate::marker::Sized {
|
||||
/// Numeric cast from `self` to `T`.
|
||||
fn cast(self) -> T;
|
||||
}
|
||||
|
||||
/// `FromCast` implies `Cast`.
|
||||
impl<T, U> Cast<U> for T
|
||||
where
|
||||
U: FromCast<T>,
|
||||
{
|
||||
#[inline]
|
||||
fn cast(self) -> U {
|
||||
U::from_cast(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// `FromCast` and `Cast` are reflexive
|
||||
impl<T> FromCast<T> for T {
|
||||
#[inline]
|
||||
fn from_cast(t: Self) -> Self {
|
||||
t
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
mod v16;
|
||||
pub use self::v16::*;
|
||||
|
||||
mod v32;
|
||||
pub use self::v32::*;
|
||||
|
||||
mod v64;
|
||||
pub use self::v64::*;
|
||||
|
||||
mod v128;
|
||||
pub use self::v128::*;
|
||||
|
||||
mod v256;
|
||||
pub use self::v256::*;
|
||||
|
||||
mod v512;
|
||||
pub use self::v512::*;
|
|
@ -1,82 +0,0 @@
|
|||
//! Macros implementing `FromCast`
|
||||
|
||||
macro_rules! impl_from_cast_ {
|
||||
($id:ident[$test_tt:tt]: $from_ty:ident) => {
|
||||
impl crate::api::cast::FromCast<$from_ty> for $id {
|
||||
#[inline]
|
||||
fn from_cast(x: $from_ty) -> Self {
|
||||
use crate::llvm::simd_cast;
|
||||
debug_assert_eq!($from_ty::lanes(), $id::lanes());
|
||||
Simd(unsafe { simd_cast(x.0) })
|
||||
}
|
||||
}
|
||||
|
||||
test_if!{
|
||||
$test_tt:
|
||||
paste::item! {
|
||||
pub mod [<$id _from_cast_ $from_ty>] {
|
||||
use super::*;
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn test() {
|
||||
assert_eq!($id::lanes(), $from_ty::lanes());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_from_cast {
|
||||
($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
|
||||
$(
|
||||
impl_from_cast_!($id[$test_tt]: $from_ty);
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_from_cast_mask_ {
|
||||
($id:ident[$test_tt:tt]: $from_ty:ident) => {
|
||||
impl crate::api::cast::FromCast<$from_ty> for $id {
|
||||
#[inline]
|
||||
fn from_cast(x: $from_ty) -> Self {
|
||||
debug_assert_eq!($from_ty::lanes(), $id::lanes());
|
||||
x.ne($from_ty::default())
|
||||
.select($id::splat(true), $id::splat(false))
|
||||
}
|
||||
}
|
||||
|
||||
test_if!{
|
||||
$test_tt:
|
||||
paste::item! {
|
||||
pub mod [<$id _from_cast_ $from_ty>] {
|
||||
use super::*;
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn test() {
|
||||
assert_eq!($id::lanes(), $from_ty::lanes());
|
||||
|
||||
let x = $from_ty::default();
|
||||
let m: $id = x.cast();
|
||||
assert!(m.none());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_from_cast_mask {
|
||||
($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
|
||||
$(
|
||||
impl_from_cast_mask_!($id[$test_tt]: $from_ty);
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! impl_into_cast {
|
||||
($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
|
||||
$(
|
||||
impl_from_cast_!($from_ty[$test_tt]: $id);
|
||||
)*
|
||||
}
|
||||
}
|
|
@ -1,302 +0,0 @@
|
|||
//! `FromCast` and `Cast` implementations for portable 128-bit wide vectors
|
||||
#[rustfmt::skip]
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_from_cast!(i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
|
||||
impl_from_cast!(u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
|
||||
impl_from_cast_mask!(m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
|
||||
|
||||
impl_from_cast!(
|
||||
i16x8[test_v128]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast!(
|
||||
u16x8[test_v128]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m16x8[test_v128]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i32x4[test_v128]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
u32x4[test_v128]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
f32x4[test_v128]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m32x4[test_v128]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i64x2[test_v128]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
u64x2[test_v128]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
f64x2[test_v128]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m64x2[test_v128]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
isizex2[test_v128]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
usizex2[test_v128]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
msizex2[test_v128]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2
|
||||
);
|
||||
|
||||
// FIXME: into_cast impls for the 128-bit single-element vectors
|
||||
impl_from_cast!(i128x1[test_v128]: u128x1, m128x1);
|
||||
impl_from_cast!(u128x1[test_v128]: i128x1, m128x1);
|
||||
impl_from_cast!(m128x1[test_v128]: i128x1, u128x1);
|
|
@ -1,68 +0,0 @@
|
|||
//! `FromCast` and `Cast` implementations for portable 16-bit wide vectors
|
||||
#[rustfmt::skip]
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_from_cast!(
|
||||
i8x2[test_v16]: u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
u8x2[test_v16]: i8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m8x2[test_v16]: i8x2,
|
||||
u8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
|
@ -1,298 +0,0 @@
|
|||
//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors
|
||||
#[rustfmt::skip]
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32);
|
||||
impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32);
|
||||
impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32);
|
||||
|
||||
impl_from_cast!(i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
|
||||
impl_from_cast!(u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
|
||||
impl_from_cast_mask!(m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, i32x16, u32x16, f32x16, m32x16);
|
||||
|
||||
impl_from_cast!(
|
||||
i32x8[test_v256]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast!(
|
||||
u32x8[test_v256]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast!(
|
||||
f32x8[test_v256]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m32x8[test_v256]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i64x4[test_v256]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
u64x4[test_v256]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
f64x4[test_v256]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m64x4[test_v256]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i128x2[test_v256]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
u128x2[test_v256]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m128x2[test_v256]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
m64x2,
|
||||
f64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
isizex4[test_v256]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
usizex4[test_v256]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
msizex4[test_v256]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4
|
||||
);
|
|
@ -1,132 +0,0 @@
|
|||
//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors
|
||||
#[rustfmt::skip]
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_from_cast!(
|
||||
i8x4[test_v32]: u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
u8x4[test_v32]: i8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m8x4[test_v32]: i8x4,
|
||||
u8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i16x2[test_v32]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
u16x2[test_v32]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m16x2[test_v32]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
|
@ -1,209 +0,0 @@
|
|||
//! `FromCast` and `IntoCast` implementations for portable 512-bit wide vectors
|
||||
#[rustfmt::skip]
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_from_cast!(i8x64[test_v512]: u8x64, m8x64);
|
||||
impl_from_cast!(u8x64[test_v512]: i8x64, m8x64);
|
||||
impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64);
|
||||
|
||||
impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32);
|
||||
impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32);
|
||||
impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32);
|
||||
|
||||
impl_from_cast!(i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16);
|
||||
impl_from_cast!(u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16);
|
||||
impl_from_cast!(f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16);
|
||||
impl_from_cast_mask!(m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16);
|
||||
|
||||
impl_from_cast!(
|
||||
i64x8[test_v512]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast!(
|
||||
u64x8[test_v512]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast!(
|
||||
f64x8[test_v512]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m64x8[test_v512]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i128x4[test_v512]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
u128x4[test_v512]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m128x4[test_v512]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
m64x4,
|
||||
f64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
isizex8[test_v512]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast!(
|
||||
usizex8[test_v512]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
msizex8[test_v512]: i8x8,
|
||||
u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8
|
||||
);
|
|
@ -1,208 +0,0 @@
|
|||
//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors
|
||||
#[rustfmt::skip]
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_from_cast!(
|
||||
i8x8[test_v64]: u8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast!(
|
||||
u8x8[test_v64]: i8x8,
|
||||
m8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m8x8[test_v64]: i8x8,
|
||||
u8x8,
|
||||
i16x8,
|
||||
u16x8,
|
||||
m16x8,
|
||||
i32x8,
|
||||
u32x8,
|
||||
f32x8,
|
||||
m32x8,
|
||||
i64x8,
|
||||
u64x8,
|
||||
f64x8,
|
||||
m64x8,
|
||||
isizex8,
|
||||
usizex8,
|
||||
msizex8
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i16x4[test_v64]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
u16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast!(
|
||||
u16x4[test_v64]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
m16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m16x4[test_v64]: i8x4,
|
||||
u8x4,
|
||||
m8x4,
|
||||
i16x4,
|
||||
u16x4,
|
||||
i32x4,
|
||||
u32x4,
|
||||
f32x4,
|
||||
m32x4,
|
||||
i64x4,
|
||||
u64x4,
|
||||
f64x4,
|
||||
m64x4,
|
||||
i128x4,
|
||||
u128x4,
|
||||
m128x4,
|
||||
isizex4,
|
||||
usizex4,
|
||||
msizex4
|
||||
);
|
||||
|
||||
impl_from_cast!(
|
||||
i32x2[test_v64]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
u32x2[test_v64]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
f32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast!(
|
||||
f32x2[test_v64]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
m32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
||||
impl_from_cast_mask!(
|
||||
m32x2[test_v64]: i8x2,
|
||||
u8x2,
|
||||
m8x2,
|
||||
i16x2,
|
||||
u16x2,
|
||||
m16x2,
|
||||
i32x2,
|
||||
u32x2,
|
||||
f32x2,
|
||||
i64x2,
|
||||
u64x2,
|
||||
f64x2,
|
||||
m64x2,
|
||||
i128x2,
|
||||
u128x2,
|
||||
m128x2,
|
||||
isizex2,
|
||||
usizex2,
|
||||
msizex2
|
||||
);
|
|
@ -1,16 +0,0 @@
|
|||
//! Implement cmp traits for vector types
|
||||
|
||||
#[macro_use]
|
||||
mod partial_eq;
|
||||
|
||||
#[macro_use]
|
||||
mod eq;
|
||||
|
||||
#[macro_use]
|
||||
mod partial_ord;
|
||||
|
||||
#[macro_use]
|
||||
mod ord;
|
||||
|
||||
#[macro_use]
|
||||
mod vertical;
|
|
@ -1,27 +0,0 @@
|
|||
//! Implements `Eq` for vector types.
|
||||
|
||||
macro_rules! impl_cmp_eq {
|
||||
(
|
||||
[$elem_ty:ident; $elem_count:expr]:
|
||||
$id:ident | $test_tt:tt |
|
||||
($true:expr, $false:expr)
|
||||
) => {
|
||||
impl crate::cmp::Eq for $id {}
|
||||
impl crate::cmp::Eq for LexicographicallyOrdered<$id> {}
|
||||
|
||||
test_if!{
|
||||
$test_tt:
|
||||
paste::item! {
|
||||
pub mod [<$id _cmp_eq>] {
|
||||
use super::*;
|
||||
#[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn eq() {
|
||||
fn foo<E: crate::cmp::Eq>(_: E) {}
|
||||
let a = $id::splat($false);
|
||||
foo(a);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче