Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1859085 - Update xsimd to 11.1.0. r=padenot,sergesanspaille
Changelog:

11.1.0
------
* Introduce XSIMD_DEFAULT_ARCH to force default architecture (if any)
* Remove C++ requirement on xsimd::exp10 scalar implementation

11.0.0
------
* Provide a generic reducer
* Fix ``find_package(xsimd)`` for xtl enabled xsimd, reloaded
* Provide avx512f implementation of FMA and variant
* Hexadecimal floating points are not a C++11 feature
* back to slow implementation of exp10 on Windows
* Changed bitwise_cast API
* Provide generic signed /unsigned type conversion
* Fixed sde location
* Feature/incr decr

Depends on D191042

Differential Revision: https://phabricator.services.mozilla.com/D191043
This commit is contained in:
Parent
52ca4b99e8
Commit
727ac25d91
@@ -9,6 +9,61 @@
Changelog
=========

11.1.0
------

* Introduce XSIMD_DEFAULT_ARCH to force default architecture (if any)

* Remove C++ requirement on xsimd::exp10 scalar implementation

* Improve and test documentation

11.0.0
------

* Provide a generic reducer

* Fix ``find_package(xsimd)`` for xtl enabled xsimd, reloaded

* Cleanup benchmark code

* Provide avx512f implementation of FMA and variant

* Hexadecimal floating points are not a C++11 feature

* back to slow implementation of exp10 on Windows

* Changed bitwise_cast API

* Provide generic signed /unsigned type conversion

* Fixed sde location

* Feature/incr decr

* Cleanup documentation

10.0.0
------

* Fix potential ABI issue in SVE support

* Disable fast exp10 on OSX

* Assert on unaligned memory when calling aligned load/store

* Fix warning about uninitialized storage

* Always forward arch parameter

* Do not specialize the behavior of ``simd_return_type`` for char

* Support broadcasting of complex batches

* Make xsimd compatible with -fno-exceptions

* Provide and test comparison operators overloads that accept scalars

9.0.1
-----
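The XSIMD_DEFAULT_ARCH hook listed above is wired up further down in this patch (the #ifdef in the architecture header). For context, a minimal sketch of how a consumer could force the default architecture, assuming an x86 target where sse2 is acceptable; illustration only, not part of the patch:

    // Must be defined before the first xsimd include; any architecture type works.
    #define XSIMD_DEFAULT_ARCH xsimd::sse2
    #include <xsimd/xsimd.hpp>
    #include <cstdio>

    int main()
    {
        // default_arch now resolves to the forced architecture instead of best_arch.
        std::printf("default arch: %s (alignment %u)\n",
                    xsimd::default_arch::name(),
                    unsigned(xsimd::default_arch::alignment()));
        return 0;
    }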
@@ -43,6 +43,20 @@ namespace xsimd
self, other);
}

// decr
template <class A, class T>
inline batch<T, A> decr(batch<T, A> const& self, requires_arch<generic>) noexcept
{
return self - T(1);
}

// decr_if
template <class A, class T, class Mask>
inline batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
{
return select(mask, decr(self), self);
}

// div
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept

@@ -112,6 +126,20 @@ namespace xsimd
return { res_r, res_i };
}

// incr
template <class A, class T>
inline batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
{
return self + T(1);
}

// incr_if
template <class A, class T, class Mask>
inline batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
{
return select(mask, incr(self), self);
}

// mul
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
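These generic kernels back the new incr/decr family exposed as free functions later in this patch. For context, a rough usage sketch (illustration only, not part of the patch), assuming a float batch and a scalar threshold:

    #include <xsimd/xsimd.hpp>

    // Increment only the lanes that are below a threshold.
    xsimd::batch<float> bump_below(xsimd::batch<float> x, float threshold)
    {
        auto mask = x < threshold;      // batch_bool mask, one flag per lane
        return xsimd::incr_if(x, mask); // adds 1 where the mask is true
    }

    // decr_if(x, mask) is the symmetric form; incr(x) / decr(x) touch every lane.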
@@ -135,6 +135,51 @@ namespace xsimd
}
}

// some generic fast_cast conversion
namespace detail
{
template <class A>
inline batch<uint8_t, A> fast_cast(batch<int8_t, A> const& self, batch<uint8_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint8_t>(self);
}
template <class A>
inline batch<uint16_t, A> fast_cast(batch<int16_t, A> const& self, batch<uint16_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint16_t>(self);
}
template <class A>
inline batch<uint32_t, A> fast_cast(batch<int32_t, A> const& self, batch<uint32_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint32_t>(self);
}
template <class A>
inline batch<uint64_t, A> fast_cast(batch<int64_t, A> const& self, batch<uint64_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint64_t>(self);
}
template <class A>
inline batch<int8_t, A> fast_cast(batch<uint8_t, A> const& self, batch<int8_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int8_t>(self);
}
template <class A>
inline batch<int16_t, A> fast_cast(batch<uint16_t, A> const& self, batch<int16_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int16_t>(self);
}
template <class A>
inline batch<int32_t, A> fast_cast(batch<uint32_t, A> const& self, batch<int32_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int32_t>(self);
}
template <class A>
inline batch<int64_t, A> fast_cast(batch<uint64_t, A> const& self, batch<int64_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int64_t>(self);
}
}

namespace detail
{
// Generic conversion handling machinery. Each architecture must define
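The casts above use the reworked bitwise_cast API from the 11.0.0 changelog, where the template argument is the target scalar type and the value bits are reinterpreted unchanged. A usage sketch (illustration only; the exact public overload is assumed from the kernel code above):

    #include <xsimd/xsimd.hpp>
    #include <cstdint>

    // Reinterpret a signed batch as unsigned without converting values.
    xsimd::batch<std::uint32_t> as_unsigned(xsimd::batch<std::int32_t> v)
    {
        return xsimd::bitwise_cast<std::uint32_t>(v);
    }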
@@ -549,6 +549,13 @@ namespace xsimd
}
}

// decr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept
{
return self + batch<T, A>(mask.data);
}

// div
template <class A>
inline batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
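The decr_if kernel above leans on the integer encoding of SIMD masks: a true lane is all-ones, which read as a signed integer is -1, so adding mask.data subtracts 1 exactly where the mask is true (incr_if subtracts it to add 1). A scalar illustration of the same identity (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        std::uint32_t lane_mask = 0xFFFFFFFFu;                         // a "true" SIMD lane
        std::int32_t as_signed = static_cast<std::int32_t>(lane_mask); // reads as -1

        std::int32_t x = 42;
        assert(as_signed == -1);
        assert(x + as_signed == x - 1); // decr_if: self + mask
        assert(x - as_signed == x + 1); // incr_if: self - mask
        return 0;
    }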
@@ -749,6 +756,13 @@ namespace xsimd
return _mm256_add_pd(tmp1, tmp2);
}

// incr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept
{
return self - batch<T, A>(mask.data);
}

// insert
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx>) noexcept
@@ -378,8 +378,8 @@ namespace xsimd
std::complex<float> c0, std::complex<float> c1,
std::complex<float> c2, std::complex<float> c3) noexcept
{
return batch<std::complex<float>>(float32x4_t { c0.real(), c1.real(), c2.real(), c3.real() },
float32x4_t { c0.imag(), c1.imag(), c2.imag(), c3.imag() });
return batch<std::complex<float>, A>(float32x4_t { c0.real(), c1.real(), c2.real(), c3.real() },
float32x4_t { c0.imag(), c1.imag(), c2.imag(), c3.imag() });
}

template <class A, class... Args>
@@ -138,6 +138,33 @@ namespace xsimd
return x + y;
}

template <class T>
inline T incr(T const& x) noexcept
{
return x + T(1);
}

template <class T>
inline T incr_if(T const& x, bool mask) noexcept
{
return x + T(mask ? 1 : 0);
}

inline bool all(bool mask)
{
return mask;
}

inline bool any(bool mask)
{
return mask;
}

inline bool none(bool mask)
{
return !mask;
}

template <class T>
inline typename std::enable_if<std::is_integral<T>::value, T>::type
bitwise_and(T x, T y) noexcept
@@ -470,11 +497,13 @@ namespace xsimd
#else
inline float exp10(const float& x) noexcept
{
return std::exp(0x1.26bb1cp+1f * x);
const float ln10 = std::log(10.f);
return std::exp(ln10 * x);
}
inline double exp10(const double& x) noexcept
{
return std::exp(0x1.26bb1bbb55516p+1 * x);
const double ln10 = std::log(10.);
return std::exp(ln10 * x);
}
#endif
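The replacement fallback uses the identity 10^x = e^(x * ln 10), trading the hexadecimal float literal (a C++17 feature) for a std::log call, as the changelog notes. A quick check of the identity (illustration only, not part of the patch):

    #include <cassert>
    #include <cmath>

    int main()
    {
        double x = 2.5;
        double via_exp = std::exp(std::log(10.0) * x); // what the fallback computes
        double direct = std::pow(10.0, x);
        assert(std::fabs(via_exp - direct) <= 1e-9 * direct); // equal up to rounding
        return 0;
    }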
@@ -738,6 +767,18 @@ namespace xsimd
return x - y;
}

template <class T>
inline T decr(T const& x) noexcept
{
return x - T(1);
}

template <class T>
inline T decr_if(T const& x, bool mask) noexcept
{
return x - T(mask ? 1 : 0);
}

#ifdef XSIMD_ENABLE_XTL_COMPLEX
template <class T, bool i3ec>
inline xtl::xcomplex<T, T, i3ec> log2(const xtl::xcomplex<T, T, i3ec>& val) noexcept
@@ -501,6 +501,13 @@ namespace xsimd
}
}

// decr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<sse2>) noexcept
{
return self + batch<T, A>(mask.data);
}

// div
template <class A>
inline batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
@@ -808,6 +815,13 @@ namespace xsimd
_mm_unpackhi_pd(row[0], row[1]));
}

// incr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<sse2>) noexcept
{
return self - batch<T, A>(mask.data);
}

// insert
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<sse2>) noexcept
@@ -23,6 +23,22 @@
namespace xsimd
{

/**
* @ingroup architectures
*
* Dummy architectures that only appears in a list of architecture when no
* other architecture has been detected.
*/
struct unavailable
{
static constexpr bool supported() noexcept { return false; }
static constexpr bool available() noexcept { return false; }
static constexpr unsigned version() noexcept { return 0; }
static constexpr std::size_t alignment() noexcept { return 0; }
static constexpr bool requires_alignment() noexcept { return false; }
static constexpr char const* name() noexcept { return "<none>"; }
};

namespace detail
{
// Checks whether T appears in Tys.
@@ -73,6 +89,21 @@ namespace xsimd
return max_of((head0 > head1 ? head0 : head1), tail...);
}

template <typename... Ts>
struct head;

template <typename T, typename... Ts>
struct head<T, Ts...>
{
using type = T;
};

template <>
struct head<>
{
using type = unavailable;
};

} // namespace detail

// An arch_list is a list of architectures, sorted by version number.
@@ -84,6 +115,8 @@ namespace xsimd
"architecture list must be sorted by version");
#endif

using best = typename detail::head<Archs...>::type;

template <class Arch>
using add = arch_list<Archs..., Arch>;
@@ -109,34 +142,8 @@ namespace xsimd
}
};

struct unavailable
{
static constexpr bool supported() noexcept { return false; }
static constexpr bool available() noexcept { return false; }
static constexpr unsigned version() noexcept { return 0; }
static constexpr std::size_t alignment() noexcept { return 0; }
static constexpr bool requires_alignment() noexcept { return false; }
static constexpr char const* name() noexcept { return "<none>"; }
};

namespace detail
{
// Pick the best architecture in arch_list L, which is the last
// because architectures are sorted by version.
template <class L>
struct best;

template <>
struct best<arch_list<>>
{
using type = unavailable;
};

template <class Arch, class... Archs>
struct best<arch_list<Arch, Archs...>>
{
using type = Arch;
};

// Filter archlists Archs, picking only supported archs and adding
// them to L.
@@ -190,12 +197,15 @@ namespace xsimd

using supported_architectures = typename detail::supported<all_architectures>::type;

using x86_arch = typename detail::best<typename detail::supported<all_x86_architectures>::type>::type;
using arm_arch = typename detail::best<typename detail::supported<all_arm_architectures>::type>::type;
// using default_arch = typename detail::best<typename detail::supported<arch_list</*arm_arch,*/ x86_arch>>::type>::type;
using default_arch = typename std::conditional<std::is_same<x86_arch, unavailable>::value,
arm_arch,
x86_arch>::type;
using x86_arch = typename detail::supported<all_x86_architectures>::type::best;
using arm_arch = typename detail::supported<all_arm_architectures>::type::best;
using best_arch = typename supported_architectures::best;

#ifdef XSIMD_DEFAULT_ARCH
using default_arch = XSIMD_DEFAULT_ARCH;
#else
using default_arch = best_arch;
#endif

namespace detail
{
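After this change x86_arch, arm_arch and best_arch all come from the arch_list's own best alias, and default_arch honours the new XSIMD_DEFAULT_ARCH override. A small introspection sketch (illustration only, not part of the patch):

    #include <xsimd/xsimd.hpp>
    #include <cstdio>

    int main()
    {
        // best_arch is the highest architecture enabled at compile time;
        // default_arch equals it unless XSIMD_DEFAULT_ARCH overrides it.
        std::printf("best arch:    %s (version %u)\n",
                    xsimd::best_arch::name(), xsimd::best_arch::version());
        std::printf("default arch: %s\n", xsimd::default_arch::name());
        return 0;
    }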
@@ -203,7 +213,7 @@ namespace xsimd
class dispatcher
{

const unsigned best_arch;
const unsigned best_arch_found;
F functor;

template <class Arch, class... Tys>
@@ -216,7 +226,7 @@ namespace xsimd
template <class Arch, class ArchNext, class... Archs, class... Tys>
auto walk_archs(arch_list<Arch, ArchNext, Archs...>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...))
{
if (Arch::version() <= best_arch)
if (Arch::version() <= best_arch_found)
return functor(Arch {}, std::forward<Tys>(args)...);
else
return walk_archs(arch_list<ArchNext, Archs...> {}, std::forward<Tys>(args)...);
@@ -224,7 +234,7 @@ namespace xsimd

public:
dispatcher(F f) noexcept
: best_arch(available_architectures().best)
: best_arch_found(available_architectures().best)
, functor(f)
{
}
@@ -12,9 +12,9 @@
#ifndef XSIMD_CONFIG_HPP
#define XSIMD_CONFIG_HPP

#define XSIMD_VERSION_MAJOR 10
#define XSIMD_VERSION_MAJOR 11
#define XSIMD_VERSION_MINOR 0
#define XSIMD_VERSION_PATCH 0
#define XSIMD_VERSION_PATCH 1

/**
* high level free functions
@@ -52,6 +52,7 @@ namespace xsimd
unsigned avx512bw : 1;
unsigned neon : 1;
unsigned neon64 : 1;
unsigned sve : 1;

// version number of the best arch available
unsigned best;
@@ -75,6 +76,15 @@ namespace xsimd
neon64 = 0;
best = neon::version() * neon;

#elif defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0

#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE);
#else
sve = 0;
#endif
best = sve::version() * sve;

#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
auto get_cpuid = [](int reg[4], int func_id) noexcept
{
@@ -108,31 +118,38 @@ namespace xsimd
#endif
};

int regs[4];
int regs1[4];

get_cpuid(regs, 0x1);
get_cpuid(regs1, 0x1);

sse2 = regs[3] >> 26 & 1;
sse2 = regs1[3] >> 26 & 1;
best = std::max(best, sse2::version() * sse2);

sse3 = regs[2] >> 0 & 1;
sse3 = regs1[2] >> 0 & 1;
best = std::max(best, sse3::version() * sse3);

ssse3 = regs[2] >> 9 & 1;
ssse3 = regs1[2] >> 9 & 1;
best = std::max(best, ssse3::version() * ssse3);

sse4_1 = regs[2] >> 19 & 1;
sse4_1 = regs1[2] >> 19 & 1;
best = std::max(best, sse4_1::version() * sse4_1);

sse4_2 = regs[2] >> 20 & 1;
sse4_2 = regs1[2] >> 20 & 1;
best = std::max(best, sse4_2::version() * sse4_2);

fma3_sse = regs[2] >> 12 & 1;
fma3_sse = regs1[2] >> 12 & 1;
if (sse4_2)
best = std::max(best, fma3<xsimd::sse4_2>::version() * fma3_sse);

get_cpuid(regs, 0x80000001);
fma4 = regs[2] >> 16 & 1;
avx = regs1[2] >> 28 & 1;
best = std::max(best, avx::version() * avx);

fma3_avx = avx && fma3_sse;
best = std::max(best, fma3<xsimd::avx>::version() * fma3_avx);

int regs8[4];
get_cpuid(regs8, 0x80000001);
fma4 = regs8[2] >> 16 & 1;
best = std::max(best, fma4::version() * fma4);

// sse4a = regs[2] >> 6 & 1;
@@ -141,29 +158,24 @@ namespace xsimd
// xop = regs[2] >> 11 & 1;
// best = std::max(best, XSIMD_X86_AMD_XOP_VERSION * xop);

avx = regs[2] >> 28 & 1;
best = std::max(best, avx::version() * avx);

fma3_avx = avx && fma3_sse;
best = std::max(best, fma3<xsimd::avx>::version() * fma3_avx);

get_cpuid(regs, 0x7);
avx2 = regs[1] >> 5 & 1;
int regs7[4];
get_cpuid(regs7, 0x7);
avx2 = regs7[1] >> 5 & 1;
best = std::max(best, avx2::version() * avx2);

fma3_avx2 = avx2 && fma3_sse;
best = std::max(best, fma3<xsimd::avx2>::version() * fma3_avx2);

avx512f = regs[1] >> 16 & 1;
avx512f = regs7[1] >> 16 & 1;
best = std::max(best, avx512f::version() * avx512f);

avx512cd = regs[1] >> 28 & 1;
avx512cd = regs7[1] >> 28 & 1;
best = std::max(best, avx512cd::version() * avx512cd * avx512f);

avx512dq = regs[1] >> 17 & 1;
avx512dq = regs7[1] >> 17 & 1;
best = std::max(best, avx512dq::version() * avx512dq * avx512cd * avx512f);

avx512bw = regs[1] >> 30 & 1;
avx512bw = regs7[1] >> 30 & 1;
best = std::max(best, avx512bw::version() * avx512bw * avx512dq * avx512cd * avx512f);

#endif
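The regs1/regs7/regs8 split keeps each cpuid leaf in its own buffer, so reading one leaf no longer clobbers flags decoded from another. The resulting flags are what xsimd::available_architectures() reports at run time; a reading sketch (illustration only, field names taken from the structure shown above):

    #include <xsimd/xsimd.hpp>
    #include <cstdio>

    int main()
    {
        auto features = xsimd::available_architectures(); // filled by the detection code above
        std::printf("sse2: %u avx2: %u avx512f: %u\n",
                    unsigned(features.sse2), unsigned(features.avx2), unsigned(features.avx512f));
        std::printf("best runtime arch version: %u\n", features.best);
        return 0;
    }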
@@ -39,7 +39,7 @@ namespace xsimd
* @tparam T type of objects to allocate.
* @tparam Align alignment in bytes.
*/
template <class T, size_t Align = default_arch::alignment()>
template <class T, size_t Align>
class aligned_allocator
{
public:
@@ -43,8 +43,8 @@ namespace xsimd
using type = unaligned_mode;
};

template <class T>
struct allocator_alignment<aligned_allocator<T>>
template <class T, size_t N>
struct allocator_alignment<aligned_allocator<T, N>>
{
using type = aligned_mode;
};
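The forward declaration drops its duplicated default argument while allocator_alignment now matches any alignment value. A usage sketch for the allocator (illustration only, not part of the patch), assuming default_arch::alignment() is the alignment you want:

    #include <xsimd/xsimd.hpp>
    #include <vector>

    // Vector whose storage is aligned for the default architecture, so
    // aligned loads/stores can be used on its data.
    using aligned_vector =
        std::vector<float, xsimd::aligned_allocator<float, xsimd::default_arch::alignment()>>;

    int main()
    {
        aligned_vector data(1024, 1.0f);
        auto v = xsimd::batch<float>::load_aligned(data.data());
        (void)v;
        return 0;
    }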
@@ -82,7 +82,7 @@ namespace xsimd
* @return the sum of \c x and \c y
*/
template <class T, class A>
inline auto add(batch<T> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
inline auto add(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
{
detail::static_check_supported_config<T, A>();
return x + y;
@@ -546,6 +546,36 @@ namespace xsimd
return kernel::cosh<A>(x, A {});
}

/**
* @ingroup batch_arithmetic
*
* Subtract 1 to batch \c x.
* @param x batch involved in the decrement.
* @return the subtraction of \c x and 1.
*/
template <class T, class A>
inline batch<T, A> decr(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::decr<A>(x, A {});
}

/**
* @ingroup batch_arithmetic
*
* Subtract 1 to batch \c x for each element where \c mask is true.
* @param x batch involved in the increment.
* @param mask whether to perform the increment or not. Can be a \c
* batch_bool or a \c batch_bool_constant.
* @return the subtraction of \c x and 1 when \c mask is true.
*/
template <class T, class A, class Mask>
inline batch<T, A> decr_if(batch<T, A> const& x, Mask const& mask) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::decr_if<A>(x, mask, A {});
}

/**
* @ingroup batch_arithmetic
*
@@ -878,63 +908,6 @@ namespace xsimd
return x > y;
}

/**
* @ingroup batch_reducers
*
* Generic reducer using only batch operations
* @param f reducing function, accepting `batch ()(batch, batch)`
* @param x batch involved in the reduction
* @return the result of the reduction, as a scalar.
*/
template <class T, class A, class F>
inline T reduce(F&& f, batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
}

/**
* @ingroup batch_reducers
*
* Adds all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_add(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_add<A>(x, A {});
}

/**
* @ingroup batch_reducers
*
* Max of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_max(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_max<A>(x, A {});
}

/**
* @ingroup batch_reducers
*
* Min of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_min(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_min<A>(x, A {});
}

/**
* @ingroup batch_reducers
*
@@ -981,6 +954,36 @@ namespace xsimd
return kernel::imag<A>(x, A {});
}

/**
* @ingroup batch_arithmetic
*
* Add 1 to batch \c x.
* @param x batch involved in the increment.
* @return the sum of \c x and 1.
*/
template <class T, class A>
inline batch<T, A> incr(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::incr<A>(x, A {});
}

/**
* @ingroup batch_arithmetic
*
* Add 1 to batch \c x for each element where \c mask is true.
* @param x batch involved in the increment.
* @param mask whether to perform the increment or not. Can be a \c
* batch_bool or a \c batch_bool_constant.
* @return the sum of \c x and 1 when \c mask is true.
*/
template <class T, class A, class Mask>
inline batch<T, A> incr_if(batch<T, A> const& x, Mask const& mask) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::incr_if<A>(x, mask, A {});
}

/**
* @ingroup batch_constant
*
@@ -1595,6 +1598,20 @@ namespace xsimd
return kernel::proj(z, A {});
}

/**
* @ingroup batch_complex
*
* Computes the real part of the batch \c z.
* @param z batch of complex or real values.
* @return the argument of \c z.
*/
template <class T, class A>
inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::real<A>(z, A {});
}

/**
* @ingroup batch_arithmetic
*
@@ -1612,17 +1629,60 @@ namespace xsimd
}

/**
* @ingroup batch_complex
* @ingroup batch_reducers
*
* Computes the real part of the batch \c z.
* @param z batch of complex or real values.
* @return the argument of \c z.
* Generic reducer using only batch operations
* @param f reducing function, accepting `batch ()(batch, batch)`
* @param x batch involved in the reduction
* @return the result of the reduction, as a scalar.
*/
template <class T, class A>
inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
template <class T, class A, class F>
inline T reduce(F&& f, batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::real<A>(z, A {});
return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
}

/**
* @ingroup batch_reducers
*
* Adds all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_add(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_add<A>(x, A {});
}

/**
* @ingroup batch_reducers
*
* Max of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_max(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_max<A>(x, A {});
}

/**
* @ingroup batch_reducers
*
* Min of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_min(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_min<A>(x, A {});
}

/**
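The relocated generic reducer folds a batch with a user-supplied batch-level functor and returns a scalar. A usage sketch next to the specialized reducers (illustration only, not part of the patch):

    #include <xsimd/xsimd.hpp>
    #include <cstdio>

    int main()
    {
        xsimd::batch<float> v(3.0f); // broadcast
        // Generic reduction: the functor sees whole batches until one scalar remains.
        float mx = xsimd::reduce([](auto a, auto b) { return xsimd::max(a, b); }, v);
        // Specialized reducers are still provided.
        float sum = xsimd::reduce_add(v);
        std::printf("max=%g sum=%g\n", double(mx), double(sum));
        return 0;
    }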
@@ -17,7 +17,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* AVX2 instructions
*/

@@ -18,7 +18,7 @@ namespace xsimd
{

/**
* @ingroup arch
* @ingroup architectures
*
* AVX512BW instructions
*/

@@ -18,9 +18,9 @@ namespace xsimd
{

/**
* @ingroup arch
* @ingroup architectures
*
* AVX512CD instrutions
* AVX512CD instructions
*/
struct avx512cd : avx512f
{

@@ -18,7 +18,7 @@ namespace xsimd
{

/**
* @ingroup arch
* @ingroup architectures
*
* AVX512DQ instructions
*/

@@ -18,7 +18,7 @@ namespace xsimd
{

/**
* @ingroup arch
* @ingroup architectures
*
* AVX512F instructions
*/

@@ -18,7 +18,7 @@ namespace xsimd
{

/**
* @ingroup arch
* @ingroup architectures
*
* AVX instructions
*/

@@ -20,7 +20,7 @@ namespace xsimd
struct fma3;

/**
* @ingroup arch
* @ingroup architectures
*
* AVX2 + FMA instructions
*/

@@ -20,7 +20,7 @@ namespace xsimd
struct fma3;

/**
* @ingroup arch
* @ingroup architectures
*
* AVX + FMA instructions
*/

@@ -20,7 +20,7 @@ namespace xsimd
struct fma3;

/**
* @ingroup arch
* @ingroup architectures
*
* SSE4.2 + FMA instructions
*/

@@ -17,9 +17,9 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* FMA4 instructions
* SSE4.2 + FMA4 instructions
*/
struct fma4 : sse4_2
{
@@ -15,17 +15,30 @@
#include "../config/xsimd_config.hpp"

/**
* @defgroup arch Architecture description
* @defgroup architectures Architecture description
* */
namespace xsimd
{
/**
* @ingroup architectures
*
* Base class for all architectures.
*/
struct generic
{
/// Whether this architecture is supported at compile-time.
static constexpr bool supported() noexcept { return true; }
/// Whether this architecture is available at run-time.
static constexpr bool available() noexcept { return true; }
/// If this architectures supports aligned memory accesses, the required
/// alignment.
static constexpr std::size_t alignment() noexcept { return 0; }
/// Whether this architecture requires aligned memory access.
static constexpr bool requires_alignment() noexcept { return false; }
/// Unique identifier for this architecture.
static constexpr unsigned version() noexcept { return generic::version(0, 0, 0); }
/// Name of the architecture.
static constexpr char const* name() noexcept { return "generic"; }

protected:
static constexpr unsigned version(unsigned major, unsigned minor, unsigned patch) noexcept { return major * 10000u + minor * 100u + patch; }
@@ -17,7 +17,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* NEON instructions for arm64
*/

@@ -22,7 +22,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* NEON instructions for arm32
*/

@@ -23,7 +23,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE2 instructions
*/

@@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE3 instructions
*/

@@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE4.1 instructions
*/

@@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE4.2 instructions
*/

@@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSSE3 instructions
*/

@@ -25,7 +25,7 @@ namespace xsimd
namespace detail
{
/**
* @ingroup arch
* @ingroup architectures
*
* SVE instructions (fixed vector size) for arm64
*/
@@ -10,8 +10,8 @@ origin:

url: https://github.com/QuantStack/xsimd

release: e8f209c3397c8a866be2312682689a04e4abfd66 (2023-02-27T06:32:46Z).
revision: e8f209c3397c8a866be2312682689a04e4abfd66
release: 11.1.0 (2023-05-13T15:49:21+00:00).
revision: 11.1.0

license: BSD-3-Clause