Bug 1859085 - Update xsimd to 11.1.0. r=padenot,sergesanspaille

Changelog:

11.1.0
------

* Introduce XSIMD_DEFAULT_ARCH to force default architecture (if any)
* Remove C++ requirement on xsimd::exp10 scalar implementation

11.0.0
------

* Provide a generic reducer
* Fix ``find_package(xsimd)`` for xtl enabled xsimd, reloaded
* Provide avx512f implementation of FMA and variant
* Hexadecimal floating points are not a C++11 feature
* back to slow implementation of exp10 on Windows
* Changed bitwise_cast API
* Provide generic signed/unsigned type conversion
* Fixed sde location
* Feature/incr decr

Depends on D191042

Differential Revision: https://phabricator.services.mozilla.com/D191043
Chris Peterson 2023-10-17 05:59:03 +00:00
Parent 52ca4b99e8
Commit 727ac25d91
33 changed files with 446 additions and 154 deletions

third_party/xsimd/Changelog.rst (vendored)

@ -9,6 +9,61 @@
Changelog
=========
11.1.0
------
* Introduce XSIMD_DEFAULT_ARCH to force default architecture (if any)
* Remove C++ requirement on xsimd::exp10 scalar implementation
* Improve and test documentation
11.0.0
------
* Provide a generic reducer
* Fix ``find_package(xsimd)`` for xtl enabled xsimd, reloaded
* Cleanup benchmark code
* Provide avx512f implementation of FMA and variant
* Hexadecimal floating points are not a C++11 feature
* back to slow implementation of exp10 on Windows
* Changed bitwise_cast API
* Provide generic signed /unsigned type conversion
* Fixed sde location
* Feature/incr decr
* Cleanup documentation
10.0.0
------
* Fix potential ABI issue in SVE support
* Disable fast exp10 on OSX
* Assert on unaligned memory when calling aligned load/store
* Fix warning about uninitialized storage
* Always forward arch parameter
* Do not specialize the behavior of ``simd_return_type`` for char
* Support broadcasting of complex batches
* Make xsimd compatible with -fno-exceptions
* Provide and test comparison operators overloads that accept scalars
9.0.1
-----


@ -43,6 +43,20 @@ namespace xsimd
self, other);
}
// decr
template <class A, class T>
inline batch<T, A> decr(batch<T, A> const& self, requires_arch<generic>) noexcept
{
return self - T(1);
}
// decr_if
template <class A, class T, class Mask>
inline batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
{
return select(mask, decr(self), self);
}
// div
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
@ -112,6 +126,20 @@ namespace xsimd
return { res_r, res_i };
}
// incr
template <class A, class T>
inline batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
{
return self + T(1);
}
// incr_if
template <class A, class T, class Mask>
inline batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
{
return select(mask, incr(self), self);
}
// mul
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
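The generic kernels above back the new user-facing xsimd::incr, xsimd::decr, xsimd::incr_if and xsimd::decr_if entry points declared later in this commit. A minimal usage sketch, not part of the upstream diff:

    #include <xsimd/xsimd.hpp>
    #include <cstdint>

    int main()
    {
        xsimd::batch<int32_t> v(10);                  // broadcast 10 to every lane
        auto mask = v > xsimd::batch<int32_t>(5);     // batch_bool, true in every lane
        auto a = xsimd::incr(v);                      // 11 in every lane
        auto b = xsimd::decr_if(v, mask);             // 9 where mask is true, 10 elsewhere
        return int(xsimd::all(a == xsimd::batch<int32_t>(11)) &&
                   xsimd::all(b == xsimd::batch<int32_t>(9)));
    }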


@ -135,6 +135,51 @@ namespace xsimd
}
}
// some generic fast_cast conversion
namespace detail
{
template <class A>
inline batch<uint8_t, A> fast_cast(batch<int8_t, A> const& self, batch<uint8_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint8_t>(self);
}
template <class A>
inline batch<uint16_t, A> fast_cast(batch<int16_t, A> const& self, batch<uint16_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint16_t>(self);
}
template <class A>
inline batch<uint32_t, A> fast_cast(batch<int32_t, A> const& self, batch<uint32_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint32_t>(self);
}
template <class A>
inline batch<uint64_t, A> fast_cast(batch<int64_t, A> const& self, batch<uint64_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<uint64_t>(self);
}
template <class A>
inline batch<int8_t, A> fast_cast(batch<uint8_t, A> const& self, batch<int8_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int8_t>(self);
}
template <class A>
inline batch<int16_t, A> fast_cast(batch<uint16_t, A> const& self, batch<int16_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int16_t>(self);
}
template <class A>
inline batch<int32_t, A> fast_cast(batch<uint32_t, A> const& self, batch<int32_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int32_t>(self);
}
template <class A>
inline batch<int64_t, A> fast_cast(batch<uint64_t, A> const& self, batch<int64_t, A> const&, requires_arch<generic>) noexcept
{
return bitwise_cast<int64_t>(self);
}
}
namespace detail
{
// Generic conversion handling machinery. Each architecture must define
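These overloads implement the "generic signed/unsigned type conversion" changelog entry: converting between integer batches of the same width is a pure bit reinterpretation, so the generic kernel simply forwards to bitwise_cast. A hedged sketch of the user-facing effect (whether batch_cast routes through these exact kernels on a given target is an assumption):

    #include <xsimd/xsimd.hpp>
    #include <cstdint>

    int main()
    {
        xsimd::batch<int32_t> s(-1);
        // Same-width signed -> unsigned: a bit reinterpretation ...
        auto u = xsimd::bitwise_cast<uint32_t>(s);    // 0xFFFFFFFF in every lane
        // ... which matches the value conversion for -1 as well.
        auto v = xsimd::batch_cast<uint32_t>(s);
        return int(xsimd::all(u == v));
    }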


@ -549,6 +549,13 @@ namespace xsimd
}
}
// decr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept
{
return self + batch<T, A>(mask.data);
}
// div
template <class A>
inline batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
@ -749,6 +756,13 @@ namespace xsimd
return _mm256_add_pd(tmp1, tmp2);
}
// incr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept
{
return self - batch<T, A>(mask.data);
}
// insert
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx>) noexcept
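These AVX specializations (and the matching SSE2 ones further down) rely on the SIMD comparison convention that a true lane is an all-ones bit pattern, i.e. -1 when reinterpreted as a signed integer: adding the mask register therefore subtracts 1 from exactly the selected lanes, and subtracting it adds 1. The same identity on a single lane, as an illustration only:

    int32_t lane = 7;
    int32_t mask = -1;                  // a "true" lane: all bits set
    int32_t decremented = lane + mask;  // 6 -> what decr_if computes per lane
    int32_t incremented = lane - mask;  // 8 -> what incr_if computes per lane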


@ -378,8 +378,8 @@ namespace xsimd
std::complex<float> c0, std::complex<float> c1,
std::complex<float> c2, std::complex<float> c3) noexcept
{
return batch<std::complex<float>>(float32x4_t { c0.real(), c1.real(), c2.real(), c3.real() },
float32x4_t { c0.imag(), c1.imag(), c2.imag(), c3.imag() });
return batch<std::complex<float>, A>(float32x4_t { c0.real(), c1.real(), c2.real(), c3.real() },
float32x4_t { c0.imag(), c1.imag(), c2.imag(), c3.imag() });
}
template <class A, class... Args>


@ -138,6 +138,33 @@ namespace xsimd
return x + y;
}
template <class T>
inline T incr(T const& x) noexcept
{
return x + T(1);
}
template <class T>
inline T incr_if(T const& x, bool mask) noexcept
{
return x + T(mask ? 1 : 0);
}
inline bool all(bool mask)
{
return mask;
}
inline bool any(bool mask)
{
return mask;
}
inline bool none(bool mask)
{
return !mask;
}
template <class T>
inline typename std::enable_if<std::is_integral<T>::value, T>::type
bitwise_and(T x, T y) noexcept
@ -470,11 +497,13 @@ namespace xsimd
#else
inline float exp10(const float& x) noexcept
{
return std::exp(0x1.26bb1cp+1f * x);
const float ln10 = std::log(10.f);
return std::exp(ln10 * x);
}
inline double exp10(const double& x) noexcept
{
return std::exp(0x1.26bb1bbb55516p+1 * x);
const double ln10 = std::log(10.);
return std::exp(ln10 * x);
}
#endif
@ -738,6 +767,18 @@ namespace xsimd
return x - y;
}
template <class T>
inline T decr(T const& x) noexcept
{
return x - T(1);
}
template <class T>
inline T decr_if(T const& x, bool mask) noexcept
{
return x - T(mask ? 1 : 0);
}
#ifdef XSIMD_ENABLE_XTL_COMPLEX
template <class T, bool i3ec>
inline xtl::xcomplex<T, T, i3ec> log2(const xtl::xcomplex<T, T, i3ec>& val) noexcept
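The exp10 fallback above replaces hexadecimal floating-point literals (a C++17 feature, hence the "Hexadecimal floating points are not a C++11 feature" changelog entry) with a run-time std::log(10): the removed constant 0x1.26bb1bbb55516p+1 is just ln 10 ≈ 2.302585, and 10^x = exp(x * ln 10). A scalar illustration, not upstream code:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        double x = 3.0;
        // 10^x computed through the natural exponential, exactly as the
        // portable fallback does: exp(log(10) * x).
        double y = std::exp(std::log(10.0) * x);
        std::printf("%f\n", y);   // ~1000.0
        return 0;
    }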


@ -501,6 +501,13 @@ namespace xsimd
}
}
// decr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<sse2>) noexcept
{
return self + batch<T, A>(mask.data);
}
// div
template <class A>
inline batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
@ -808,6 +815,13 @@ namespace xsimd
_mm_unpackhi_pd(row[0], row[1]));
}
// incr_if
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<sse2>) noexcept
{
return self - batch<T, A>(mask.data);
}
// insert
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<sse2>) noexcept


@ -23,6 +23,22 @@
namespace xsimd
{
/**
* @ingroup architectures
*
* Dummy architectures that only appears in a list of architecture when no
* other architecture has been detected.
*/
struct unavailable
{
static constexpr bool supported() noexcept { return false; }
static constexpr bool available() noexcept { return false; }
static constexpr unsigned version() noexcept { return 0; }
static constexpr std::size_t alignment() noexcept { return 0; }
static constexpr bool requires_alignment() noexcept { return false; }
static constexpr char const* name() noexcept { return "<none>"; }
};
namespace detail
{
// Checks whether T appears in Tys.
@ -73,6 +89,21 @@ namespace xsimd
return max_of((head0 > head1 ? head0 : head1), tail...);
}
template <typename... Ts>
struct head;
template <typename T, typename... Ts>
struct head<T, Ts...>
{
using type = T;
};
template <>
struct head<>
{
using type = unavailable;
};
} // namespace detail
// An arch_list is a list of architectures, sorted by version number.
@ -84,6 +115,8 @@ namespace xsimd
"architecture list must be sorted by version");
#endif
using best = typename detail::head<Archs...>::type;
template <class Arch>
using add = arch_list<Archs..., Arch>;
@ -109,34 +142,8 @@ namespace xsimd
}
};
struct unavailable
{
static constexpr bool supported() noexcept { return false; }
static constexpr bool available() noexcept { return false; }
static constexpr unsigned version() noexcept { return 0; }
static constexpr std::size_t alignment() noexcept { return 0; }
static constexpr bool requires_alignment() noexcept { return false; }
static constexpr char const* name() noexcept { return "<none>"; }
};
namespace detail
{
// Pick the best architecture in arch_list L, which is the last
// because architectures are sorted by version.
template <class L>
struct best;
template <>
struct best<arch_list<>>
{
using type = unavailable;
};
template <class Arch, class... Archs>
struct best<arch_list<Arch, Archs...>>
{
using type = Arch;
};
// Filter archlists Archs, picking only supported archs and adding
// them to L.
@ -190,12 +197,15 @@ namespace xsimd
using supported_architectures = typename detail::supported<all_architectures>::type;
using x86_arch = typename detail::best<typename detail::supported<all_x86_architectures>::type>::type;
using arm_arch = typename detail::best<typename detail::supported<all_arm_architectures>::type>::type;
// using default_arch = typename detail::best<typename detail::supported<arch_list</*arm_arch,*/ x86_arch>>::type>::type;
using default_arch = typename std::conditional<std::is_same<x86_arch, unavailable>::value,
arm_arch,
x86_arch>::type;
using x86_arch = typename detail::supported<all_x86_architectures>::type::best;
using arm_arch = typename detail::supported<all_arm_architectures>::type::best;
using best_arch = typename supported_architectures::best;
#ifdef XSIMD_DEFAULT_ARCH
using default_arch = XSIMD_DEFAULT_ARCH;
#else
using default_arch = best_arch;
#endif
namespace detail
{
@ -203,7 +213,7 @@ namespace xsimd
class dispatcher
{
const unsigned best_arch;
const unsigned best_arch_found;
F functor;
template <class Arch, class... Tys>
@ -216,7 +226,7 @@ namespace xsimd
template <class Arch, class ArchNext, class... Archs, class... Tys>
auto walk_archs(arch_list<Arch, ArchNext, Archs...>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...))
{
if (Arch::version() <= best_arch)
if (Arch::version() <= best_arch_found)
return functor(Arch {}, std::forward<Tys>(args)...);
else
return walk_archs(arch_list<ArchNext, Archs...> {}, std::forward<Tys>(args)...);
@ -224,7 +234,7 @@ namespace xsimd
public:
dispatcher(F f) noexcept
: best_arch(available_architectures().best)
: best_arch_found(available_architectures().best)
, functor(f)
{
}
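Two things happen in this hunk: the best-architecture selection moves from the old detail::best metafunction to a best member alias on arch_list (with the relocated unavailable struct as the empty-list fallback), and the new XSIMD_DEFAULT_ARCH macro, when defined, overrides the computed default. A hedged usage sketch; the macro must name an xsimd architecture type and is set on the compiler command line:

    // Build with, for example:  -DXSIMD_DEFAULT_ARCH=xsimd::sse2
    #include <xsimd/xsimd.hpp>
    #include <cstdio>

    int main()
    {
        // default_arch is XSIMD_DEFAULT_ARCH when the macro is defined,
        // otherwise the best architecture enabled at compile time.
        std::printf("default arch: %s\n", xsimd::default_arch::name());
        return 0;
    }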


@ -12,9 +12,9 @@
#ifndef XSIMD_CONFIG_HPP
#define XSIMD_CONFIG_HPP
#define XSIMD_VERSION_MAJOR 10
#define XSIMD_VERSION_MAJOR 11
#define XSIMD_VERSION_MINOR 0
#define XSIMD_VERSION_PATCH 0
#define XSIMD_VERSION_PATCH 1
/**
* high level free functions


@ -52,6 +52,7 @@ namespace xsimd
unsigned avx512bw : 1;
unsigned neon : 1;
unsigned neon64 : 1;
unsigned sve : 1;
// version number of the best arch available
unsigned best;
@ -75,6 +76,15 @@ namespace xsimd
neon64 = 0;
best = neon::version() * neon;
#elif defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE);
#else
sve = 0;
#endif
best = sve::version() * sve;
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
auto get_cpuid = [](int reg[4], int func_id) noexcept
{
@ -108,31 +118,38 @@ namespace xsimd
#endif
};
int regs[4];
int regs1[4];
get_cpuid(regs, 0x1);
get_cpuid(regs1, 0x1);
sse2 = regs[3] >> 26 & 1;
sse2 = regs1[3] >> 26 & 1;
best = std::max(best, sse2::version() * sse2);
sse3 = regs[2] >> 0 & 1;
sse3 = regs1[2] >> 0 & 1;
best = std::max(best, sse3::version() * sse3);
ssse3 = regs[2] >> 9 & 1;
ssse3 = regs1[2] >> 9 & 1;
best = std::max(best, ssse3::version() * ssse3);
sse4_1 = regs[2] >> 19 & 1;
sse4_1 = regs1[2] >> 19 & 1;
best = std::max(best, sse4_1::version() * sse4_1);
sse4_2 = regs[2] >> 20 & 1;
sse4_2 = regs1[2] >> 20 & 1;
best = std::max(best, sse4_2::version() * sse4_2);
fma3_sse = regs[2] >> 12 & 1;
fma3_sse = regs1[2] >> 12 & 1;
if (sse4_2)
best = std::max(best, fma3<xsimd::sse4_2>::version() * fma3_sse);
get_cpuid(regs, 0x80000001);
fma4 = regs[2] >> 16 & 1;
avx = regs1[2] >> 28 & 1;
best = std::max(best, avx::version() * avx);
fma3_avx = avx && fma3_sse;
best = std::max(best, fma3<xsimd::avx>::version() * fma3_avx);
int regs8[4];
get_cpuid(regs8, 0x80000001);
fma4 = regs8[2] >> 16 & 1;
best = std::max(best, fma4::version() * fma4);
// sse4a = regs[2] >> 6 & 1;
@ -141,29 +158,24 @@ namespace xsimd
// xop = regs[2] >> 11 & 1;
// best = std::max(best, XSIMD_X86_AMD_XOP_VERSION * xop);
avx = regs[2] >> 28 & 1;
best = std::max(best, avx::version() * avx);
fma3_avx = avx && fma3_sse;
best = std::max(best, fma3<xsimd::avx>::version() * fma3_avx);
get_cpuid(regs, 0x7);
avx2 = regs[1] >> 5 & 1;
int regs7[4];
get_cpuid(regs7, 0x7);
avx2 = regs7[1] >> 5 & 1;
best = std::max(best, avx2::version() * avx2);
fma3_avx2 = avx2 && fma3_sse;
best = std::max(best, fma3<xsimd::avx2>::version() * fma3_avx2);
avx512f = regs[1] >> 16 & 1;
avx512f = regs7[1] >> 16 & 1;
best = std::max(best, avx512f::version() * avx512f);
avx512cd = regs[1] >> 28 & 1;
avx512cd = regs7[1] >> 28 & 1;
best = std::max(best, avx512cd::version() * avx512cd * avx512f);
avx512dq = regs[1] >> 17 & 1;
avx512dq = regs7[1] >> 17 & 1;
best = std::max(best, avx512dq::version() * avx512dq * avx512cd * avx512f);
avx512bw = regs[1] >> 30 & 1;
avx512bw = regs7[1] >> 30 & 1;
best = std::max(best, avx512bw::version() * avx512bw * avx512dq * avx512cd * avx512f);
#endif
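The detection rewrite also stops reusing a single regs array across CPUID leaves: leaf 0x1 results go to regs1, leaf 0x7 to regs7 and leaf 0x80000001 to regs8, and the AVX bit is now read from leaf 0x1 where it actually lives. The results are exposed through xsimd::available_architectures(), as used by the dispatcher above; a hedged query sketch:

    #include <xsimd/xsimd.hpp>
    #include <cstdio>

    int main()
    {
        auto caps = xsimd::available_architectures();
        std::printf("avx2 available: %u, best version: %u\n",
                    unsigned(caps.avx2), caps.best);
        return 0;
    }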


@ -39,7 +39,7 @@ namespace xsimd
* @tparam T type of objects to allocate.
* @tparam Align alignment in bytes.
*/
template <class T, size_t Align = default_arch::alignment()>
template <class T, size_t Align>
class aligned_allocator
{
public:
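With this change aligned_allocator no longer computes its default alignment from default_arch at this declaration; the alignment is an explicit template parameter here (a default argument is presumably supplied by a forward declaration elsewhere). Passing the alignment explicitly keeps working; a minimal sketch:

    #include <vector>
    #include <xsimd/xsimd.hpp>

    // Storage aligned for the default architecture; the alignment is spelled
    // out explicitly instead of relying on the allocator's default argument.
    using aligned_vec =
        std::vector<float, xsimd::aligned_allocator<float, xsimd::default_arch::alignment()>>;

    int main()
    {
        aligned_vec v(xsimd::batch<float>::size, 1.0f);
        auto b = xsimd::batch<float>::load_aligned(v.data());
        return int(xsimd::reduce_add(b));   // number of lanes
    }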


@ -43,8 +43,8 @@ namespace xsimd
using type = unaligned_mode;
};
template <class T>
struct allocator_alignment<aligned_allocator<T>>
template <class T, size_t N>
struct allocator_alignment<aligned_allocator<T, N>>
{
using type = aligned_mode;
};


@ -82,7 +82,7 @@ namespace xsimd
* @return the sum of \c x and \c y
*/
template <class T, class A>
inline auto add(batch<T> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
inline auto add(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
{
detail::static_check_supported_config<T, A>();
return x + y;
@ -546,6 +546,36 @@ namespace xsimd
return kernel::cosh<A>(x, A {});
}
/**
* @ingroup batch_arithmetic
*
* Subtract 1 to batch \c x.
* @param x batch involved in the decrement.
* @return the subtraction of \c x and 1.
*/
template <class T, class A>
inline batch<T, A> decr(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::decr<A>(x, A {});
}
/**
* @ingroup batch_arithmetic
*
* Subtract 1 to batch \c x for each element where \c mask is true.
* @param x batch involved in the increment.
* @param mask whether to perform the increment or not. Can be a \c
* batch_bool or a \c batch_bool_constant.
* @return the subtraction of \c x and 1 when \c mask is true.
*/
template <class T, class A, class Mask>
inline batch<T, A> decr_if(batch<T, A> const& x, Mask const& mask) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::decr_if<A>(x, mask, A {});
}
/**
* @ingroup batch_arithmetic
*
@ -878,63 +908,6 @@ namespace xsimd
return x > y;
}
/**
* @ingroup batch_reducers
*
* Generic reducer using only batch operations
* @param f reducing function, accepting `batch ()(batch, batch)`
* @param x batch involved in the reduction
* @return the result of the reduction, as a scalar.
*/
template <class T, class A, class F>
inline T reduce(F&& f, batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
}
/**
* @ingroup batch_reducers
*
* Adds all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_add(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_add<A>(x, A {});
}
/**
* @ingroup batch_reducers
*
* Max of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_max(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_max<A>(x, A {});
}
/**
* @ingroup batch_reducers
*
* Min of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_min(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_min<A>(x, A {});
}
/**
* @ingroup batch_reducers
*
@ -981,6 +954,36 @@ namespace xsimd
return kernel::imag<A>(x, A {});
}
/**
* @ingroup batch_arithmetic
*
* Add 1 to batch \c x.
* @param x batch involved in the increment.
* @return the sum of \c x and 1.
*/
template <class T, class A>
inline batch<T, A> incr(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::incr<A>(x, A {});
}
/**
* @ingroup batch_arithmetic
*
* Add 1 to batch \c x for each element where \c mask is true.
* @param x batch involved in the increment.
* @param mask whether to perform the increment or not. Can be a \c
* batch_bool or a \c batch_bool_constant.
* @return the sum of \c x and 1 when \c mask is true.
*/
template <class T, class A, class Mask>
inline batch<T, A> incr_if(batch<T, A> const& x, Mask const& mask) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::incr_if<A>(x, mask, A {});
}
/**
* @ingroup batch_constant
*
@ -1595,6 +1598,20 @@ namespace xsimd
return kernel::proj(z, A {});
}
/**
* @ingroup batch_complex
*
* Computes the real part of the batch \c z.
* @param z batch of complex or real values.
* @return the argument of \c z.
*/
template <class T, class A>
inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::real<A>(z, A {});
}
/**
* @ingroup batch_arithmetic
*
@ -1612,17 +1629,60 @@ namespace xsimd
}
/**
* @ingroup batch_complex
* @ingroup batch_reducers
*
* Computes the real part of the batch \c z.
* @param z batch of complex or real values.
* @return the argument of \c z.
* Generic reducer using only batch operations
* @param f reducing function, accepting `batch ()(batch, batch)`
* @param x batch involved in the reduction
* @return the result of the reduction, as a scalar.
*/
template <class T, class A>
inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
template <class T, class A, class F>
inline T reduce(F&& f, batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::real<A>(z, A {});
return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
}
/**
* @ingroup batch_reducers
*
* Adds all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_add(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_add<A>(x, A {});
}
/**
* @ingroup batch_reducers
*
* Max of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_max(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_max<A>(x, A {});
}
/**
* @ingroup batch_reducers
*
* Min of all the scalars of the batch \c x.
* @param x batch involved in the reduction
* @return the result of the reduction.
*/
template <class T, class A>
inline T reduce_min(batch<T, A> const& x) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::reduce_min<A>(x, A {});
}
/**
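The rest of the hunk moves real() up and the reducers down, apparently to restore alphabetical ordering. reduce() itself is the "generic reducer" from 11.0.0: it folds the lanes of a batch with a user-supplied batch-to-batch operation. A minimal sketch, not upstream code:

    #include <xsimd/xsimd.hpp>

    int main()
    {
        xsimd::batch<float> x(2.0f);
        // Generic reducer: f must accept and return batches; this lambda
        // re-implements reduce_add.
        float sum  = xsimd::reduce([](auto a, auto b) { return a + b; }, x);
        float sum2 = xsimd::reduce_add(x);
        return int(sum == sum2);
    }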


@ -17,7 +17,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* AVX2 instructions
*/


@ -18,7 +18,7 @@ namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* AVX512BW instructions
*/


@ -18,9 +18,9 @@ namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* AVX512CD instrutions
* AVX512CD instructions
*/
struct avx512cd : avx512f
{


@ -18,7 +18,7 @@ namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* AVX512DQ instructions
*/


@ -18,7 +18,7 @@ namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* AVX512F instructions
*/


@ -18,7 +18,7 @@ namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* AVX instructions
*/


@ -20,7 +20,7 @@ namespace xsimd
struct fma3;
/**
* @ingroup arch
* @ingroup architectures
*
* AVX2 + FMA instructions
*/


@ -20,7 +20,7 @@ namespace xsimd
struct fma3;
/**
* @ingroup arch
* @ingroup architectures
*
* AVX + FMA instructions
*/


@ -20,7 +20,7 @@ namespace xsimd
struct fma3;
/**
* @ingroup arch
* @ingroup architectures
*
* SSE4.2 + FMA instructions
*/


@ -17,9 +17,9 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* FMA4 instructions
* SSE4.2 + FMA4 instructions
*/
struct fma4 : sse4_2
{


@ -15,17 +15,30 @@
#include "../config/xsimd_config.hpp"
/**
* @defgroup arch Architecture description
* @defgroup architectures Architecture description
* */
namespace xsimd
{
/**
* @ingroup architectures
*
* Base class for all architectures.
*/
struct generic
{
/// Whether this architecture is supported at compile-time.
static constexpr bool supported() noexcept { return true; }
/// Whether this architecture is available at run-time.
static constexpr bool available() noexcept { return true; }
/// If this architectures supports aligned memory accesses, the required
/// alignment.
static constexpr std::size_t alignment() noexcept { return 0; }
/// Whether this architecture requires aligned memory access.
static constexpr bool requires_alignment() noexcept { return false; }
/// Unique identifier for this architecture.
static constexpr unsigned version() noexcept { return generic::version(0, 0, 0); }
/// Name of the architecture.
static constexpr char const* name() noexcept { return "generic"; }
protected:
static constexpr unsigned version(unsigned major, unsigned minor, unsigned patch) noexcept { return major * 10000u + minor * 100u + patch; }


@ -17,7 +17,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* NEON instructions for arm64
*/


@ -22,7 +22,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* NEON instructions for arm32
*/


@ -23,7 +23,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE2 instructions
*/


@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE3 instructions
*/


@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE4.1 instructions
*/


@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSE4.2 instructions
*/


@ -21,7 +21,7 @@
namespace xsimd
{
/**
* @ingroup arch
* @ingroup architectures
*
* SSSE3 instructions
*/


@ -25,7 +25,7 @@ namespace xsimd
namespace detail
{
/**
* @ingroup arch
* @ingroup architectures
*
* SVE instructions (fixed vector size) for arm64
*/

third_party/xsimd/moz.yaml (vendored)

@ -10,8 +10,8 @@ origin:
url: https://github.com/QuantStack/xsimd
release: e8f209c3397c8a866be2312682689a04e4abfd66 (2023-02-27T06:32:46Z).
revision: e8f209c3397c8a866be2312682689a04e4abfd66
release: 11.1.0 (2023-05-13T15:49:21+00:00).
revision: 11.1.0
license: BSD-3-Clause