зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1867339 - Update xsimd to 12.1.1 r=padenot
This most notably introduces support for AVX-VNNI, which is going to be useful to improve gemmology performance. It also fixes a critical bug with regard to unsupported architectures. Differential Revision: https://phabricator.services.mozilla.com/D195849
This commit is contained in:
Родитель
111705f5fd
Коммит
adc9a6a868
|
@ -9,6 +9,41 @@
|
|||
Changelog
|
||||
=========
|
||||
|
||||
12.1.1
|
||||
------
|
||||
|
||||
* Update readme with a section on adoption, and a section on the history of the project
|
||||
|
||||
* Fix/avx512vnni implementation
|
||||
|
||||
* Fix regression on XSIMD_NO_SUPPORTED_ARCHITECTURE
|
||||
|
||||
12.1.0
|
||||
------
|
||||
|
||||
* Fix various problems with architecture version handling
|
||||
|
||||
* Specialize xsimd::compress for riscv
|
||||
|
||||
* Provide stubs for various avx512xx architectures
|
||||
|
||||
12.0.0
|
||||
------
|
||||
|
||||
* Fix sincos implementation to cope with Emscripten
|
||||
|
||||
* Upgraded minimal version of cmake to remove deprecation warning
|
||||
|
||||
* Fixed constants::signmask for GCC when using ffast-math
|
||||
|
||||
* Add RISC-V Vector support
|
||||
|
||||
* Generic, simple implementation for xsimd::compress
|
||||
|
||||
* Disable batch of bools, and suggest using batch_bool instead
|
||||
|
||||
* Add an option to skip installation
|
||||
|
||||
11.2.0
|
||||
------
|
||||
|
||||
|
|
|
@ -95,12 +95,12 @@ namespace xsimd
|
|||
template <class A>
|
||||
inline batch<float, A> bitofsign(batch<float, A> const& self, requires_arch<generic>) noexcept
|
||||
{
|
||||
return self & constants::minuszero<batch<float, A>>();
|
||||
return self & constants::signmask<batch<float, A>>();
|
||||
}
|
||||
template <class A>
|
||||
inline batch<double, A> bitofsign(batch<double, A> const& self, requires_arch<generic>) noexcept
|
||||
{
|
||||
return self & constants::minuszero<batch<double, A>>();
|
||||
return self & constants::signmask<batch<double, A>>();
|
||||
}
|
||||
|
||||
// bitwise_cast
|
||||
|
@ -974,12 +974,8 @@ namespace xsimd
|
|||
template <class A, class T>
|
||||
inline batch<std::complex<T>, A> polar(const batch<T, A>& r, const batch<T, A>& theta, requires_arch<generic>) noexcept
|
||||
{
|
||||
#ifndef EMSCRIPTEN
|
||||
auto sincosTheta = sincos(theta);
|
||||
return { r * sincosTheta.second, r * sincosTheta.first };
|
||||
#else
|
||||
return { r * cos(theta), r * sin(theta) };
|
||||
#endif
|
||||
}
|
||||
|
||||
// fdim
|
||||
|
|
|
@ -32,6 +32,60 @@ namespace xsimd
|
|||
|
||||
using namespace types;
|
||||
|
||||
// compress
|
||||
namespace detail
|
||||
{
|
||||
template <class IT, class A, class I, size_t... Is>
|
||||
inline batch<IT, A> create_compress_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence<Is...>)
|
||||
{
|
||||
batch<IT, A> swizzle_mask(IT(0));
|
||||
alignas(A::alignment()) IT mask_buffer[batch<IT, A>::size] = { Is... };
|
||||
size_t inserted = 0;
|
||||
for (size_t i = 0; i < sizeof...(Is); ++i)
|
||||
if ((bitmask >> i) & 1u)
|
||||
std::swap(mask_buffer[inserted++], mask_buffer[i]);
|
||||
return batch<IT, A>::load_aligned(&mask_buffer[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// Generic (architecture-independent) compress kernel.
// Clears the lanes of `x` that are not selected by `mask`, then reorders the
// result with a swizzle whose index vector is derived from the mask bits.
// NOTE(review): presumably swizzle(v, idx) yields out[k] = v[idx[k]], which
// would pack the selected lanes at the front followed by zeros - confirm
// against the swizzle definition.
template <typename A, typename T>
|
||||
inline batch<T, A>
|
||||
compress(batch<T, A> const& x, batch_bool<T, A> const& mask,
|
||||
kernel::requires_arch<generic>) noexcept
|
||||
{
|
||||
// Index vectors use the unsigned integer type associated with T.
using IT = as_unsigned_integer_t<T>;
|
||||
constexpr std::size_t size = batch_bool<T, A>::size;
|
||||
// Integer bitmask form of the boolean batch.
auto bitmask = mask.mask();
|
||||
// Zero out every lane that is not selected.
auto z = select(mask, x, batch<T, A>((T)0));
|
||||
auto compress_mask = detail::create_compress_swizzle_mask<IT, A>(bitmask, ::xsimd::detail::make_index_sequence<size>());
|
||||
return swizzle(z, compress_mask);
|
||||
}
|
||||
|
||||
// expand
|
||||
namespace detail
|
||||
{
|
||||
// Builds the index vector used by the generic `expand` kernel.
// For each lane index Is, in increasing order, the current counter `j` is
// inserted at position Is and `j` is then advanced by bit Is of `bitmask`;
// position Is therefore receives the number of set bits below Is.
// NOTE(review): presumably insert(b, v, index<K>()) returns `b` with lane K
// replaced by `v` - confirm against the insert definition.
template <class IT, class A, class I, size_t... Is>
|
||||
inline batch<IT, A> create_expand_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence<Is...>)
|
||||
{
|
||||
batch<IT, A> swizzle_mask(IT(0));
|
||||
IT j = 0;
|
||||
// The braced initializer list forces left-to-right evaluation of the pack
// expansion, which the running counter `j` depends on; the resulting list
// of `true` values is discarded.
(void)std::initializer_list<bool> { ((swizzle_mask = insert(swizzle_mask, j, index<Is>())), (j += ((bitmask >> Is) & 1u)), true)... };
|
||||
return swizzle_mask;
|
||||
}
|
||||
}
|
||||
|
||||
// Generic (architecture-independent) expand kernel.
// Reorders `x` with a swizzle whose index vector is derived from the mask
// bits, then replaces every lane not selected by `mask` with zero.
// NOTE(review): presumably swizzle(v, idx) yields out[k] = v[idx[k]], so that
// the leading elements of `x` are spread onto the selected lanes - confirm
// against the swizzle definition.
template <typename A, typename T>
|
||||
inline batch<T, A>
|
||||
expand(batch<T, A> const& x, batch_bool<T, A> const& mask,
|
||||
kernel::requires_arch<generic>) noexcept
|
||||
{
|
||||
constexpr std::size_t size = batch_bool<T, A>::size;
|
||||
// Integer bitmask form of the boolean batch.
auto bitmask = mask.mask();
|
||||
auto swizzle_mask = detail::create_expand_swizzle_mask<as_unsigned_integer_t<T>, A>(bitmask, ::xsimd::detail::make_index_sequence<size>());
|
||||
auto z = swizzle(x, swizzle_mask);
|
||||
// Lanes outside the mask are forced to zero.
return select(mask, z, batch<T, A>(T(0)));
|
||||
}
|
||||
|
||||
// extract_pair
|
||||
template <class A, class T>
|
||||
inline batch<T, A> extract_pair(batch<T, A> const& self, batch<T, A> const& other, std::size_t i, requires_arch<generic>) noexcept
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512ER_HPP
|
||||
#define XSIMD_AVX512ER_HPP
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../types/xsimd_avx512er_register.hpp"
|
||||
|
||||
#endif
|
|
@ -661,6 +661,38 @@ namespace xsimd
|
|||
return _mm512_roundscale_pd(self, _MM_FROUND_TO_POS_INF);
|
||||
}
|
||||
|
||||
// compress
|
||||
template <class A>
|
||||
inline batch<float, A> compress(batch<float, A> const& self, batch_bool<float, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_compress_ps(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<double, A> compress(batch<double, A> const& self, batch_bool<double, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_compress_pd(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<int32_t, A> compress(batch<int32_t, A> const& self, batch_bool<int32_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_compress_epi32(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<uint32_t, A> compress(batch<uint32_t, A> const& self, batch_bool<uint32_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_compress_epi32(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<int64_t, A> compress(batch<int64_t, A> const& self, batch_bool<int64_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_compress_epi64(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<uint64_t, A> compress(batch<uint64_t, A> const& self, batch_bool<uint64_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_compress_epi64(mask.mask(), self);
|
||||
}
|
||||
|
||||
// convert
|
||||
namespace detail
|
||||
{
|
||||
|
@ -756,6 +788,38 @@ namespace xsimd
|
|||
return register_type(~self.data ^ other.data);
|
||||
}
|
||||
|
||||
// expand
|
||||
template <class A>
|
||||
inline batch<float, A> expand(batch<float, A> const& self, batch_bool<float, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_expand_ps(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<double, A> expand(batch<double, A> const& self, batch_bool<double, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_expand_pd(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<int32_t, A> expand(batch<int32_t, A> const& self, batch_bool<int32_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_expand_epi32(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<uint32_t, A> expand(batch<uint32_t, A> const& self, batch_bool<uint32_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_expand_epi32(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<int64_t, A> expand(batch<int64_t, A> const& self, batch_bool<int64_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_expand_epi64(mask.mask(), self);
|
||||
}
|
||||
template <class A>
|
||||
inline batch<uint64_t, A> expand(batch<uint64_t, A> const& self, batch_bool<uint64_t, A> const& mask, requires_arch<avx512f>) noexcept
|
||||
{
|
||||
return _mm512_maskz_expand_epi64(mask.mask(), self);
|
||||
}
|
||||
|
||||
// floor
|
||||
template <class A>
|
||||
inline batch<float, A> floor(batch<float, A> const& self, requires_arch<avx512f>) noexcept
|
||||
|
@ -1969,10 +2033,12 @@ namespace xsimd
|
|||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
assert(false && "not implemented yet");
|
||||
return {};
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
assert(false && "not implemented yet");
|
||||
return {};
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
|
||||
{
|
||||
|
@ -2035,10 +2101,12 @@ namespace xsimd
|
|||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
assert(false && "not implemented yet");
|
||||
return {};
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
assert(false && "not implemented yet");
|
||||
return {};
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
// Kernel stub header for the AVX512-IFMA architecture.
// The guard and the register include previously duplicated those of the
// AVX512-VBMI stub, so whichever of the two headers was included second was
// skipped and the IFMA register header was never pulled in through this file.
// NOTE(review): assumes ../types/xsimd_avx512ifma_register.hpp exists next to
// the other avx512* register headers - confirm.
#ifndef XSIMD_AVX512IFMA_HPP
#define XSIMD_AVX512IFMA_HPP

#include <array>
#include <type_traits>

#include "../types/xsimd_avx512ifma_register.hpp"

#endif
|
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512PF_HPP
|
||||
#define XSIMD_AVX512PF_HPP
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../types/xsimd_avx512pf_register.hpp"
|
||||
|
||||
#endif
|
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512VBMI_HPP
|
||||
#define XSIMD_AVX512VBMI_HPP
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../types/xsimd_avx512vbmi_register.hpp"
|
||||
|
||||
#endif
|
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512VNNI_AVX512_BW_HPP
|
||||
#define XSIMD_AVX512VNNI_AVX512_BW_HPP
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../types/xsimd_avx512vnni_avx512bw_register.hpp"
|
||||
|
||||
#endif
|
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512VNNI_AVX512VBMI_HPP
|
||||
#define XSIMD_AVX512VNNI_AVX512VBMI_HPP
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../types/xsimd_avx512vnni_avx512vbmi_register.hpp"
|
||||
|
||||
#endif
|
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVXVNNI_HPP
|
||||
#define XSIMD_AVXVNNI_HPP
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../types/xsimd_avxvnni_register.hpp"
|
||||
|
||||
#endif
|
|
@ -56,6 +56,11 @@ namespace xsimd
|
|||
return bit_cast<double>((uint64_t)DOUBLE); \
|
||||
}
|
||||
|
||||
// Under fast-math, GCC might replace signmask (minus zero) by zero
|
||||
#if defined(__FAST_MATH__) && defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC optimize("signed-zeros")
|
||||
#endif
|
||||
XSIMD_DEFINE_CONSTANT(infinity, (std::numeric_limits<float>::infinity()), (std::numeric_limits<double>::infinity()))
|
||||
XSIMD_DEFINE_CONSTANT(invlog_2, 1.442695040888963407359924681001892137426645954152986f, 1.442695040888963407359924681001892137426645954152986)
|
||||
XSIMD_DEFINE_CONSTANT_HEX(invlog_2hi, 0x3fb8b000, 0x3ff7154765200000)
|
||||
|
@ -79,7 +84,6 @@ namespace xsimd
|
|||
XSIMD_DEFINE_CONSTANT(minlog2, -127.0f, -1023.)
|
||||
XSIMD_DEFINE_CONSTANT(minlog10, -37.89999771118164f, -308.2547155599167)
|
||||
XSIMD_DEFINE_CONSTANT(minusinfinity, (-infinity<float>()), (-infinity<double>()))
|
||||
XSIMD_DEFINE_CONSTANT(minuszero, -0.0f, -0.0)
|
||||
XSIMD_DEFINE_CONSTANT_HEX(nan, 0xffffffff, 0xffffffffffffffff)
|
||||
XSIMD_DEFINE_CONSTANT_HEX(oneosqrteps, 0x453504f3, 0x4190000000000000)
|
||||
XSIMD_DEFINE_CONSTANT_HEX(oneotwoeps, 0x4a800000, 0x4320000000000000)
|
||||
|
@ -104,6 +108,9 @@ namespace xsimd
|
|||
XSIMD_DEFINE_CONSTANT_HEX(twoopi, 0x3f22f983, 0x3fe45f306dc9c883)
|
||||
XSIMD_DEFINE_CONSTANT(twotonmb, 8388608.0f, 4503599627370496.0)
|
||||
XSIMD_DEFINE_CONSTANT_HEX(twotonmbo3, 0x3ba14518, 0x3ed428a2f98d7286)
|
||||
#if defined(__FAST_MATH__) && defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
#undef XSIMD_DEFINE_CONSTANT
|
||||
#undef XSIMD_DEFINE_CONSTANT_HEX
|
||||
|
|
|
@ -52,6 +52,10 @@
|
|||
#include "./xsimd_fma3_avx.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVXVNNI
|
||||
#include "./xsimd_avxvnni.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVX2
|
||||
#include "./xsimd_avx2.hpp"
|
||||
#endif
|
||||
|
@ -68,6 +72,30 @@
|
|||
#include "./xsimd_avx512bw.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVX512ER
|
||||
#include "./xsimd_avx512er.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVX512PF
|
||||
#include "./xsimd_avx512pf.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVX512IFMA
|
||||
#include "./xsimd_avx512ifma.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVX512VBMI
|
||||
#include "./xsimd_avx512vbmi.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVX512VNNI_AVX512BW
|
||||
#include "./xsimd_avx512vnni_avx512bw.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_AVX512VNNI_AVX512VBMI
|
||||
#include "./xsimd_avx512vnni_avx512vbmi.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_NEON
|
||||
#include "./xsimd_neon.hpp"
|
||||
#endif
|
||||
|
@ -80,6 +108,10 @@
|
|||
#include "./xsimd_sve.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_RVV
|
||||
#include "./xsimd_rvv.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_WASM
|
||||
#include "./xsimd_wasm.hpp"
|
||||
#endif
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -380,7 +380,7 @@ namespace xsimd
|
|||
template <class A>
|
||||
inline batch_bool<float, A> eq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_f32x4_eq(self, other);
|
||||
return wasm_i32x4_eq(self, other);
|
||||
}
|
||||
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
|
||||
inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
|
||||
|
@ -440,7 +440,7 @@ namespace xsimd
|
|||
template <class A>
|
||||
inline batch_bool<double, A> eq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_f64x2_eq(self, other);
|
||||
return wasm_i64x2_eq(self, other);
|
||||
}
|
||||
|
||||
// fast_cast
|
||||
|
@ -579,6 +579,30 @@ namespace xsimd
|
|||
0xFFFFFF00,
|
||||
0xFFFFFFFF,
|
||||
};
|
||||
alignas(A::alignment()) static const uint32_t lut16[][4] = {
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
|
||||
{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
|
||||
{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
|
||||
{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
|
||||
{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
|
||||
{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
|
||||
{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
|
||||
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
|
||||
{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
|
||||
{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
|
||||
{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
|
||||
{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
|
||||
{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
|
||||
{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
|
||||
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
|
||||
};
|
||||
// Lookup table for 64-bit lanes, indexed by a 2-bit mask: bit i set means
// lane i is all ones. Each row is exactly one 128-bit vector (2 x 64 bits),
// so the row length is 2; a wider row would only add unused zero padding.
alignas(A::alignment()) static const uint64_t lut8[][2] = {
    { 0x0000000000000000ul, 0x0000000000000000ul },
    { 0xFFFFFFFFFFFFFFFFul, 0x0000000000000000ul },
    { 0x0000000000000000ul, 0xFFFFFFFFFFFFFFFFul },
    { 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul },
};
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
assert(!(mask & ~0xFFFF) && "inbound mask");
|
||||
|
@ -587,15 +611,17 @@ namespace xsimd
|
|||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
assert(!(mask & ~0xFF) && "inbound mask");
|
||||
return wasm_i64x2_make(lut64[mask >> 4], lut64[mask & 0xF]);
|
||||
return wasm_i64x2_make(lut64[mask & 0xF], lut64[mask >> 4]);
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
|
||||
{
|
||||
return batch_bool_cast<T>(from_mask(batch_bool<float, A> {}, mask, wasm {}));
|
||||
assert(!(mask & ~0xFul) && "inbound mask");
|
||||
return wasm_v128_load((const v128_t*)lut16[mask]);
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
|
||||
{
|
||||
return batch_bool_cast<T>(from_mask(batch_bool<double, A> {}, mask, wasm {}));
|
||||
assert(!(mask & ~0x3ul) && "inbound mask");
|
||||
return wasm_v128_load((const v128_t*)lut8[mask]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1114,44 +1140,6 @@ namespace xsimd
|
|||
return wasm_f64x2_extract_lane(tmp2, 0);
|
||||
}
|
||||
|
||||
// reduce_max
|
||||
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
|
||||
inline T reduce_max(batch<T, A> const& self, requires_arch<wasm>) noexcept
|
||||
{
|
||||
batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
|
||||
batch<T, A> acc0 = max(self, step0);
|
||||
|
||||
batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
|
||||
batch<T, A> acc1 = max(acc0, step1);
|
||||
|
||||
batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
|
||||
batch<T, A> acc2 = max(acc1, step2);
|
||||
if (sizeof(T) == 2)
|
||||
return acc2.get(0);
|
||||
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
|
||||
batch<T, A> acc3 = max(acc2, step3);
|
||||
return acc3.get(0);
|
||||
}
|
||||
|
||||
// reduce_min
|
||||
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
|
||||
inline T reduce_min(batch<T, A> const& self, requires_arch<wasm>) noexcept
|
||||
{
|
||||
batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
|
||||
batch<T, A> acc0 = min(self, step0);
|
||||
|
||||
batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
|
||||
batch<T, A> acc1 = min(acc0, step1);
|
||||
|
||||
batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
|
||||
batch<T, A> acc2 = min(acc1, step2);
|
||||
if (sizeof(T) == 2)
|
||||
return acc2.get(0);
|
||||
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
|
||||
batch<T, A> acc3 = min(acc2, step3);
|
||||
return acc3.get(0);
|
||||
}
|
||||
|
||||
// rsqrt
|
||||
template <class A>
|
||||
inline batch<float, A> rsqrt(batch<float, A> const& self, requires_arch<wasm>) noexcept
|
||||
|
@ -1171,15 +1159,15 @@ namespace xsimd
|
|||
inline batch<T, A> slide_left(batch<T, A> const& x, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_i8x16_shuffle(
|
||||
wasm_i64x2_const(0, 0), x, ((N)&0xF0) ? 0 : 16 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 17 - ((N)&0xF), ((N)&0xF0) ? 0 : 18 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 19 - ((N)&0xF), ((N)&0xF0) ? 0 : 20 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 21 - ((N)&0xF), ((N)&0xF0) ? 0 : 22 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 23 - ((N)&0xF), ((N)&0xF0) ? 0 : 24 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 25 - ((N)&0xF), ((N)&0xF0) ? 0 : 26 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 27 - ((N)&0xF), ((N)&0xF0) ? 0 : 28 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 29 - ((N)&0xF), ((N)&0xF0) ? 0 : 30 - ((N)&0xF),
|
||||
((N)&0xF0) ? 0 : 31 - ((N)&0xF));
|
||||
wasm_i64x2_const(0, 0), x, ((N) & 0xF0) ? 0 : 16 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 17 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 18 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 19 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 20 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 21 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 22 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 23 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 24 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 25 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 26 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 27 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 28 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 29 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 30 - ((N) & 0xF),
|
||||
((N) & 0xF0) ? 0 : 31 - ((N) & 0xF));
|
||||
}
|
||||
|
||||
// slide_right
|
||||
|
@ -1187,15 +1175,15 @@ namespace xsimd
|
|||
inline batch<T, A> slide_right(batch<T, A> const& x, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_i8x16_shuffle(
|
||||
x, wasm_i64x2_const(0, 0), ((N)&0xF0) ? 16 : ((N)&0xF) + 0,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 1, ((N)&0xF0) ? 16 : ((N)&0xF) + 2,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 3, ((N)&0xF0) ? 16 : ((N)&0xF) + 4,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 5, ((N)&0xF0) ? 16 : ((N)&0xF) + 6,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 7, ((N)&0xF0) ? 16 : ((N)&0xF) + 8,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 9, ((N)&0xF0) ? 16 : ((N)&0xF) + 10,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 11, ((N)&0xF0) ? 16 : ((N)&0xF) + 12,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 13, ((N)&0xF0) ? 16 : ((N)&0xF) + 14,
|
||||
((N)&0xF0) ? 16 : ((N)&0xF) + 15);
|
||||
x, wasm_i64x2_const(0, 0), ((N) & 0xF0) ? 16 : ((N) & 0xF) + 0,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 1, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 2,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 3, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 4,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 5, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 6,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 7, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 8,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 9, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 10,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 11, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 12,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 13, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 14,
|
||||
((N) & 0xF0) ? 16 : ((N) & 0xF) + 15);
|
||||
}
|
||||
|
||||
// sadd
|
||||
|
@ -1259,29 +1247,15 @@ namespace xsimd
|
|||
|
||||
// shuffle
|
||||
template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
|
||||
inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3> mask, requires_arch<wasm>) noexcept
|
||||
inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3>, requires_arch<wasm>) noexcept
|
||||
{
|
||||
// shuffle within lane
|
||||
if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
|
||||
return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
|
||||
|
||||
// shuffle within opposite lane
|
||||
if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
|
||||
return wasm_i32x4_shuffle(y, x, I0, I1, I2, I3);
|
||||
return shuffle(x, y, mask, generic {});
|
||||
}
|
||||
|
||||
template <class A, class ITy, ITy I0, ITy I1>
|
||||
inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1> mask, requires_arch<wasm>) noexcept
|
||||
inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1>, requires_arch<wasm>) noexcept
|
||||
{
|
||||
// shuffle within lane
|
||||
if (I0 < 2 && I1 >= 2)
|
||||
return wasm_i64x2_shuffle(x, y, I0, I1);
|
||||
|
||||
// shuffle within opposite lane
|
||||
if (I0 >= 2 && I1 < 2)
|
||||
return wasm_i64x2_shuffle(y, x, I0, I1);
|
||||
return shuffle(x, y, mask, generic {});
|
||||
}
|
||||
|
||||
// set
|
||||
|
@ -1500,7 +1474,6 @@ namespace xsimd
|
|||
}
|
||||
|
||||
// swizzle
|
||||
|
||||
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
|
||||
inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
|
||||
{
|
||||
|
@ -1516,7 +1489,7 @@ namespace xsimd
|
|||
template <class A, uint64_t V0, uint64_t V1>
|
||||
inline batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1>, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2 * V0, 2 * V0 + 1, 2 * V1, 2 * V1 + 1);
|
||||
return wasm_i64x2_shuffle(self, self, V0, V1);
|
||||
}
|
||||
|
||||
template <class A, uint64_t V0, uint64_t V1>
|
||||
|
@ -1528,7 +1501,7 @@ namespace xsimd
|
|||
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
|
||||
inline batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), V0, V1, V2, V3);
|
||||
return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3);
|
||||
}
|
||||
|
||||
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
|
||||
|
@ -1537,6 +1510,32 @@ namespace xsimd
|
|||
return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, wasm {}));
|
||||
}
|
||||
|
||||
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
|
||||
inline batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_i16x8_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7);
|
||||
}
|
||||
|
||||
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
|
||||
inline batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, wasm {}));
|
||||
}
|
||||
|
||||
template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
|
||||
uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
|
||||
inline batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15>, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return wasm_i8x16_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15);
|
||||
}
|
||||
|
||||
template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
|
||||
uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
|
||||
inline batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15> mask, requires_arch<wasm>) noexcept
|
||||
{
|
||||
return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, wasm {}));
|
||||
}
|
||||
|
||||
// trunc
|
||||
template <class A>
|
||||
inline batch<float, A> trunc(batch<float, A> const& self, requires_arch<wasm>) noexcept
|
||||
|
|
|
@ -57,7 +57,7 @@ namespace xsimd
|
|||
{
|
||||
};
|
||||
|
||||
template <class... Archs>
|
||||
template <unsigned... Vals>
|
||||
struct is_sorted;
|
||||
|
||||
template <>
|
||||
|
@ -65,14 +65,14 @@ namespace xsimd
|
|||
{
|
||||
};
|
||||
|
||||
template <class Arch>
|
||||
struct is_sorted<Arch> : std::true_type
|
||||
template <unsigned Val>
|
||||
struct is_sorted<Val> : std::true_type
|
||||
{
|
||||
};
|
||||
|
||||
template <class A0, class A1, class... Archs>
|
||||
struct is_sorted<A0, A1, Archs...>
|
||||
: std::conditional<(A0::version() >= A1::version()), is_sorted<Archs...>,
|
||||
template <unsigned V0, unsigned V1, unsigned... Vals>
|
||||
struct is_sorted<V0, V1, Vals...>
|
||||
: std::conditional<(V0 >= V1), is_sorted<V1, Vals...>,
|
||||
std::false_type>::type
|
||||
{
|
||||
};
|
||||
|
@ -111,7 +111,7 @@ namespace xsimd
|
|||
struct arch_list
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
static_assert(detail::is_sorted<Archs...>::value,
|
||||
static_assert(detail::is_sorted<Archs::version()...>::value,
|
||||
"architecture list must be sorted by version");
|
||||
#endif
|
||||
|
||||
|
@ -190,16 +190,23 @@ namespace xsimd
|
|||
struct unsupported
|
||||
{
|
||||
};
|
||||
using all_x86_architectures = arch_list<avx512bw, avx512dq, avx512cd, avx512f, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
|
||||
using all_x86_architectures = arch_list<
|
||||
avx512vnni<avx512vbmi>, avx512vbmi, avx512ifma, avx512pf, avx512vnni<avx512bw>, avx512bw, avx512er, avx512dq, avx512cd, avx512f,
|
||||
avxvnni, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>,
|
||||
sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
|
||||
|
||||
using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>;
|
||||
using all_rvv_architectures = arch_list<detail::rvv<512>, detail::rvv<256>, detail::rvv<128>>;
|
||||
using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type;
|
||||
using all_riscv_architectures = all_rvv_architectures;
|
||||
using all_wasm_architectures = arch_list<wasm>;
|
||||
using all_architectures = typename detail::join<all_arm_architectures, all_x86_architectures, all_wasm_architectures>::type;
|
||||
using all_architectures = typename detail::join<all_riscv_architectures, all_wasm_architectures, all_arm_architectures, all_x86_architectures>::type;
|
||||
|
||||
using supported_architectures = typename detail::supported<all_architectures>::type;
|
||||
|
||||
using x86_arch = typename detail::supported<all_x86_architectures>::type::best;
|
||||
using arm_arch = typename detail::supported<all_arm_architectures>::type::best;
|
||||
using riscv_arch = typename detail::supported<all_riscv_architectures>::type::best;
|
||||
using best_arch = typename supported_architectures::best;
|
||||
|
||||
#ifdef XSIMD_DEFAULT_ARCH
|
||||
|
|
|
@ -12,9 +12,9 @@
|
|||
#ifndef XSIMD_CONFIG_HPP
|
||||
#define XSIMD_CONFIG_HPP
|
||||
|
||||
#define XSIMD_VERSION_MAJOR 11
|
||||
#define XSIMD_VERSION_MINOR 2
|
||||
#define XSIMD_VERSION_PATCH 0
|
||||
#define XSIMD_VERSION_MAJOR 12
|
||||
#define XSIMD_VERSION_MINOR 1
|
||||
#define XSIMD_VERSION_PATCH 1
|
||||
|
||||
/**
|
||||
* high level free functions
|
||||
|
@ -99,6 +99,17 @@
|
|||
#define XSIMD_WITH_AVX2 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
* Set to 1 if AVXVNNI is available at compile-time, to 0 otherwise.
|
||||
*/
|
||||
#ifdef __AVXVNNI__
|
||||
#define XSIMD_WITH_AVXVNNI 1
|
||||
#else
|
||||
#define XSIMD_WITH_AVXVNNI 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
|
@ -244,6 +255,72 @@
|
|||
#define XSIMD_WITH_AVX512BW 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
* Set to 1 if AVX512ER is available at compile-time, to 0 otherwise.
|
||||
*/
|
||||
#ifdef __AVX512ER__
|
||||
#define XSIMD_WITH_AVX512ER XSIMD_WITH_AVX512F
|
||||
#else
|
||||
#define XSIMD_WITH_AVX512ER 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
* Set to 1 if AVX512PF is available at compile-time, to 0 otherwise.
|
||||
*/
|
||||
#ifdef __AVX512PF__
|
||||
#define XSIMD_WITH_AVX512PF XSIMD_WITH_AVX512F
|
||||
#else
|
||||
#define XSIMD_WITH_AVX512PF 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
* Set to 1 if AVX512IFMA is available at compile-time, to 0 otherwise.
|
||||
*/
|
||||
#ifdef __AVX512IFMA__
|
||||
#define XSIMD_WITH_AVX512IFMA XSIMD_WITH_AVX512F
|
||||
#else
|
||||
#define XSIMD_WITH_AVX512IFMA 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
* Set to 1 if AVX512VBMI is available at compile-time, to 0 otherwise.
|
||||
*/
|
||||
#ifdef __AVX512VBMI__
|
||||
#define XSIMD_WITH_AVX512VBMI XSIMD_WITH_AVX512F
|
||||
#else
|
||||
#define XSIMD_WITH_AVX512VBMI 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
* Set to 1 if AVX512VNNI is available at compile-time, to 0 otherwise.
|
||||
*/
|
||||
#ifdef __AVX512VNNI__

// AVX512VNNI is exposed through two feature macros: one for the flavour
// layered on AVX512VBMI and one for the flavour layered on AVX512BW.
// The VBMI flavour is only enabled when XSIMD_WITH_AVX512VBMI (derived from
// __AVX512VBMI__ earlier in this file) is non-zero; the BW flavour follows
// XSIMD_WITH_AVX512F in both cases.
// NOTE: the test must spell the macro XSIMD_WITH_AVX512VBMI. An undefined
// identifier silently evaluates to 0 in #if, which would disable the VBMI
// flavour unconditionally.
#if XSIMD_WITH_AVX512VBMI
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI XSIMD_WITH_AVX512F
#define XSIMD_WITH_AVX512VNNI_AVX512BW XSIMD_WITH_AVX512F
#else
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI 0
#define XSIMD_WITH_AVX512VNNI_AVX512BW XSIMD_WITH_AVX512F
#endif

#else

// No AVX512VNNI at compile time: both flavours are disabled.
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI 0
#define XSIMD_WITH_AVX512VNNI_AVX512BW 0

#endif
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
|
||||
/**
|
||||
|
@ -285,6 +362,19 @@
|
|||
#define XSIMD_SVE_BITS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
* Set to 1 if RVV is available and bit width is pre-set at compile-time, to 0 otherwise.
|
||||
*/
|
||||
#if defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0
|
||||
#define XSIMD_WITH_RVV 1
|
||||
#define XSIMD_RVV_BITS __riscv_v_fixed_vlen
|
||||
#else
|
||||
#define XSIMD_WITH_RVV 0
|
||||
#define XSIMD_RVV_BITS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
|
@ -354,7 +444,8 @@
|
|||
|
||||
#endif
|
||||
|
||||
#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_WASM
|
||||
#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_AVXVNNI && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_AVX512ER && !XSIMD_WITH_AVX512PF && !XSIMD_WITH_AVX512IFMA && !XSIMD_WITH_AVX512VBMI && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_RVV && !XSIMD_WITH_WASM
|
||||
#define XSIMD_NO_SUPPORTED_ARCHITECTURE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM))
|
||||
#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
|
||||
#include <asm/hwcap.h>
|
||||
#include <sys/auxv.h>
|
||||
#endif
|
||||
|
@ -45,14 +45,22 @@ namespace xsimd
|
|||
unsigned avx : 1;
|
||||
unsigned fma3_avx : 1;
|
||||
unsigned avx2 : 1;
|
||||
unsigned avxvnni : 1;
|
||||
unsigned fma3_avx2 : 1;
|
||||
unsigned avx512f : 1;
|
||||
unsigned avx512cd : 1;
|
||||
unsigned avx512dq : 1;
|
||||
unsigned avx512bw : 1;
|
||||
unsigned avx512er : 1;
|
||||
unsigned avx512pf : 1;
|
||||
unsigned avx512ifma : 1;
|
||||
unsigned avx512vbmi : 1;
|
||||
unsigned avx512vnni_bw : 1;
|
||||
unsigned avx512vnni_vbmi : 1;
|
||||
unsigned neon : 1;
|
||||
unsigned neon64 : 1;
|
||||
unsigned sve : 1;
|
||||
unsigned rvv : 1;
|
||||
|
||||
// version number of the best arch available
|
||||
unsigned best;
|
||||
|
@ -85,15 +93,27 @@ namespace xsimd
|
|||
#endif
|
||||
best = sve::version() * sve;
|
||||
|
||||
#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0
|
||||
|
||||
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
|
||||
#ifndef HWCAP_V
|
||||
#define HWCAP_V (1 << ('V' - 'A'))
|
||||
#endif
|
||||
rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
|
||||
#else
|
||||
rvv = 0;
|
||||
#endif
|
||||
|
||||
best = ::xsimd::rvv::version() * rvv;
|
||||
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
|
||||
auto get_cpuid = [](int reg[4], int func_id) noexcept
|
||||
auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
|
||||
{
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
__cpuidex(reg, func_id, 0);
|
||||
__cpuidex(reg, level, count);
|
||||
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
__cpuid(reg, func_id);
|
||||
__cpuid(reg, level);
|
||||
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
|
||||
|
@ -104,13 +124,13 @@ namespace xsimd
|
|||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]),
|
||||
"=d"(reg[3])
|
||||
: "a"(func_id), "c"(0));
|
||||
: "0"(level), "2"(count));
|
||||
|
||||
#else
|
||||
__asm__("cpuid\n\t"
|
||||
: "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]),
|
||||
"=d"(reg[3])
|
||||
: "a"(func_id), "c"(0));
|
||||
: "0"(level), "2"(count));
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
@ -163,6 +183,11 @@ namespace xsimd
|
|||
avx2 = regs7[1] >> 5 & 1;
|
||||
best = std::max(best, avx2::version() * avx2);
|
||||
|
||||
int regs7a[4];
|
||||
get_cpuid(regs7a, 0x7, 0x1);
|
||||
avxvnni = regs7a[0] >> 4 & 1;
|
||||
best = std::max(best, avxvnni::version() * avxvnni * avx2);
|
||||
|
||||
fma3_avx2 = avx2 && fma3_sse;
|
||||
best = std::max(best, fma3<xsimd::avx2>::version() * fma3_avx2);
|
||||
|
||||
|
@ -178,6 +203,23 @@ namespace xsimd
|
|||
avx512bw = regs7[1] >> 30 & 1;
|
||||
best = std::max(best, avx512bw::version() * avx512bw * avx512dq * avx512cd * avx512f);
|
||||
|
||||
avx512er = regs7[1] >> 27 & 1;
|
||||
best = std::max(best, avx512er::version() * avx512er * avx512cd * avx512f);
|
||||
|
||||
avx512pf = regs7[1] >> 26 & 1;
|
||||
best = std::max(best, avx512pf::version() * avx512pf * avx512er * avx512cd * avx512f);
|
||||
|
||||
avx512ifma = regs7[1] >> 21 & 1;
|
||||
best = std::max(best, avx512ifma::version() * avx512ifma * avx512bw * avx512dq * avx512cd * avx512f);
|
||||
|
||||
avx512vbmi = regs7[2] >> 1 & 1;
|
||||
best = std::max(best, avx512vbmi::version() * avx512vbmi * avx512ifma * avx512bw * avx512dq * avx512cd * avx512f);
|
||||
|
||||
avx512vnni_bw = regs7[2] >> 11 & 1;
|
||||
best = std::max(best, avx512vnni<xsimd::avx512bw>::version() * avx512vnni_bw * avx512bw * avx512dq * avx512cd * avx512f);
|
||||
|
||||
avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
|
||||
best = std::max(best, avx512vnni<xsimd::avx512vbmi>::version() * avx512vnni_vbmi);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
|
|
@ -18,9 +18,19 @@
|
|||
|
||||
#include "xsimd_avx2_register.hpp"
|
||||
#include "xsimd_avx_register.hpp"
|
||||
#include "xsimd_avxvnni_register.hpp"
|
||||
#include "xsimd_fma3_avx2_register.hpp"
|
||||
#include "xsimd_fma3_avx_register.hpp"
|
||||
|
||||
#include "xsimd_avx512vnni_avx512bw_register.hpp"
|
||||
#include "xsimd_avx512vnni_avx512vbmi_register.hpp"
|
||||
|
||||
#include "xsimd_avx512ifma_register.hpp"
|
||||
#include "xsimd_avx512vbmi_register.hpp"
|
||||
|
||||
#include "xsimd_avx512er_register.hpp"
|
||||
#include "xsimd_avx512pf_register.hpp"
|
||||
|
||||
#include "xsimd_avx512bw_register.hpp"
|
||||
#include "xsimd_avx512cd_register.hpp"
|
||||
#include "xsimd_avx512dq_register.hpp"
|
||||
|
@ -31,4 +41,6 @@
|
|||
|
||||
#include "xsimd_sve_register.hpp"
|
||||
|
||||
#include "xsimd_rvv_register.hpp"
|
||||
|
||||
#include "xsimd_wasm_register.hpp"
|
||||
|
|
|
@ -530,6 +530,19 @@ namespace xsimd
|
|||
return kernel::clip(x, lo, hi, A {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup batch_data_transfer
|
||||
*
|
||||
* Pick elements from \c x selected by \c mask, and append them to the
|
||||
* resulting vector, zeroing the remaining slots
|
||||
*/
|
||||
template <class T, class A>
|
||||
inline batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
|
||||
{
|
||||
detail::static_check_supported_config<T, A>();
|
||||
return kernel::compress<A>(x, mask, A {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup batch_complex
|
||||
*
|
||||
|
@ -705,6 +718,19 @@ namespace xsimd
|
|||
return kernel::exp2<A>(x, A {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup batch_data_transfer
|
||||
*
|
||||
* Load contiguous elements from \c x and place them in slots selected by \c
|
||||
* mask, zeroing the other slots
|
||||
*/
|
||||
template <class T, class A>
|
||||
inline batch<T, A> expand(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
|
||||
{
|
||||
detail::static_check_supported_config<T, A>();
|
||||
return kernel::expand<A>(x, mask, A {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup batch_math
|
||||
*
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512ER_REGISTER_HPP
|
||||
#define XSIMD_AVX512ER_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx512dq_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVX512ER instructions
|
||||
*/
|
||||
struct avx512er : avx512cd
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512ER; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(3, 3, 1); }
|
||||
static constexpr char const* name() noexcept { return "avx512er"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVX512ER
|
||||
|
||||
namespace types
|
||||
{
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, avx512er>
|
||||
{
|
||||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512er, avx512cd);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
|
@ -53,7 +53,6 @@ namespace xsimd
|
|||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER(bool, avx512f, __m512i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(signed char, avx512f, __m512i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(unsigned char, avx512f, __m512i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(char, avx512f, __m512i);
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512IFMA_REGISTER_HPP
|
||||
#define XSIMD_AVX512IFMA_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx512bw_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVX512IFMA instructions
|
||||
*/
|
||||
struct avx512ifma : avx512bw
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512IFMA; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(3, 5, 0); }
|
||||
static constexpr char const* name() noexcept { return "avx512ifma"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVX512IFMA
|
||||
|
||||
namespace types
|
||||
{
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, avx512ifma>
|
||||
{
|
||||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512ifma, avx512bw);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,48 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512PF_REGISTER_HPP
|
||||
#define XSIMD_AVX512PF_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx512er_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVX512BW instructions
|
||||
*/
|
||||
struct avx512pf : avx512er
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512PF; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(3, 4, 1); }
|
||||
static constexpr char const* name() noexcept { return "avx512pf"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVX512PF
|
||||
|
||||
namespace types
|
||||
{
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, avx512pf>
|
||||
{
|
||||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512pf, avx512er);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,48 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512VBMI_REGISTER_HPP
|
||||
#define XSIMD_AVX512VBMI_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx512ifma_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVX512VBMI instructions
|
||||
*/
|
||||
struct avx512vbmi : avx512ifma
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VBMI; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(3, 6, 0); }
|
||||
static constexpr char const* name() noexcept { return "avx512vbmi"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVX512VBMI
|
||||
|
||||
namespace types
|
||||
{
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, avx512vbmi>
|
||||
{
|
||||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vbmi, avx512ifma);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
51
third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp
поставляемый
Normal file
51
third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp
поставляемый
Normal file
|
@ -0,0 +1,51 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512VNNI_AVX512BW_REGISTER_HPP
|
||||
#define XSIMD_AVX512VNNI_AVX512BW_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx512bw_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
template <typename arch>
|
||||
struct avx512vnni;
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVX512VNNI instructions
|
||||
*/
|
||||
template <>
|
||||
struct avx512vnni<avx512bw> : avx512bw
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512BW; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(3, 4, 1); }
|
||||
static constexpr char const* name() noexcept { return "avx512vnni+avx512bw"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVX512VNNI_AVX512BW
|
||||
|
||||
namespace types
|
||||
{
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, avx512vnni<avx512bw>>
|
||||
{
|
||||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512bw>, avx512bw);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
51
third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp
поставляемый
Normal file
51
third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp
поставляемый
Normal file
|
@ -0,0 +1,51 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
|
||||
#define XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx512vbmi_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
template <typename arch>
|
||||
struct avx512vnni;
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVX512VNNI instructions
|
||||
*/
|
||||
template <>
|
||||
struct avx512vnni<avx512vbmi> : avx512vbmi
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512VBMI; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(3, 6, 1); }
|
||||
static constexpr char const* name() noexcept { return "avx512vnni+avx512vbmi"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVX512VNNI_AVX512VBMI
|
||||
|
||||
namespace types
|
||||
{
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, avx512vnni<avx512vbmi>>
|
||||
{
|
||||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512vbmi>, avx512vbmi);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,48 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVX512VNNI_REGISTER_HPP
|
||||
#define XSIMD_AVX512VNNI_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx512vbmi_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVX512VNNI instructions
|
||||
*/
|
||||
struct avx512vnni : avx512vbmi
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(3, 7, 0); }
|
||||
static constexpr char const* name() noexcept { return "avx512vnni"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVX512VNNI
|
||||
|
||||
namespace types
|
||||
{
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, avx512vnni>
|
||||
{
|
||||
using type = simd_avx512_bool_register<T>;
|
||||
};
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni, avx512vbmi);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
|
@ -42,7 +42,6 @@ namespace xsimd
|
|||
namespace types
|
||||
{
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER(bool, avx, __m256i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(signed char, avx, __m256i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(unsigned char, avx, __m256i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(char, avx, __m256i);
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_AVXVNNI_REGISTER_HPP
|
||||
#define XSIMD_AVXVNNI_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_avx2_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* AVXVNNI instructions
|
||||
*/
|
||||
struct avxvnni : avx2
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_AVXVNNI; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(2, 3, 0); }
|
||||
static constexpr char const* name() noexcept { return "avxvnni"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_AVXVNNI
|
||||
namespace types
|
||||
{
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avxvnni, avx2);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
|
@ -112,6 +112,7 @@ namespace xsimd
|
|||
template <class T, class A>
|
||||
class batch : public types::simd_register<T, A>, public types::integral_only_operators<T, A>
|
||||
{
|
||||
static_assert(!std::is_same<T, bool>::value, "use xsimd::batch_bool<T, A> instead of xsimd::batch<bool, A>");
|
||||
|
||||
public:
|
||||
static constexpr std::size_t size = sizeof(types::simd_register<T, A>) / sizeof(T); ///< Number of scalar elements in this batch.
|
||||
|
|
|
@ -88,7 +88,7 @@ namespace xsimd
|
|||
#define MAKE_BINARY_OP(OP, NAME) \
|
||||
template <bool... OtherValues> \
|
||||
constexpr auto operator OP(batch_bool_constant<batch_type, OtherValues...> other) const \
|
||||
->decltype(apply<NAME>(*this, other)) \
|
||||
-> decltype(apply<NAME>(*this, other)) \
|
||||
{ \
|
||||
return apply<NAME>(*this, other); \
|
||||
}
|
||||
|
@ -199,7 +199,7 @@ namespace xsimd
|
|||
#define MAKE_BINARY_OP(OP, NAME) \
|
||||
template <value_type... OtherValues> \
|
||||
constexpr auto operator OP(batch_constant<batch_type, OtherValues...> other) const \
|
||||
->decltype(apply<NAME>(*this, other)) \
|
||||
-> decltype(apply<NAME>(*this, other)) \
|
||||
{ \
|
||||
return apply<NAME>(*this, other); \
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ namespace xsimd
|
|||
static constexpr char const* name() noexcept { return "generic"; }
|
||||
|
||||
protected:
|
||||
static constexpr unsigned version(unsigned major, unsigned minor, unsigned patch) noexcept { return major * 10000u + minor * 100u + patch; }
|
||||
static constexpr unsigned version(unsigned major, unsigned minor, unsigned patch, unsigned multiplier = 100u) noexcept { return major * multiplier * multiplier + minor * multiplier + patch; }
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,417 @@
|
|||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* Copyright (c) Yibo Cai *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_RVV_REGISTER_HPP
|
||||
#define XSIMD_RVV_REGISTER_HPP
|
||||
|
||||
#include "xsimd_generic_arch.hpp"
|
||||
#include "xsimd_register.hpp"
|
||||
|
||||
#if XSIMD_WITH_RVV
|
||||
#include <riscv_vector.h>
|
||||
#endif
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* RVV instructions (fixed vector size) for riscv
|
||||
*/
|
||||
template <size_t Width>
|
||||
struct rvv : xsimd::generic
|
||||
{
|
||||
static constexpr size_t width = Width;
|
||||
static constexpr bool supported() noexcept { return Width == XSIMD_RVV_BITS; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr bool requires_alignment() noexcept { return true; }
|
||||
static constexpr std::size_t alignment() noexcept { return 16; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(1, 0, 0, /*multiplier=*/1000); }
|
||||
static constexpr char const* name() noexcept { return "riscv+rvv"; }
|
||||
};
|
||||
}
|
||||
|
||||
#if XSIMD_WITH_RVV
|
||||
|
||||
using rvv = detail::rvv<__riscv_v_fixed_vlen>;
|
||||
|
||||
#define XSIMD_RVV_JOINT_(a, b, c) a##b##c
|
||||
#define XSIMD_RVV_JOINT(a, b, c) XSIMD_RVV_JOINT_(a, b, c)
|
||||
#define XSIMD_RVV_JOINT5(a, b, c, d, e) XSIMD_RVV_JOINT(XSIMD_RVV_JOINT(a, b, c), d, e)
|
||||
|
||||
#define XSIMD_RVV_TYPE_i(S, V) XSIMD_RVV_JOINT5(vint, S, m, V, _t)
|
||||
#define XSIMD_RVV_TYPE_u(S, V) XSIMD_RVV_JOINT5(vuint, S, m, V, _t)
|
||||
#define XSIMD_RVV_TYPE_f(S, V) XSIMD_RVV_JOINT5(vfloat, S, m, V, _t)
|
||||
#define XSIMD_RVV_TYPE(T, S, V) XSIMD_RVV_JOINT(XSIMD_RVV_TYPE, _, T)(S, V)
|
||||
|
||||
namespace types
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
static constexpr size_t rvv_width_mf8 = XSIMD_RVV_BITS / 8;
|
||||
static constexpr size_t rvv_width_mf4 = XSIMD_RVV_BITS / 4;
|
||||
static constexpr size_t rvv_width_mf2 = XSIMD_RVV_BITS / 2;
|
||||
static constexpr size_t rvv_width_m1 = XSIMD_RVV_BITS;
|
||||
static constexpr size_t rvv_width_m2 = XSIMD_RVV_BITS * 2;
|
||||
static constexpr size_t rvv_width_m4 = XSIMD_RVV_BITS * 4;
|
||||
static constexpr size_t rvv_width_m8 = XSIMD_RVV_BITS * 8;
|
||||
|
||||
// rvv_type_info is a utility class to convert scalar type and
|
||||
// bitwidth into rvv register types.
|
||||
//
|
||||
// * `type` is the unadorned vector type.
|
||||
// * `fixed_type` is the same type, but with the storage attribute
|
||||
// applied.
|
||||
// * `byte_type` is the type which is the same size in unsigned
|
||||
// bytes, used as an intermediate step for bit-cast operations,
|
||||
// because only a subset of __riscv_vreinterpret() intrinsics
|
||||
// exist -- but always enough to get us to bytes and back.
|
||||
//
|
||||
template <class T, size_t Width>
|
||||
struct rvv_type_info;
|
||||
// XSIMD_RVV_MAKE_TYPE(scalar, t, s, vmul) specializes rvv_type_info for the
// element type `scalar` at a width of rvv_width_m1 * vmul.
//   scalar - C++ element type of the specialization (int8_t ... double below)
//   t      - type-class letter (i, u or f) pasted into the vector type name
//            and into the __riscv_vreinterpret_ intrinsic name
//   s      - element size pasted into those names (8, 16, 32 or 64 below)
//   vmul   - multiplier pasted after `m` in those names (8, 4, 2 or 1 below)
// bitcast() first reinterprets its argument to the unsigned type of the same
// element size and then to the target type; the `type` -> `type` case is
// specialized to return its argument unchanged. as_bytes() goes through the
// same unsigned type and then reinterprets to the u8 vector type.
#define XSIMD_RVV_MAKE_TYPE(scalar, t, s, vmul) \
|
||||
template <> \
|
||||
struct rvv_type_info<scalar, rvv_width_m1 * vmul> \
|
||||
{ \
|
||||
static constexpr size_t width = rvv_width_m1 * vmul; \
|
||||
using type = XSIMD_RVV_TYPE(t, s, vmul); \
|
||||
using byte_type = XSIMD_RVV_TYPE(u, 8, vmul); \
|
||||
using fixed_type = type __attribute__((riscv_rvv_vector_bits(width))); \
|
||||
template <class U> \
|
||||
static inline type bitcast(U x) noexcept \
|
||||
{ \
|
||||
const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \
|
||||
return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, t, s, m, vmul)(words); \
|
||||
} \
|
||||
template <> \
|
||||
inline type bitcast<type>(type x) noexcept { return x; } \
|
||||
static inline byte_type as_bytes(type x) noexcept \
|
||||
{ \
|
||||
const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \
|
||||
return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, 8, m, vmul)(words); \
|
||||
} \
|
||||
};
|
||||
|
||||
// Emits rvv_type_info specializations for every supported element type at
// register multiplier `vmul`: signed integers, unsigned integers, then
// floating point.
#define XSIMD_RVV_MAKE_TYPES(vmul) \
    XSIMD_RVV_MAKE_TYPE(int8_t, i, 8, vmul) \
    XSIMD_RVV_MAKE_TYPE(int16_t, i, 16, vmul) \
    XSIMD_RVV_MAKE_TYPE(int32_t, i, 32, vmul) \
    XSIMD_RVV_MAKE_TYPE(int64_t, i, 64, vmul) \
    XSIMD_RVV_MAKE_TYPE(uint8_t, u, 8, vmul) \
    XSIMD_RVV_MAKE_TYPE(uint16_t, u, 16, vmul) \
    XSIMD_RVV_MAKE_TYPE(uint32_t, u, 32, vmul) \
    XSIMD_RVV_MAKE_TYPE(uint64_t, u, 64, vmul) \
    XSIMD_RVV_MAKE_TYPE(float, f, 32, vmul) \
    XSIMD_RVV_MAKE_TYPE(double, f, 64, vmul)
|
||||
|
||||
// Instantiate the type tables for the whole-register multipliers 1, 2, 4, 8.
XSIMD_RVV_MAKE_TYPES(1)
XSIMD_RVV_MAKE_TYPES(2)
XSIMD_RVV_MAKE_TYPES(4)
XSIMD_RVV_MAKE_TYPES(8)

// The generator macros are no longer needed past this point.
#undef XSIMD_RVV_MAKE_TYPES
#undef XSIMD_RVV_MAKE_TYPE
#undef XSIMD_RVV_TYPE
#undef XSIMD_RVV_TYPE_i
#undef XSIMD_RVV_TYPE_u
#undef XSIMD_RVV_TYPE_f
|
||||
|
||||
// rvv_blob is storage-type abstraction for a vector register.
|
||||
template <class T, size_t Width>
|
||||
struct rvv_blob : public rvv_type_info<T, Width>
|
||||
{
|
||||
using super = rvv_type_info<T, Width>;
|
||||
using typename super::fixed_type;
|
||||
using typename super::type;
|
||||
|
||||
fixed_type value;
|
||||
type get() const { return value; }
|
||||
void set(type v) { value = v; }
|
||||
};
|
||||
//
|
||||
// But sometimes we want our storage type to be less than a whole
|
||||
// register, while presenting as a whole register to the outside
|
||||
// world. This is because some partial-register types are not
|
||||
// defined, but they can (mostly) be emulated using shorter vl on a
|
||||
// full-width register for arithmetic, and cast back to a partial
|
||||
// byte register for storage.
|
||||
//
|
||||
template <class T, size_t divisor>
|
||||
struct rvv_semiblob : public rvv_type_info<T, rvv_width_m1>
|
||||
{
|
||||
using super = rvv_type_info<T, rvv_width_m1>;
|
||||
static constexpr size_t width = rvv_width_m1 / divisor;
|
||||
using typename super::type;
|
||||
template <size_t div>
|
||||
struct semitype;
|
||||
template <>
|
||||
struct semitype<2>
|
||||
{
|
||||
using type = vuint8mf2_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf2)));
|
||||
};
|
||||
template <>
|
||||
struct semitype<4>
|
||||
{
|
||||
using type = vuint8mf4_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf4)));
|
||||
};
|
||||
template <>
|
||||
struct semitype<8>
|
||||
{
|
||||
using type = vuint8mf8_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf8)));
|
||||
};
|
||||
using fixed_type = typename semitype<divisor>::type;
|
||||
using super::as_bytes;
|
||||
using super::bitcast;
|
||||
|
||||
fixed_type value;
|
||||
template <size_t div>
|
||||
vuint8m1_t get_bytes() const;
|
||||
template <>
|
||||
vuint8m1_t get_bytes<2>() const { return __riscv_vlmul_ext_v_u8mf2_u8m1(value); }
|
||||
template <>
|
||||
vuint8m1_t get_bytes<4>() const { return __riscv_vlmul_ext_v_u8mf4_u8m1(value); }
|
||||
template <>
|
||||
vuint8m1_t get_bytes<8>() const { return __riscv_vlmul_ext_v_u8mf8_u8m1(value); }
|
||||
type get() const noexcept
|
||||
{
|
||||
vuint8m1_t bytes = get_bytes<divisor>();
|
||||
return bitcast(bytes);
|
||||
}
|
||||
template <size_t div>
|
||||
void set_bytes(vuint8m1_t);
|
||||
template <>
|
||||
void set_bytes<2>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf2(v); }
|
||||
template <>
|
||||
void set_bytes<4>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf4(v); }
|
||||
template <>
|
||||
void set_bytes<8>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf8(v); }
|
||||
void set(type v)
|
||||
{
|
||||
vuint8m1_t bytes = as_bytes(v);
|
||||
set_bytes<divisor>(bytes);
|
||||
}
|
||||
};
|
||||
template <class T>
|
||||
struct rvv_blob<T, rvv_width_mf2> : rvv_semiblob<T, 2>
|
||||
{
|
||||
};
|
||||
template <class T>
|
||||
struct rvv_blob<T, rvv_width_mf4> : rvv_semiblob<T, 4>
|
||||
{
|
||||
};
|
||||
template <class T>
|
||||
struct rvv_blob<T, rvv_width_mf8> : rvv_semiblob<T, 8>
|
||||
{
|
||||
};
|
||||
|
||||
// It's difficult dealing with both char and whichever *int8_t type
|
||||
// is compatible with char, so just avoid it altogether.
|
||||
//
|
||||
// Fixed-width 8-bit integer type with the same signedness as plain `char`.
using rvv_char_t = std::conditional<std::is_unsigned<char>::value, uint8_t, int8_t>::type;
|
||||
template <class T>
|
||||
using rvv_fix_char_t = typename std::conditional<
|
||||
std::is_same<char, typename std::decay<T>::type>::value,
|
||||
rvv_char_t, T>::type;
|
||||
|
||||
// Tag type for constructors that reinterpret the bits of an incompatible
// vector type. Marking those constructors `explicit` alone was judged not
// explicit enough, so callers must additionally pass XSIMD_RVV_BITCAST to
// show the bit-cast is intentional. The value itself carries no information.
//
enum rvv_bitcast_flag
{
    XSIMD_RVV_BITCAST
};
|
||||
|
||||
// the general-purpose vector register type, usable within
|
||||
// templates, and supporting arithmetic on partial registers for
|
||||
// which there is no intrinsic type (by casting via a full register
|
||||
// type).
|
||||
//
|
||||
template <class T, size_t Width>
|
||||
struct rvv_reg
|
||||
{
|
||||
static constexpr size_t width = Width;
|
||||
static constexpr size_t vl = Width / (sizeof(T) * 8);
|
||||
using blob_type = rvv_blob<T, Width>;
|
||||
using register_type = typename blob_type::type;
|
||||
using byte_type = typename blob_type::byte_type;
|
||||
blob_type value;
|
||||
rvv_reg() noexcept = default;
|
||||
rvv_reg(register_type x) noexcept { value.set(x); }
|
||||
explicit rvv_reg(byte_type v, rvv_bitcast_flag) { value.set(value.bitcast(v)); }
|
||||
template <class U>
|
||||
explicit rvv_reg(rvv_reg<U, Width> v, rvv_bitcast_flag)
|
||||
: rvv_reg(v.get_bytes(), XSIMD_RVV_BITCAST)
|
||||
{
|
||||
}
|
||||
byte_type get_bytes() const noexcept
|
||||
{
|
||||
return blob_type::as_bytes(value.get());
|
||||
}
|
||||
operator register_type() const noexcept { return value.get(); }
|
||||
};
|
||||
template <class T, size_t Width = XSIMD_RVV_BITS>
|
||||
using rvv_reg_t = typename std::conditional<!std::is_void<T>::value, rvv_reg<rvv_fix_char_t<T>, Width>, void>::type;
|
||||
|
||||
// The mask (bool) vector types need the same kind of lookup table as the
// data types above. rvv_bool_info<N> describes the vboolN_t mask type.
//
template <size_t N>
struct rvv_bool_info;

// Emits rvv_bool_info<i>: `type` is vbool<i>_t, and bitcast() reinterprets
// another vector as that mask type through __riscv_vreinterpret_b<i>.
#define XSIMD_RVV_MAKE_BOOL_TYPE(i) \
    template <> \
    struct rvv_bool_info<i> \
    { \
        using type = XSIMD_RVV_JOINT(vbool, i, _t); \
        template <class Src> \
        static inline type bitcast(Src bits) noexcept \
        { \
            return XSIMD_RVV_JOINT(__riscv_vreinterpret_b, i, )(bits); \
        } \
    };
|
||||
// One rvv_bool_info specialization per mask type, vbool1_t through vbool64_t.
XSIMD_RVV_MAKE_BOOL_TYPE(1);
XSIMD_RVV_MAKE_BOOL_TYPE(2);
XSIMD_RVV_MAKE_BOOL_TYPE(4);
XSIMD_RVV_MAKE_BOOL_TYPE(8);
XSIMD_RVV_MAKE_BOOL_TYPE(16);
XSIMD_RVV_MAKE_BOOL_TYPE(32);
XSIMD_RVV_MAKE_BOOL_TYPE(64);

// Drop the generator and the token-joining helpers now that all tables exist.
#undef XSIMD_RVV_MAKE_BOOL_TYPE
#undef XSIMD_RVV_JOINT_
#undef XSIMD_RVV_JOINT
#undef XSIMD_RVV_JOINT5
|
||||
|
||||
template <class T, size_t Width>
|
||||
struct rvv_bool
|
||||
{
|
||||
using bool_info = rvv_bool_info<rvv_width_m1 * sizeof(T) * 8 / Width>;
|
||||
using storage_type = vuint8m1_t __attribute__((riscv_rvv_vector_bits(rvv_width_m1)));
|
||||
using type = typename bool_info::type;
|
||||
storage_type value;
|
||||
rvv_bool() = default;
|
||||
rvv_bool(type v) noexcept
|
||||
: value(__riscv_vreinterpret_u8m1(v))
|
||||
{
|
||||
}
|
||||
template <class U, typename std::enable_if<sizeof(T) == sizeof(U), int>::type = 0>
|
||||
rvv_bool(rvv_bool<U, Width> v)
|
||||
: value(v.value)
|
||||
{
|
||||
}
|
||||
explicit rvv_bool(uint8_t mask) noexcept
|
||||
: value(__riscv_vmv_v_x_u8m1(mask, rvv_width_m1 / 8))
|
||||
{
|
||||
}
|
||||
explicit rvv_bool(uint64_t mask) noexcept
|
||||
: value(__riscv_vreinterpret_v_u64m1_u8m1(__riscv_vmv_v_x_u64m1(mask, rvv_width_m1 / 64)))
|
||||
{
|
||||
}
|
||||
operator type() const noexcept { return bool_info::bitcast(value); }
|
||||
};
|
||||
|
||||
template <class T, size_t Width = XSIMD_RVV_BITS>
|
||||
using rvv_bool_t = typename std::enable_if < !std::is_void<T>::value,
|
||||
rvv_bool<rvv_fix_char_t<T>, Width<rvv_width_m1 ? rvv_width_m1 : Width>>::type;
|
||||
|
||||
template <size_t S>
|
||||
struct rvv_vector_type_impl;
|
||||
|
||||
template <>
|
||||
struct rvv_vector_type_impl<8>
|
||||
{
|
||||
using signed_type = rvv_reg_t<int8_t>;
|
||||
using unsigned_type = rvv_reg_t<uint8_t>;
|
||||
using floating_point_type = void;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct rvv_vector_type_impl<16>
|
||||
{
|
||||
using signed_type = rvv_reg_t<int16_t>;
|
||||
using unsigned_type = rvv_reg_t<uint16_t>;
|
||||
using floating_point_type = rvv_reg_t<_Float16>;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct rvv_vector_type_impl<32>
|
||||
{
|
||||
using signed_type = rvv_reg_t<int32_t>;
|
||||
using unsigned_type = rvv_reg_t<uint32_t>;
|
||||
using floating_point_type = rvv_reg_t<float>;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct rvv_vector_type_impl<64>
|
||||
{
|
||||
using signed_type = rvv_reg_t<int64_t>;
|
||||
using unsigned_type = rvv_reg_t<uint64_t>;
|
||||
using floating_point_type = rvv_reg_t<double>;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
using signed_int_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::signed_type;
|
||||
|
||||
template <class T>
|
||||
using unsigned_int_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::unsigned_type;
|
||||
|
||||
template <class T>
|
||||
using floating_point_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::floating_point_type;
|
||||
|
||||
template <class T>
|
||||
using signed_int_or_floating_point_rvv_vector_type = typename std::conditional<std::is_floating_point<T>::value,
|
||||
floating_point_rvv_vector_type<T>,
|
||||
signed_int_rvv_vector_type<T>>::type;
|
||||
|
||||
template <class T>
|
||||
using rvv_vector_type = typename std::conditional<std::is_signed<T>::value,
|
||||
signed_int_or_floating_point_rvv_vector_type<T>,
|
||||
unsigned_int_rvv_vector_type<T>>::type;
|
||||
} // namespace detail
|
||||
|
||||
// Register declarations for the rvv architecture, one per scalar type.
// NOTE(review): XSIMD_DECLARE_SIMD_REGISTER is defined outside this excerpt;
// the original declaration order is kept in case it matters.

// bool reuses the unsigned char register type.
XSIMD_DECLARE_SIMD_REGISTER(bool, rvv, detail::rvv_vector_type<unsigned char>);

// Character types.
XSIMD_DECLARE_SIMD_REGISTER(signed char, rvv, detail::rvv_vector_type<signed char>);
XSIMD_DECLARE_SIMD_REGISTER(unsigned char, rvv, detail::rvv_vector_type<unsigned char>);
XSIMD_DECLARE_SIMD_REGISTER(char, rvv, detail::rvv_vector_type<char>);

// Integer types.
XSIMD_DECLARE_SIMD_REGISTER(short, rvv, detail::rvv_vector_type<short>);
XSIMD_DECLARE_SIMD_REGISTER(unsigned short, rvv, detail::rvv_vector_type<unsigned short>);
XSIMD_DECLARE_SIMD_REGISTER(int, rvv, detail::rvv_vector_type<int>);
XSIMD_DECLARE_SIMD_REGISTER(unsigned int, rvv, detail::rvv_vector_type<unsigned int>);
XSIMD_DECLARE_SIMD_REGISTER(long int, rvv, detail::rvv_vector_type<long int>);
XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, rvv, detail::rvv_vector_type<unsigned long int>);
XSIMD_DECLARE_SIMD_REGISTER(long long int, rvv, detail::rvv_vector_type<long long int>);
XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, rvv, detail::rvv_vector_type<unsigned long long int>);

// Floating-point types.
XSIMD_DECLARE_SIMD_REGISTER(float, rvv, detail::rvv_vector_type<float>);
XSIMD_DECLARE_SIMD_REGISTER(double, rvv, detail::rvv_vector_type<double>);
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template <class T>
|
||||
struct rvv_bool_simd_register
|
||||
{
|
||||
using register_type = rvv_bool_t<T>;
|
||||
register_type data;
|
||||
operator register_type() const noexcept { return data; }
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
template <class T>
|
||||
struct get_bool_simd_register<T, rvv>
|
||||
{
|
||||
using type = detail::rvv_bool_simd_register<T>;
|
||||
};
|
||||
} // namespace types
|
||||
#endif
|
||||
} // namespace xsimd
|
||||
|
||||
#endif
|
|
@ -40,7 +40,6 @@ namespace xsimd
|
|||
#if XSIMD_WITH_SSE2
|
||||
namespace types
|
||||
{
|
||||
XSIMD_DECLARE_SIMD_REGISTER(bool, sse2, __m128i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(signed char, sse2, __m128i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(unsigned char, sse2, __m128i);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(char, sse2, __m128i);
|
||||
|
|
|
@ -36,7 +36,7 @@ namespace xsimd
|
|||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr bool requires_alignment() noexcept { return true; }
|
||||
static constexpr std::size_t alignment() noexcept { return 16; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(9, 0, 0); }
|
||||
static constexpr unsigned version() noexcept { return generic::version(9, Width / 32, 0); }
|
||||
static constexpr char const* name() noexcept { return "arm64+sve"; }
|
||||
};
|
||||
}
|
||||
|
|
|
@ -40,7 +40,6 @@ namespace xsimd
|
|||
#if XSIMD_WITH_WASM
|
||||
namespace types
|
||||
{
|
||||
XSIMD_DECLARE_SIMD_REGISTER(bool, wasm, v128_t);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(signed char, wasm, v128_t);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(unsigned char, wasm, v128_t);
|
||||
XSIMD_DECLARE_SIMD_REGISTER(char, wasm, v128_t);
|
||||
|
|
|
@ -10,8 +10,8 @@ origin:
|
|||
|
||||
url: https://github.com/QuantStack/xsimd
|
||||
|
||||
release: 11.2.0 (2023-11-08T21:37:47+01:00).
|
||||
revision: 11.2.0
|
||||
release: 12.1.1 (2023-12-12T17:17:27+01:00).
|
||||
revision: 12.1.1
|
||||
|
||||
license: BSD-3-Clause
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче