зеркало из https://github.com/microsoft/DeepSpeed.git
Fix confusing width in simd_load (#4714)
I found that the width used in simd_load differs from the one used in simd_store, which is confusing. The cause is the missing parentheses around x in the pointer cast inside the SIMD_LOAD2 macro definition: without them the cast binds to the base pointer alone, so the offset width * i is scaled in units of __m128i rather than float, which disrupts the intended semantics of the width variable. This commit is a quick fix for that. Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
This commit is contained in:
Родитель
ade98365fc
Коммит
f517903162
|
@ -53,7 +53,7 @@
|
|||
#define SIMD_WIDTH 8
|
||||
|
||||
#define SIMD_LOAD2(x, h) \
|
||||
((h) ? _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)x)) : _mm256_loadu_ps(x))
|
||||
((h) ? _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)(x))) : _mm256_loadu_ps(x))
|
||||
#define SIMD_STORE2(x, d, h) \
|
||||
((h) ? _mm_store_ps(x, _mm_castsi128_ps(_mm256_cvtps_ph(d, _MM_FROUND_TO_NEAREST_INT))) \
|
||||
: _mm256_storeu_ps(x, d))
|
||||
|
@ -80,7 +80,7 @@ inline void simd_store(float* dst, AVX_Data* src, bool half_precision)
|
|||
template <int span>
|
||||
inline void simd_load(AVX_Data* dst, float* src, bool half_precision)
|
||||
{
|
||||
size_t width = (half_precision ? 1 : SIMD_WIDTH);
|
||||
size_t width = (half_precision ? SIMD_WIDTH / 2 : SIMD_WIDTH);
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < span; ++i) { dst[i].data = SIMD_LOAD2(src + width * i, half_precision); }
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче