Improve SSE1 test so that it tells better where it fails, and make it finish on Emscripten.

This commit is contained in:
Jukka Jylänki 2014-09-27 22:57:50 +03:00
Родитель 177b1e1219
Коммит c6cad0eb4b
1 изменённых файлов: 21 добавлений и 14 удалений

Просмотреть файл

@ -15,25 +15,28 @@
#endif
// Use custom assert macro so that this test is independent to #define NDEBUG and similar.
#define Assert(X) do { if (!(X)) { fprintf(stderr, "Condition '" #X "' failed!\n"); exit(1); } } while(0)
#define Assert(X) do { if (!(X)) { fprintf(stderr, "Condition '" #X "' failed!\n"); ++numFailures; } } while(0)
int numFailures = 0;
// Throughout the file, the notation { a,b,c,d } refers to in-memory array notation, i.e. a is in the lowest memory address, and d is in the highest.
// The notation [a,b,c,d] refers to SIMD channels, where d is the lowest channel 0 (the scalar channel), and a is the highest channel 3.
// Some of the intrinsics in SSE1 augment the previous MMX intrinsics set. Those are marked with a /*M64*/ prefix.
// Tests if m == [v3, v2, v1, v0]
bool __attribute__((noinline)) aeq(__m128 m, float v3, float v2, float v1, float v0, bool abortOnFailure = true)
bool __attribute__((noinline)) aeq_(const char *func, __m128 m, float v3, float v2, float v1, float v0, bool abortOnFailure = true)
{
float val[4];
_mm_storeu_ps(val, m);
bool eq = fabs(val[0]-v0) < 1e-5f && fabs(val[1]-v1) < 1e-5f && fabs(val[2]-v2) < 1e-5f && fabs(val[3]-v3) < 1e-5f;
if (!eq && abortOnFailure)
{
fprintf(stderr, "[%g, %g, %g, %g] != [%g, %g, %g, %g]\n", val[3], val[2], val[1], val[0], v3, v2, v1, v0);
Assert(false);
fprintf(stderr, "%s failed! [%g, %g, %g, %g] != [%g, %g, %g, %g]\n", func, val[3], val[2], val[1], val[0], v3, v2, v1, v0);
++numFailures;
}
return eq;
}
#define aeq(m, v3, v2, v1, v0) aeq_(#m, m, v3, v2, v1, v0)
#ifdef TEST_M64
bool __attribute__((noinline)) aeq64(__m64 m, uint64_t u, bool abortOnFailure = true)
@ -43,26 +46,28 @@ bool __attribute__((noinline)) aeq64(__m64 m, uint64_t u, bool abortOnFailure =
if (!eq && abortOnFailure)
{
fprintf(stderr, "0x%llx != 0x%llx\n", c.u, u);
Assert(false);
++numFailures;
}
return eq;
}
#endif
// Tests if m == [v3, v2, v1, v0] but where vx are integers.
bool __attribute__((noinline)) aeqi(__m128 m, uint32_t v3, uint32_t v2, uint32_t v1, uint32_t v0, bool abortOnFailure = true)
bool __attribute__((noinline)) aeqi_(const char *func, __m128 m, uint32_t v3, uint32_t v2, uint32_t v1, uint32_t v0, bool abortOnFailure = true)
{
uint32_t val[4];
_mm_storeu_ps((float*)val, m);
bool eq = val[0] == v0 && val[1] == v1 && val[2] == v2 && val[3] == v3;
if (!eq && abortOnFailure)
{
fprintf(stderr, "[0x%08x, 0x%08x, 0x%08x, 0x%08x] != [0x%08x, 0x%08x, 0x%08x, 0x%08x]\n", val[3], val[2], val[1], val[0], v3, v2, v1, v0);
Assert(false);
fprintf(stderr, "%s failed! [0x%08x, 0x%08x, 0x%08x, 0x%08x] != [0x%08x, 0x%08x, 0x%08x, 0x%08x]\n", func, val[3], val[2], val[1], val[0], v3, v2, v1, v0);
++numFailures;
}
return eq;
}
#define aeqi(m, v3, v2, v1, v0) aeqi_(#m, m, v3, v2, v1, v0)
// Recasts floating point representation of f to an integer.
uint32_t fcastu(float f) { return *(uint32_t*)&f; }
float ucastf(uint32_t t) { return *(float*)&t; }
@ -137,11 +142,10 @@ int main()
Assert(((uintptr_t)uarr2 & 0xF) != 0); // uarr must be unaligned.
// Test that aeq itself works and does not trivially return true on everything.
Assert(aeq(_mm_load_ps(arr), 4.f, 3.f, 2.f, 0.f, false) == false);
Assert(aeq_("",_mm_load_ps(arr), 4.f, 3.f, 2.f, 0.f, false) == false);
#ifdef TEST_M64
Assert(aeq64(u64castm64(0x22446688AACCEEFFULL), 0xABABABABABABABABULL, false) == false);
#endif
// SSE1 Load instructions:
aeq(_mm_load_ps(arr), 4.f, 3.f, 2.f, 1.f); // 4-wide load from aligned address.
aeq(_mm_load_ps1(uarr), 2.f, 2.f, 2.f, 2.f); // Load scalar from unaligned address and populate 4-wide.
@ -220,6 +224,7 @@ int main()
// SSE1 Compare instructions:
// a = [8, 6, 4, 2], b = [1, 2, 3, 4]
#ifndef __EMSCRIPTEN__ // TODO: Currently disabled, since these fail due to https://github.com/kripken/emscripten/issues/2841
aeqi(_mm_cmpeq_ps(a, _mm_set_ps(8.f, 0.f, 4.f, 0.f)), 0xFFFFFFFF, 0, 0xFFFFFFFF, 0); // 4-wide cmp ==
aeqi(_mm_cmpeq_ss(a, _mm_set_ps(8.f, 0.f, 4.f, 2.f)), fcastu(8.f), fcastu(6.f), fcastu(4.f), 0xFFFFFFFF); // scalar cmp ==, pass three highest unchanged.
aeqi(_mm_cmpge_ps(a, _mm_set_ps(8.f, 7.f, 3.f, 5.f)), 0xFFFFFFFF, 0, 0xFFFFFFFF, 0); // 4-wide cmp >=
@ -230,7 +235,6 @@ int main()
aeqi(_mm_cmple_ss(a, _mm_set_ps(8.f, 7.f, 3.f, 0.f)), fcastu(8.f), fcastu(6.f), fcastu(4.f), 0); // scalar cmp <=, pass three highest unchanged.
aeqi(_mm_cmplt_ps(a, _mm_set_ps(8.f, 7.f, 3.f, 5.f)), 0, 0xFFFFFFFF, 0, 0xFFFFFFFF); // 4-wide cmp <
aeqi(_mm_cmplt_ss(a, _mm_set_ps(8.f, 7.f, 3.f, 2.f)), fcastu(8.f), fcastu(6.f), fcastu(4.f), 0); // scalar cmp <, pass three highest unchanged.
aeqi(_mm_cmpneq_ps(a, _mm_set_ps(8.f, 0.f, 4.f, 0.f)), 0, 0xFFFFFFFF, 0, 0xFFFFFFFF); // 4-wide cmp !=
aeqi(_mm_cmpneq_ss(a, _mm_set_ps(8.f, 0.f, 4.f, 2.f)), fcastu(8.f), fcastu(6.f), fcastu(4.f), 0); // scalar cmp !=, pass three highest unchanged.
aeqi(_mm_cmpnge_ps(a, _mm_set_ps(8.f, 7.f, 3.f, 5.f)), 0, 0xFFFFFFFF, 0, 0xFFFFFFFF); // 4-wide cmp not >=
@ -241,7 +245,7 @@ int main()
aeqi(_mm_cmpnle_ss(a, _mm_set_ps(8.f, 7.f, 3.f, 0.f)), fcastu(8.f), fcastu(6.f), fcastu(4.f), 0xFFFFFFFF); // scalar cmp not <=, pass three highest unchanged.
aeqi(_mm_cmpnlt_ps(a, _mm_set_ps(8.f, 7.f, 3.f, 5.f)), 0xFFFFFFFF, 0, 0xFFFFFFFF, 0); // 4-wide cmp not <
aeqi(_mm_cmpnlt_ss(a, _mm_set_ps(8.f, 7.f, 3.f, 2.f)), fcastu(8.f), fcastu(6.f), fcastu(4.f), 0xFFFFFFFF); // scalar cmp not <, pass three highest unchanged.
#endif
__m128 nan1 = get_nan1(); // [NAN, 0, 0, NAN]
__m128 nan2 = get_nan2(); // [NAN, NAN, 0, 0]
aeqi(_mm_cmpord_ps(nan1, nan2), 0, 0, 0xFFFFFFFF, 0); // 4-wide test if both operands are not nan.
@ -302,7 +306,7 @@ int main()
#endif
Assert(_mm_cvttss_si64(f) == -9223372036854775808ULL); // Truncating conversion from lowest channel of m128 from float to int64.
#ifndef __EMSCRIPTEN__
#ifndef __EMSCRIPTEN__ // TODO: Not implemented.
// SSE1 General support:
unsigned int mask = _MM_GET_EXCEPTION_MASK();
_MM_SET_EXCEPTION_MASK(mask);
@ -375,5 +379,8 @@ int main()
aeq(c3, 8.5f, 1.5f, 1.f, 8.f);
// All done!
printf("Success!\n");
if (numFailures == 0)
printf("Success!\n");
else
printf("%d tests failed!\n", numFailures);
}