SIMD: use check in the benchmarks
This commit is contained in:
Родитель
c95bec6fb4
Коммит
37c5b5810e
|
@ -193,11 +193,6 @@ if (typeof SIMD.float32x4 === "undefined") {
|
|||
return new SIMD.float32x4(x, y, z, w);
|
||||
}
|
||||
|
||||
// Workaround until we implement .check in asm.js
|
||||
if (x instanceof SIMD.float32x4) {
|
||||
return x;
|
||||
}
|
||||
|
||||
this.x_ = _SIMD_PRIVATE.truncatef32(x);
|
||||
this.y_ = _SIMD_PRIVATE.truncatef32(y);
|
||||
this.z_ = _SIMD_PRIVATE.truncatef32(z);
|
||||
|
@ -469,11 +464,6 @@ if (typeof SIMD.int32x4 === "undefined") {
|
|||
return new SIMD.int32x4(x, y, z, w);
|
||||
}
|
||||
|
||||
// Workaround until we implement .check in asm.js
|
||||
if (x instanceof SIMD.int32x4) {
|
||||
return x;
|
||||
}
|
||||
|
||||
this.x_ = x|0;
|
||||
this.y_ = y|0;
|
||||
this.z_ = z|0;
|
||||
|
|
|
@ -114,8 +114,8 @@ function moduleCode(global, imp, buffer) {
|
|||
|
||||
for (i = 0; (i | 0) < (len | 0); i = (i + 16) | 0) {
|
||||
accelIndex = 0;
|
||||
newPosx4 = f4(f4load(u8, i & mk4));
|
||||
newVelx4 = f4(f4load(u8, (i & mk4) + maxBirdsx4));
|
||||
newPosx4 = f4load(u8, i & mk4);
|
||||
newVelx4 = f4load(u8, (i & mk4) + maxBirdsx4);
|
||||
for (a = 0; (a | 0) < (steps | 0); a = (a + 1) | 0) {
|
||||
accel = toF(f32[(accelIndex & accelMask) + maxBirdsx8 >> 2]);
|
||||
accelx4 = f4splat(accel);
|
||||
|
|
|
@ -113,8 +113,8 @@ function moduleCode(global, imp, buffer) {
|
|||
|
||||
for (i = 0; (i | 0) < (len | 0); i = (i + 16) | 0) {
|
||||
accelIndex = 0;
|
||||
newPosx4 = f4(f4load(u8, i & mk4));
|
||||
newVelx4 = f4(f4load(u8, (i & mk4) + maxBirdsx4));
|
||||
newPosx4 = f4load(u8, i & mk4);
|
||||
newVelx4 = f4load(u8, (i & mk4) + maxBirdsx4);
|
||||
for (a = 0; (a | 0) < (steps | 0); a = (a + 1) | 0) {
|
||||
accel = toF(f32[(accelIndex & accelMask) + maxBirdsx8 >> 2]);
|
||||
accelx4 = f4splat(accel);
|
||||
|
|
|
@ -36,6 +36,7 @@ function moduleCode(global, ffi, buffer) {
|
|||
var f4 = global.SIMD.float32x4;
|
||||
var i4add = i4.add;
|
||||
var i4and = i4.and;
|
||||
var i4check = i4.check;
|
||||
var f4add = f4.add;
|
||||
var f4sub = f4.sub;
|
||||
var f4mul = f4.mul;
|
||||
|
@ -110,7 +111,7 @@ function moduleCode(global, ffi, buffer) {
|
|||
z_im4 = f4add(c_im4, new_im4);
|
||||
count4 = i4add(count4, i4and(mi4, one4));
|
||||
}
|
||||
return i4(count4);
|
||||
return i4check(count4);
|
||||
}
|
||||
|
||||
function mandelColumnX4 (x, width, height, xf, yf, yd, max_iterations) {
|
||||
|
@ -128,7 +129,7 @@ function moduleCode(global, ffi, buffer) {
|
|||
|
||||
ydx4 = toF(yd * toF(4));
|
||||
for (y = 0; (y | 0) < (height | 0); y = (y + 4) | 0) {
|
||||
m4 = i4(mandelPixelX4(toF(xf), toF(yf), toF(yd), max_iterations));
|
||||
m4 = i4check(mandelPixelX4(toF(xf), toF(yf), toF(yd), max_iterations));
|
||||
mapColorAndSetPixel(x | 0, y | 0, width, m4.x, max_iterations);
|
||||
mapColorAndSetPixel(x | 0, (y + 1) | 0, width, m4.y, max_iterations);
|
||||
mapColorAndSetPixel(x | 0, (y + 2) | 0, width, m4.z, max_iterations);
|
||||
|
|
|
@ -35,6 +35,7 @@ function moduleCode(global, ffi, buffer) {
|
|||
var f4 = global.SIMD.float32x4;
|
||||
var i4add = i4.add;
|
||||
var i4and = i4.and;
|
||||
var i4check = i4.check;
|
||||
var f4add = f4.add;
|
||||
var f4sub = f4.sub;
|
||||
var f4mul = f4.mul;
|
||||
|
@ -109,7 +110,7 @@ function moduleCode(global, ffi, buffer) {
|
|||
z_im4 = f4add(c_im4, new_im4);
|
||||
count4 = i4add(count4, i4and(mi4, one4));
|
||||
}
|
||||
return i4(count4);
|
||||
return i4check(count4);
|
||||
}
|
||||
|
||||
function mandelColumnX4 (x, width, height, xf, yf, yd, max_iterations) {
|
||||
|
@ -127,7 +128,7 @@ function moduleCode(global, ffi, buffer) {
|
|||
|
||||
ydx4 = toF(yd * toF(4));
|
||||
for (y = 0; (y | 0) < (height | 0); y = (y + 4) | 0) {
|
||||
m4 = i4(mandelPixelX4(toF(xf), toF(yf), toF(yd), max_iterations));
|
||||
m4 = i4check(mandelPixelX4(toF(xf), toF(yf), toF(yd), max_iterations));
|
||||
mapColorAndSetPixel(x | 0, y | 0, width, m4.x, max_iterations);
|
||||
mapColorAndSetPixel(x | 0, (y + 1) | 0, width, m4.y, max_iterations);
|
||||
mapColorAndSetPixel(x | 0, (y + 2) | 0, width, m4.z, max_iterations);
|
||||
|
|
Загрузка…
Ссылка в новой задаче