Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1246800 - Masked shift-by-scalar amounts. r=sunfish
Reduce SIMD shift amounts modulo the number of bits in a lane, instead of saturating shift amounts larger than the lane width.

MozReview-Commit-ID: KdfpIvpucPt
This commit is contained in:
Parent: 58922d0029
Commit: 4376359dd4
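
For illustration only (not part of the patch), a minimal standalone C++ sketch of the semantic change for a 32-bit lane; the helper names are invented for this example, with the old variant mirroring the previous saturating rule and the new variant mirroring the modulo rule adopted below:

#include <cassert>
#include <cstdint>

// Old semantics: a shift amount of 32 or more saturates, so left shifts
// (and logical right shifts) simply zero the lane.
static int32_t oldShiftLeft(int32_t v, int32_t bits) {
    return uint32_t(bits) >= 32 ? 0 : v << bits;
}

// New semantics: the shift amount is reduced modulo the lane width, and the
// shift is performed on the unsigned type so it is well-defined in C++.
static int32_t newShiftLeft(int32_t v, int32_t bits) {
    uint32_t maskedBits = uint32_t(bits) % 32;
    return int32_t(uint32_t(v) << maskedBits);
}

int main() {
    assert(oldShiftLeft(1, 33) == 0);                      // lane zeroed
    assert(newShiftLeft(1, 33) == 2);                      // 33 % 32 == 1
    assert(uint32_t(newShiftLeft(1, -1)) == 0x80000000u);  // uint32_t(-1) % 32 == 31
    return 0;
}
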
@@ -712,27 +712,37 @@ template<typename T>
 struct Or {
     static T apply(T l, T r) { return l | r; }
 };
 // For the following three operators, if the value v we're trying to shift is
 // such that v << bits can't fit in the int32 range, then we have undefined
-// behavior, according to C++11 [expr.shift]p2.
+// behavior, according to C++11 [expr.shift]p2. However, left-shifting an
+// unsigned type is well-defined.
+//
+// In C++, shifting by an amount outside the range [0;N-1] is undefined
+// behavior. SIMD.js reduces the shift amount modulo the number of bits in a
+// lane and has defined behavior for all shift amounts.
 template<typename T>
 struct ShiftLeft {
     static T apply(T v, int32_t bits) {
-        return uint32_t(bits) >= sizeof(T) * 8 ? 0 : v << bits;
+        typedef typename mozilla::MakeUnsigned<T>::Type UnsignedT;
+        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
+        return UnsignedT(v) << maskedBits;
     }
 };
 template<typename T>
 struct ShiftRightArithmetic {
     static T apply(T v, int32_t bits) {
         typedef typename mozilla::MakeSigned<T>::Type SignedT;
-        uint32_t maxBits = sizeof(T) * 8;
-        return SignedT(v) >> (uint32_t(bits) >= maxBits ? maxBits - 1 : bits);
+        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
+        return SignedT(v) >> maskedBits;
     }
 };
 template<typename T>
 struct ShiftRightLogical {
     static T apply(T v, int32_t bits) {
-        return uint32_t(bits) >= sizeof(T) * 8 ? 0 : uint32_t(v) >> bits;
+        typedef typename mozilla::MakeUnsigned<T>::Type UnsignedT;
+        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
+        return UnsignedT(v) >> maskedBits;
     }
 };
@@ -4,16 +4,16 @@ setJitCompilerOption("ion.warmup.trigger", 50);
 function curry(f, arg) { return f.bind(null, arg); }
-function binaryLsh(count, v) { if (count>>>0 >= 32) return 0; return (v << count) | 0; }
+function binaryLsh(count, v) { count &= 31; return (v << count) | 0; }
 function lsh(count) { return curry(binaryLsh, count); }
-function binaryRsh(count, v) { if (count>>>0 >= 32) count = 31; return (v >> count) | 0; }
+function binaryRsh(count, v) { count &= 31; return (v >> count) | 0; }
 function rsh(count) { return curry(binaryRsh, count); }
-function binaryUlsh(count, v) { if (count>>>0 >= 32) return 0; return (v << count) >>> 0; }
+function binaryUlsh(count, v) { count &= 31; return (v << count) >>> 0; }
 function ulsh(count) { return curry(binaryUlsh, count); }
-function binaryUrsh(count, v) { if (count>>>0 >= 32) return 0; return v >>> count; }
+function binaryUrsh(count, v) { count &= 31; return v >>> count; }
 function ursh(count) { return curry(binaryUrsh, count); }
 function f() {
@@ -21,9 +21,8 @@ function f() {
     var u = SIMD.Uint32x4(1, 0x55005500, -3, 0xaa00aa00);
     var a = [1, 2, -3, 4];
     var b = [1, 0x55005500, -3, 0xaa00aa00];
-    var zeros = [0,0,0,0];
-    var shifts = [-1, 0, 1, 31, 32];
+    var shifts = [-2, -1, 0, 1, 31, 32, 33];
     var r;
     for (var i = 0; i < 150; i++) {
@@ -34,13 +33,15 @@ function f() {
         assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 2), a.map(lsh(2)));
         assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 31), a.map(lsh(31)));
         assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 32), a.map(lsh(32)));
+        assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 33), a.map(lsh(33)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, -1), a.map(rsh(31)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 0), a.map(rsh(0)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 1), a.map(rsh(1)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 2), a.map(rsh(2)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 31), a.map(rsh(31)));
-        assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 32), a.map(rsh(31)));
+        assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 32), a.map(rsh(32)));
+        assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 33), a.map(rsh(33)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, -1), b.map(ulsh(-1)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 0), b.map(ulsh(0)));
@@ -48,6 +49,7 @@ function f() {
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 2), b.map(ulsh(2)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 31), b.map(ulsh(31)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 32), b.map(ulsh(32)));
+        assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 33), b.map(ulsh(33)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, -1), b.map(ursh(-1)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 0), b.map(ursh(0)));
@@ -55,6 +57,7 @@ function f() {
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 2), b.map(ursh(2)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 31), b.map(ursh(31)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 32), b.map(ursh(32)));
+        assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 33), b.map(ursh(33)));
         // Non constant shift counts
         var c = shifts[i % shifts.length];
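
The rewritten helpers above model the new behavior with count &= 31, while the C++ functors compute uint32_t(bits) % 32. For 32-bit two's-complement counts the two reductions agree, including for negative counts; a tiny standalone C++ check, for illustration only:

#include <cassert>
#include <cstdint>

int main() {
    // Masking the low five bits of a 32-bit count equals reducing the
    // count, taken as unsigned, modulo 32: e.g. -1 -> 31 and 33 -> 1.
    for (int64_t c = -66; c <= 66; c++) {
        int32_t count = int32_t(c);
        assert(uint32_t(count & 31) == uint32_t(count) % 32);
    }
    return 0;
}
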
@@ -1082,11 +1082,8 @@ assertAsmTypeFail('glob', USE_ASM + I32 + CI32 + FROUND + LSHI + "function f() {
 var input = 'i4(0, 1, ' + INT32_MIN + ', ' + INT32_MAX + ')';
 var vinput = [0, 1, INT32_MIN, INT32_MAX];
-// TODO: What to do for masks > 31? Should we keep only the five low bits of
-// the mask (JS) or not (x86)?
-// See bug 1246800.
-function Lsh(i) { if (i > 31) return () => 0; return function(x) { return (x << i) | 0 } }
-function Rsh(i) { if (i > 31) return (x) => (x<0)?-1:0; return function(x) { return (x >> i) | 0 } }
+function Lsh(i) { return function(x) { return (x << (i & 31)) | 0 } }
+function Rsh(i) { return function(x) { return (x >> (i & 31)) | 0 } }
 var asmLsh = asmLink(asmCompile('glob', USE_ASM + I32 + CI32 + LSHI + 'function f(x, y){x=x|0;y=y|0; var v=' + input + ';return ci4(lsh(v, x+y))} return f;'), this)
 var asmRsh = asmLink(asmCompile('glob', USE_ASM + I32 + CI32 + RSHI + 'function f(x, y){x=x|0;y=y|0; var v=' + input + ';return ci4(rsh(v, x+y))} return f;'), this)
@@ -1106,8 +1103,8 @@ const RSHU = 'var rsh=u4.shiftRightByScalar;'
 input = 'u4(0, 1, 0x80008000, ' + INT32_MAX + ')';
 vinput = [0, 1, 0x80008000, INT32_MAX];
-function uLsh(i) { if (i > 31) return () => 0; return function(x) { return (x << i) >>> 0 } }
-function uRsh(i) { if (i > 31) return () => 0; return function(x) { return (x >>> i) } }
+function uLsh(i) { return function(x) { return (x << (i & 31)) >>> 0 } }
+function uRsh(i) { return function(x) { return (x >>> (i & 31)) } }
 // Need to bitcast to Int32x4 before returning result.
 asmLsh = asmLink(asmCompile('glob', USE_ASM + U32 + CU32 + LSHU + I32 + CI32 + I32U32 +
@@ -4375,7 +4375,10 @@ LIRGenerator::visitSimdShift(MSimdShift* ins)
     LUse vector = useRegisterAtStart(ins->lhs());
     LAllocation value = useRegisterOrConstant(ins->rhs());
-    LSimdShift* lir = new(alloc()) LSimdShift(vector, value);
+    // We need a temp register to mask the shift amount, but not if the shift
+    // amount is a constant.
+    LDefinition tempReg = value.isConstant() ? LDefinition::BogusTemp() : temp();
+    LSimdShift* lir = new(alloc()) LSimdShift(vector, value, tempReg);
     defineReuseInput(lir, ins, 0);
 }
@@ -578,13 +578,17 @@ class LSimdBinaryBitwiseX4 : public LInstructionHelper<1, 2, 0>
     }
 };
-class LSimdShift : public LInstructionHelper<1, 2, 0>
+// Shift a SIMD vector by a scalar amount.
+// The temp register is only required if the shift amount is a dynamical
+// value. If it is a constant, use a BogusTemp instead.
+class LSimdShift : public LInstructionHelper<1, 2, 1>
 {
   public:
     LIR_HEADER(SimdShift)
-    LSimdShift(const LAllocation& vec, const LAllocation& val) {
+    LSimdShift(const LAllocation& vec, const LAllocation& val, const LDefinition& temp) {
         setOperand(0, vec);
         setOperand(1, val);
+        setTemp(0, temp);
     }
     const LAllocation* vector() {
         return getOperand(0);
@@ -592,6 +596,9 @@ class LSimdShift : public LInstructionHelper<1, 2, 0>
     const LAllocation* value() {
         return getOperand(1);
     }
+    const LDefinition* temp() {
+        return getTemp(0);
+    }
     MSimdShift::Operation operation() const {
         return mir_->toSimdShift()->operation();
     }
@@ -3433,24 +3433,11 @@ CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
     FloatRegister out = ToFloatRegister(ins->output());
     MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0);
-    // If the shift count is greater than 31, this will just zero all lanes by
-    // default for lsh and ursh, and for rsh extend the sign bit to all bits,
-    // per the SIMD.js spec (as of March 19th 2015).
+    // If the shift count is out of range, only use the low 5 bits.
     const LAllocation* val = ins->value();
     if (val->isConstant()) {
-        uint32_t c = uint32_t(ToInt32(val));
-        if (c > 31) {
-            switch (ins->operation()) {
-              case MSimdShift::lsh:
-              case MSimdShift::ursh:
-                masm.zeroInt32x4(out);
-                return;
-              default:
-                c = 31;
-                break;
-            }
-        }
-        Imm32 count(c);
+        MOZ_ASSERT(ins->temp()->isBogusTemp());
+        Imm32 count(uint32_t(ToInt32(val)) % 32);
         switch (ins->operation()) {
           case MSimdShift::lsh:
             masm.packedLeftShiftByScalar(count, out);
@@ -3465,9 +3452,13 @@ CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
             MOZ_CRASH("unexpected SIMD bitwise op");
         }
+    // Truncate val to 5 bits. We should have a temp register for that.
     MOZ_ASSERT(val->isRegister());
+    Register count = ToRegister(ins->temp());
+    masm.mov(ToRegister(val), count);
+    masm.andl(Imm32(31), count);
     ScratchFloat32Scope scratch(masm);
-    masm.vmovd(ToRegister(val), scratch);
+    masm.vmovd(count, scratch);
     switch (ins->operation()) {
       case MSimdShift::lsh:
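
For context on why a dynamic shift amount needs the extra masking step: x86 packed shifts do not wrap the count, they zero the lanes once the count reaches the lane width (and arithmetic right shifts fill with the sign bit), which is the old saturating behavior rather than the new modulo behavior. A small standalone sketch using SSE2 intrinsics, assuming an x86 target (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>
#include <emmintrin.h>  // SSE2

// Shift four int32 lanes left by a variable count, as PSLLD does.
static void packedShiftLeft(const int32_t in[4], int32_t count, int32_t out[4]) {
    __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
    __m128i c = _mm_cvtsi32_si128(count);  // count in the low 32 bits
    __m128i r = _mm_sll_epi32(v, c);       // zeroes every lane if count >= 32
    _mm_storeu_si128(reinterpret_cast<__m128i*>(out), r);
}

int main() {
    int32_t in[4] = {1, 2, 3, 4};
    int32_t out[4];

    // Unmasked, a count of 33 zeroes every lane: the old behavior.
    packedShiftLeft(in, 33, out);
    assert(out[0] == 0 && out[3] == 0);

    // Masked to the low five bits, a count of 33 behaves like a count of 1,
    // matching the new SIMD.js semantics.
    packedShiftLeft(in, 33 & 31, out);
    assert(out[0] == 2 && out[3] == 8);
    return 0;
}

This is why the generated code above copies the count into a temp register, masks it with andl(Imm32(31), count), and only then moves it into the vector register with vmovd.
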
@@ -14,68 +14,68 @@ var Uint32x4 = SIMD.Uint32x4;
 // Int8 shifts.
 function lsh8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a << b) << 24 >> 24;
+    return (a << (b & 7)) << 24 >> 24;
 }
 function rsha8(a, b) {
-    return (a >> Math.min(b >>> 0, 7)) << 24 >> 24;
+    return (a >> (b & 7)) << 24 >> 24;
 }
 function rshl8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a >>> b) << 24 >> 24;
+    return (a >>> (b & 7)) << 24 >> 24;
 }
 // Int16 shifts.
 function lsh16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a << b) << 16 >> 16;
+    return (a << (b & 15)) << 16 >> 16;
 }
 function rsha16(a, b) {
-    return (a >> Math.min(b >>> 0, 15)) << 16 >> 16;
+    return (a >> (b & 15)) << 16 >> 16;
 }
 function rshl16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a >>> b) << 16 >> 16;
+    return (a >>> (b & 15)) << 16 >> 16;
 }
 // Int32 shifts.
 function lsh32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a << b) | 0;
+    return (a << (b & 31)) | 0;
 }
 function rsha32(a, b) {
-    return (a >> Math.min(b >>> 0, 31)) | 0;
+    return (a >> (b & 31)) | 0;
 }
 function rshl32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a >>> b) | 0;
+    return (a >>> (b & 31)) | 0;
 }
 // Uint8 shifts.
 function ulsh8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a << b) << 24 >>> 24;
+    return (a << (b & 7)) << 24 >>> 24;
 }
 function ursha8(a, b) {
-    return ((a << 24 >> 24) >> Math.min(b >>> 0, 7)) << 24 >>> 24;
+    return ((a << 24 >> 24) >> (b & 7)) << 24 >>> 24;
 }
 function urshl8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a >>> b) << 24 >>> 24;
+    return (a >>> (b & 7)) << 24 >>> 24;
 }
 // Uint16 shifts.
 function ulsh16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a << b) << 16 >>> 16;
+    return (a << (b & 15)) << 16 >>> 16;
 }
 function ursha16(a, b) {
-    return ((a << 16 >> 16) >> Math.min(b >>> 0, 15)) << 16 >>> 16;
+    return ((a << 16 >> 16) >> (b & 15)) << 16 >>> 16;
 }
 function urshl16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a >>> b) << 16 >>> 16;
+    return (a >>> (b & 15)) << 16 >>> 16;
 }
 // Uint32 shifts.
 function ulsh32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a << b) >>> 0;
+    return (a << (b & 31)) >>> 0;
 }
 function ursha32(a, b) {
-    return ((a | 0) >> Math.min(b >>> 0, 31)) >>> 0;
+    return ((a | 0) >> (b & 31)) >>> 0;
 }
 function urshl32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a >>> b) >>> 0;
+    return (a >>> (b & 31)) >>> 0;
 }
 function test() {