Bug 1246800 - Masked shift-by-scalar amounts. r=sunfish

Modulo-reduce SIMD shift amounts by the size of the lane instead of saturating
shift amounts larger than the number of bits in a lane.

MozReview-Commit-ID: KdfpIvpucPt
This commit is contained in:
Jakob Stoklund Olesen 2016-02-22 09:43:09 -08:00
Родитель 58922d0029
Коммит 4376359dd4
7 изменённых файлов: 68 добавлений и 57 удалений

Просмотреть файл

@ -712,27 +712,37 @@ template<typename T>
struct Or {
static T apply(T l, T r) { return l | r; }
};
// For the following three operators, if the value v we're trying to shift is
// such that v << bits can't fit in the int32 range, then we have undefined
// behavior, according to C++11 [expr.shift]p2. However, left-shifting an
// unsigned type is well-defined.
//
// In C++, shifting by an amount outside the range [0;N-1] is undefined
// behavior. SIMD.js reduces the shift amount modulo the number of bits in a
// lane and has defined behavior for all shift amounts.
// Lane-wise left shift by a scalar amount. The shift amount is reduced modulo
// the number of bits in a lane, so every value of |bits| (including negative
// ones) has defined behavior.
template<typename T>
struct ShiftLeft {
    static T apply(T v, int32_t bits) {
        typedef typename mozilla::MakeUnsigned<T>::Type UnsignedT;
        // Treat |bits| as unsigned before reducing it so that negative
        // amounts wrap around instead of producing a negative remainder.
        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
        // Shift through the unsigned counterpart of T: left-shifting a
        // negative signed value is undefined behavior in C++11.
        return UnsignedT(v) << maskedBits;
    }
};
// Lane-wise arithmetic (sign-propagating) right shift by a scalar amount. The
// shift amount is reduced modulo the number of bits in a lane.
template<typename T>
struct ShiftRightArithmetic {
    static T apply(T v, int32_t bits) {
        typedef typename mozilla::MakeSigned<T>::Type SignedT;
        // Treat |bits| as unsigned before reducing it so that negative
        // amounts wrap around instead of producing a negative remainder.
        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
        // Shift the signed counterpart of T so unsigned lane types are
        // shifted as signed values too.
        return SignedT(v) >> maskedBits;
    }
};
// Lane-wise logical (zero-filling) right shift by a scalar amount. The shift
// amount is reduced modulo the number of bits in a lane.
template<typename T>
struct ShiftRightLogical {
    static T apply(T v, int32_t bits) {
        typedef typename mozilla::MakeUnsigned<T>::Type UnsignedT;
        // Treat |bits| as unsigned before reducing it so that negative
        // amounts wrap around instead of producing a negative remainder.
        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
        // Convert to the lane's own unsigned type so that zeros, not sign
        // bits, are shifted in from the top of the lane.
        return UnsignedT(v) >> maskedBits;
    }
};

Просмотреть файл

@ -4,16 +4,16 @@ setJitCompilerOption("ion.warmup.trigger", 50);
// Partially applies |f|: the returned function has |arg| bound as its first
// argument and |this| bound to null.
function curry(f, arg) {
    var partial = f.bind(null, arg);
    return partial;
}
// Reference Int32 left shift: only the low five bits of |count| are used.
function binaryLsh(count, v) {
    count &= 31;
    return (v << count) | 0;
}
// Returns a one-argument Int32 left-shift function with |count| fixed.
function lsh(count) {
    var shifter = curry(binaryLsh, count);
    return shifter;
}
// Reference Int32 arithmetic right shift: only the low five bits of |count|
// are used.
function binaryRsh(count, v) {
    count &= 31;
    return (v >> count) | 0;
}
// Returns a one-argument Int32 arithmetic right-shift function with |count|
// fixed.
function rsh(count) {
    var shifter = curry(binaryRsh, count);
    return shifter;
}
// Reference Uint32 left shift: only the low five bits of |count| are used and
// the result is converted to an unsigned 32-bit value.
function binaryUlsh(count, v) {
    count &= 31;
    return (v << count) >>> 0;
}
// Returns a one-argument Uint32 left-shift function with |count| fixed.
function ulsh(count) {
    var shifter = curry(binaryUlsh, count);
    return shifter;
}
// Reference Uint32 logical right shift: only the low five bits of |count| are
// used.
function binaryUrsh(count, v) {
    count &= 31;
    return v >>> count;
}
// Returns a one-argument Uint32 logical right-shift function with |count|
// fixed.
function ursh(count) {
    var shifter = curry(binaryUrsh, count);
    return shifter;
}
function f() {
@ -21,9 +21,8 @@ function f() {
var u = SIMD.Uint32x4(1, 0x55005500, -3, 0xaa00aa00);
var a = [1, 2, -3, 4];
var b = [1, 0x55005500, -3, 0xaa00aa00];
var zeros = [0,0,0,0];
var shifts = [-1, 0, 1, 31, 32];
var shifts = [-2, -1, 0, 1, 31, 32, 33];
var r;
for (var i = 0; i < 150; i++) {
@ -34,13 +33,15 @@ function f() {
assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 2), a.map(lsh(2)));
assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 31), a.map(lsh(31)));
assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 32), a.map(lsh(32)));
assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 33), a.map(lsh(33)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, -1), a.map(rsh(31)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 0), a.map(rsh(0)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 1), a.map(rsh(1)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 2), a.map(rsh(2)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 31), a.map(rsh(31)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 32), a.map(rsh(31)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 32), a.map(rsh(32)));
assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 33), a.map(rsh(33)));
assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, -1), b.map(ulsh(-1)));
assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 0), b.map(ulsh(0)));
@ -48,6 +49,7 @@ function f() {
assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 2), b.map(ulsh(2)));
assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 31), b.map(ulsh(31)));
assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 32), b.map(ulsh(32)));
assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 33), b.map(ulsh(33)));
assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, -1), b.map(ursh(-1)));
assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 0), b.map(ursh(0)));
@ -55,6 +57,7 @@ function f() {
assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 2), b.map(ursh(2)));
assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 31), b.map(ursh(31)));
assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 32), b.map(ursh(32)));
assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 33), b.map(ursh(33)));
// Non constant shift counts
var c = shifts[i % shifts.length];

Просмотреть файл

@ -1082,11 +1082,8 @@ assertAsmTypeFail('glob', USE_ASM + I32 + CI32 + FROUND + LSHI + "function f() {
var input = 'i4(0, 1, ' + INT32_MIN + ', ' + INT32_MAX + ')';
var vinput = [0, 1, INT32_MIN, INT32_MAX];
// Shift counts above 31 keep only their five low bits, so the reference
// results below wrap around instead of saturating. See bug 1246800.
function Lsh(i) { return function(x) { return (x << (i & 31)) | 0 } }
function Rsh(i) { return function(x) { return (x >> (i & 31)) | 0 } }
var asmLsh = asmLink(asmCompile('glob', USE_ASM + I32 + CI32 + LSHI + 'function f(x, y){x=x|0;y=y|0; var v=' + input + ';return ci4(lsh(v, x+y))} return f;'), this)
var asmRsh = asmLink(asmCompile('glob', USE_ASM + I32 + CI32 + RSHI + 'function f(x, y){x=x|0;y=y|0; var v=' + input + ';return ci4(rsh(v, x+y))} return f;'), this)
@ -1106,8 +1103,8 @@ const RSHU = 'var rsh=u4.shiftRightByScalar;'
input = 'u4(0, 1, 0x80008000, ' + INT32_MAX + ')';
vinput = [0, 1, 0x80008000, INT32_MAX];
// Unsigned reference shifts: the count is reduced to its five low bits.
function uLsh(i) { return function(x) { return (x << (i & 31)) >>> 0 } }
function uRsh(i) { return function(x) { return (x >>> (i & 31)) } }
// Need to bitcast to Int32x4 before returning result.
asmLsh = asmLink(asmCompile('glob', USE_ASM + U32 + CU32 + LSHU + I32 + CI32 + I32U32 +

Просмотреть файл

@ -4375,7 +4375,10 @@ LIRGenerator::visitSimdShift(MSimdShift* ins)
LUse vector = useRegisterAtStart(ins->lhs());
LAllocation value = useRegisterOrConstant(ins->rhs());
LSimdShift* lir = new(alloc()) LSimdShift(vector, value);
// We need a temp register to mask the shift amount, but not if the shift
// amount is a constant.
LDefinition tempReg = value.isConstant() ? LDefinition::BogusTemp() : temp();
LSimdShift* lir = new(alloc()) LSimdShift(vector, value, tempReg);
defineReuseInput(lir, ins, 0);
}

Просмотреть файл

@ -578,13 +578,17 @@ class LSimdBinaryBitwiseX4 : public LInstructionHelper<1, 2, 0>
}
};
class LSimdShift : public LInstructionHelper<1, 2, 0>
// Shift a SIMD vector by a scalar amount.
// The temp register is only required if the shift amount is a dynamic
// value. If it is a constant, use a BogusTemp instead.
class LSimdShift : public LInstructionHelper<1, 2, 1>
{
public:
LIR_HEADER(SimdShift)
LSimdShift(const LAllocation& vec, const LAllocation& val) {
LSimdShift(const LAllocation& vec, const LAllocation& val, const LDefinition& temp) {
setOperand(0, vec);
setOperand(1, val);
setTemp(0, temp);
}
const LAllocation* vector() {
return getOperand(0);
@ -592,6 +596,9 @@ class LSimdShift : public LInstructionHelper<1, 2, 0>
const LAllocation* value() {
return getOperand(1);
}
const LDefinition* temp() {
return getTemp(0);
}
MSimdShift::Operation operation() const {
return mir_->toSimdShift()->operation();
}

Просмотреть файл

@ -3433,24 +3433,11 @@ CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
FloatRegister out = ToFloatRegister(ins->output());
MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0);
// If the shift count is greater than 31, this will just zero all lanes by
// default for lsh and ursh, and for rsh extend the sign bit to all bits,
// per the SIMD.js spec (as of March 19th 2015).
// If the shift count is out of range, only use the low 5 bits.
const LAllocation* val = ins->value();
if (val->isConstant()) {
uint32_t c = uint32_t(ToInt32(val));
if (c > 31) {
switch (ins->operation()) {
case MSimdShift::lsh:
case MSimdShift::ursh:
masm.zeroInt32x4(out);
return;
default:
c = 31;
break;
}
}
Imm32 count(c);
MOZ_ASSERT(ins->temp()->isBogusTemp());
Imm32 count(uint32_t(ToInt32(val)) % 32);
switch (ins->operation()) {
case MSimdShift::lsh:
masm.packedLeftShiftByScalar(count, out);
@ -3465,9 +3452,13 @@ CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
MOZ_CRASH("unexpected SIMD bitwise op");
}
// Truncate val to 5 bits. We should have a temp register for that.
MOZ_ASSERT(val->isRegister());
Register count = ToRegister(ins->temp());
masm.mov(ToRegister(val), count);
masm.andl(Imm32(31), count);
ScratchFloat32Scope scratch(masm);
masm.vmovd(ToRegister(val), scratch);
masm.vmovd(count, scratch);
switch (ins->operation()) {
case MSimdShift::lsh:

Просмотреть файл

@ -14,68 +14,68 @@ var Uint32x4 = SIMD.Uint32x4;
// Int8 shifts.
// Int8 left shift: the count is masked to its low 3 bits and the result is
// sign-extended from 8 bits.
function lsh8(a, b) {
    return (a << (b & 7)) << 24 >> 24;
}
// Int8 arithmetic right shift: the count is masked to its low 3 bits and the
// result is sign-extended from 8 bits.
function rsha8(a, b) {
    return (a >> (b & 7)) << 24 >> 24;
}
// Int8 logical right shift: the count is masked to its low 3 bits and the
// result is sign-extended from 8 bits.
function rshl8(a, b) {
    return (a >>> (b & 7)) << 24 >> 24;
}
// Int16 shifts.
// Int16 left shift: the count is masked to its low 4 bits and the result is
// sign-extended from 16 bits.
function lsh16(a, b) {
    return (a << (b & 15)) << 16 >> 16;
}
// Int16 arithmetic right shift: the count is masked to its low 4 bits and the
// result is sign-extended from 16 bits.
function rsha16(a, b) {
    return (a >> (b & 15)) << 16 >> 16;
}
// Int16 logical right shift: the count is masked to its low 4 bits and the
// result is sign-extended from 16 bits.
function rshl16(a, b) {
    return (a >>> (b & 15)) << 16 >> 16;
}
// Int32 shifts.
// Int32 left shift: the count is masked to its low 5 bits and the result is
// converted to a signed 32-bit value.
function lsh32(a, b) {
    return (a << (b & 31)) | 0;
}
// Int32 arithmetic right shift: the count is masked to its low 5 bits.
function rsha32(a, b) {
    return (a >> (b & 31)) | 0;
}
// Int32 logical right shift: the count is masked to its low 5 bits and the
// result is converted back to a signed 32-bit value.
function rshl32(a, b) {
    return (a >>> (b & 31)) | 0;
}
// Uint8 shifts.
// Uint8 left shift: the count is masked to its low 3 bits and the result is
// truncated to 8 unsigned bits.
function ulsh8(a, b) {
    return (a << (b & 7)) << 24 >>> 24;
}
// Uint8 arithmetic right shift: the input is first sign-extended from 8 bits,
// shifted by the count masked to its low 3 bits, then truncated to 8 unsigned
// bits.
function ursha8(a, b) {
    return ((a << 24 >> 24) >> (b & 7)) << 24 >>> 24;
}
// Uint8 logical right shift: the count is masked to its low 3 bits and the
// result is truncated to 8 unsigned bits.
function urshl8(a, b) {
    return (a >>> (b & 7)) << 24 >>> 24;
}
// Uint16 shifts.
// Uint16 left shift: the count is masked to its low 4 bits and the result is
// truncated to 16 unsigned bits.
function ulsh16(a, b) {
    return (a << (b & 15)) << 16 >>> 16;
}
// Uint16 arithmetic right shift: the input is first sign-extended from 16
// bits, shifted by the count masked to its low 4 bits, then truncated to 16
// unsigned bits.
function ursha16(a, b) {
    return ((a << 16 >> 16) >> (b & 15)) << 16 >>> 16;
}
// Uint16 logical right shift: the count is masked to its low 4 bits and the
// result is truncated to 16 unsigned bits.
function urshl16(a, b) {
    return (a >>> (b & 15)) << 16 >>> 16;
}
// Uint32 shifts.
// Uint32 left shift: the count is masked to its low 5 bits and the result is
// converted to an unsigned 32-bit value.
function ulsh32(a, b) {
    return (a << (b & 31)) >>> 0;
}
// Uint32 arithmetic right shift: the input is reinterpreted as a signed
// 32-bit value, shifted by the count masked to its low 5 bits, then converted
// back to unsigned.
function ursha32(a, b) {
    return ((a | 0) >> (b & 31)) >>> 0;
}
// Uint32 logical right shift: the count is masked to its low 5 bits.
function urshl32(a, b) {
    return (a >>> (b & 31)) >>> 0;
}
function test() {