Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1246800 - Masked shift-by-scalar amounts. r=sunfish
Reduce SIMD shift amounts modulo the number of bits in a lane, instead of saturating shift amounts larger than the lane width.

MozReview-Commit-ID: KdfpIvpucPt
This commit is contained in:
Parent: 58922d0029
Commit: 4376359dd4
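
For illustration only (not part of the patch), a minimal standalone C++ sketch of the semantic change for a 32-bit lane; the helper names are invented for this example, with the old variant mirroring the previous saturating rule and the new variant mirroring the modulo rule adopted below:

#include <cassert>
#include <cstdint>

// Old semantics: a shift amount of 32 or more saturates, so left shifts
// (and logical right shifts) simply zero the lane.
static int32_t oldShiftLeft(int32_t v, int32_t bits) {
    return uint32_t(bits) >= 32 ? 0 : v << bits;
}

// New semantics: the shift amount is reduced modulo the lane width, and the
// shift is performed on the unsigned type so it is well-defined in C++.
static int32_t newShiftLeft(int32_t v, int32_t bits) {
    uint32_t maskedBits = uint32_t(bits) % 32;
    return int32_t(uint32_t(v) << maskedBits);
}

int main() {
    assert(oldShiftLeft(1, 33) == 0);                      // lane zeroed
    assert(newShiftLeft(1, 33) == 2);                      // 33 % 32 == 1
    assert(uint32_t(newShiftLeft(1, -1)) == 0x80000000u);  // uint32_t(-1) % 32 == 31
    return 0;
}
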
@@ -712,27 +712,37 @@ template<typename T>
 struct Or {
     static T apply(T l, T r) { return l | r; }
 };
 // For the following three operators, if the value v we're trying to shift is
 // such that v << bits can't fit in the int32 range, then we have undefined
-// behavior, according to C++11 [expr.shift]p2.
+// behavior, according to C++11 [expr.shift]p2. However, left-shifting an
+// unsigned type is well-defined.
+//
+// In C++, shifting by an amount outside the range [0;N-1] is undefined
+// behavior. SIMD.js reduces the shift amount modulo the number of bits in a
+// lane and has defined behavior for all shift amounts.
 template<typename T>
 struct ShiftLeft {
     static T apply(T v, int32_t bits) {
-        return uint32_t(bits) >= sizeof(T) * 8 ? 0 : v << bits;
+        typedef typename mozilla::MakeUnsigned<T>::Type UnsignedT;
+        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
+        return UnsignedT(v) << maskedBits;
     }
 };
 template<typename T>
 struct ShiftRightArithmetic {
     static T apply(T v, int32_t bits) {
         typedef typename mozilla::MakeSigned<T>::Type SignedT;
-        uint32_t maxBits = sizeof(T) * 8;
-        return SignedT(v) >> (uint32_t(bits) >= maxBits ? maxBits - 1 : bits);
+        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
+        return SignedT(v) >> maskedBits;
     }
 };
 template<typename T>
 struct ShiftRightLogical {
     static T apply(T v, int32_t bits) {
-        return uint32_t(bits) >= sizeof(T) * 8 ? 0 : uint32_t(v) >> bits;
+        typedef typename mozilla::MakeUnsigned<T>::Type UnsignedT;
+        uint32_t maskedBits = uint32_t(bits) % (sizeof(T) * 8);
+        return UnsignedT(v) >> maskedBits;
     }
 };
@@ -4,16 +4,16 @@ setJitCompilerOption("ion.warmup.trigger", 50);
 function curry(f, arg) { return f.bind(null, arg); }
-function binaryLsh(count, v) { if (count>>>0 >= 32) return 0; return (v << count) | 0; }
+function binaryLsh(count, v) { count &= 31; return (v << count) | 0; }
 function lsh(count) { return curry(binaryLsh, count); }
-function binaryRsh(count, v) { if (count>>>0 >= 32) count = 31; return (v >> count) | 0; }
+function binaryRsh(count, v) { count &= 31; return (v >> count) | 0; }
 function rsh(count) { return curry(binaryRsh, count); }
-function binaryUlsh(count, v) { if (count>>>0 >= 32) return 0; return (v << count) >>> 0; }
+function binaryUlsh(count, v) { count &= 31; return (v << count) >>> 0; }
 function ulsh(count) { return curry(binaryUlsh, count); }
-function binaryUrsh(count, v) { if (count>>>0 >= 32) return 0; return v >>> count; }
+function binaryUrsh(count, v) { count &= 31; return v >>> count; }
 function ursh(count) { return curry(binaryUrsh, count); }
 function f() {
@@ -21,9 +21,8 @@ function f() {
     var u = SIMD.Uint32x4(1, 0x55005500, -3, 0xaa00aa00);
     var a = [1, 2, -3, 4];
     var b = [1, 0x55005500, -3, 0xaa00aa00];
-    var zeros = [0,0,0,0];
-    var shifts = [-1, 0, 1, 31, 32];
+    var shifts = [-2, -1, 0, 1, 31, 32, 33];
     var r;
     for (var i = 0; i < 150; i++) {
@@ -34,13 +33,15 @@ function f() {
         assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 2), a.map(lsh(2)));
         assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 31), a.map(lsh(31)));
         assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 32), a.map(lsh(32)));
+        assertEqX4(SIMD.Int32x4.shiftLeftByScalar(v, 33), a.map(lsh(33)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, -1), a.map(rsh(31)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 0), a.map(rsh(0)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 1), a.map(rsh(1)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 2), a.map(rsh(2)));
         assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 31), a.map(rsh(31)));
-        assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 32), a.map(rsh(31)));
+        assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 32), a.map(rsh(32)));
+        assertEqX4(SIMD.Int32x4.shiftRightByScalar(v, 33), a.map(rsh(33)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, -1), b.map(ulsh(-1)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 0), b.map(ulsh(0)));
@@ -48,6 +49,7 @@ function f() {
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 2), b.map(ulsh(2)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 31), b.map(ulsh(31)));
         assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 32), b.map(ulsh(32)));
+        assertEqX4(SIMD.Uint32x4.shiftLeftByScalar(u, 33), b.map(ulsh(33)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, -1), b.map(ursh(-1)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 0), b.map(ursh(0)));
@@ -55,6 +57,7 @@ function f() {
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 2), b.map(ursh(2)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 31), b.map(ursh(31)));
         assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 32), b.map(ursh(32)));
+        assertEqX4(SIMD.Uint32x4.shiftRightByScalar(u, 33), b.map(ursh(33)));
         // Non constant shift counts
         var c = shifts[i % shifts.length];
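
The rewritten helpers above model the new behavior with count &= 31, while the C++ functors compute uint32_t(bits) % 32. For 32-bit two's-complement counts the two reductions agree, including for negative counts; a tiny standalone C++ check, for illustration only:

#include <cassert>
#include <cstdint>

int main() {
    // Masking the low five bits of a 32-bit count equals reducing the
    // count, taken as unsigned, modulo 32: e.g. -1 -> 31 and 33 -> 1.
    for (int64_t c = -66; c <= 66; c++) {
        int32_t count = int32_t(c);
        assert(uint32_t(count & 31) == uint32_t(count) % 32);
    }
    return 0;
}
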
@@ -1082,11 +1082,8 @@ assertAsmTypeFail('glob', USE_ASM + I32 + CI32 + FROUND + LSHI + "function f() {
 var input = 'i4(0, 1, ' + INT32_MIN + ', ' + INT32_MAX + ')';
 var vinput = [0, 1, INT32_MIN, INT32_MAX];
-// TODO: What to do for masks > 31? Should we keep only the five low bits of
-// the mask (JS) or not (x86)?
-// See bug 1246800.
-function Lsh(i) { if (i > 31) return () => 0; return function(x) { return (x << i) | 0 } }
-function Rsh(i) { if (i > 31) return (x) => (x<0)?-1:0; return function(x) { return (x >> i) | 0 } }
+function Lsh(i) { return function(x) { return (x << (i & 31)) | 0 } }
+function Rsh(i) { return function(x) { return (x >> (i & 31)) | 0 } }
 var asmLsh = asmLink(asmCompile('glob', USE_ASM + I32 + CI32 + LSHI + 'function f(x, y){x=x|0;y=y|0; var v=' + input + ';return ci4(lsh(v, x+y))} return f;'), this)
 var asmRsh = asmLink(asmCompile('glob', USE_ASM + I32 + CI32 + RSHI + 'function f(x, y){x=x|0;y=y|0; var v=' + input + ';return ci4(rsh(v, x+y))} return f;'), this)
@@ -1106,8 +1103,8 @@ const RSHU = 'var rsh=u4.shiftRightByScalar;'
 input = 'u4(0, 1, 0x80008000, ' + INT32_MAX + ')';
 vinput = [0, 1, 0x80008000, INT32_MAX];
-function uLsh(i) { if (i > 31) return () => 0; return function(x) { return (x << i) >>> 0 } }
-function uRsh(i) { if (i > 31) return () => 0; return function(x) { return (x >>> i) } }
+function uLsh(i) { return function(x) { return (x << (i & 31)) >>> 0 } }
+function uRsh(i) { return function(x) { return (x >>> (i & 31)) } }
 // Need to bitcast to Int32x4 before returning result.
 asmLsh = asmLink(asmCompile('glob', USE_ASM + U32 + CU32 + LSHU + I32 + CI32 + I32U32 +
@@ -4375,7 +4375,10 @@ LIRGenerator::visitSimdShift(MSimdShift* ins)
     LUse vector = useRegisterAtStart(ins->lhs());
     LAllocation value = useRegisterOrConstant(ins->rhs());
-    LSimdShift* lir = new(alloc()) LSimdShift(vector, value);
+    // We need a temp register to mask the shift amount, but not if the shift
+    // amount is a constant.
+    LDefinition tempReg = value.isConstant() ? LDefinition::BogusTemp() : temp();
+    LSimdShift* lir = new(alloc()) LSimdShift(vector, value, tempReg);
     defineReuseInput(lir, ins, 0);
 }
@@ -578,13 +578,17 @@ class LSimdBinaryBitwiseX4 : public LInstructionHelper<1, 2, 0>
     }
 };
-class LSimdShift : public LInstructionHelper<1, 2, 0>
+// Shift a SIMD vector by a scalar amount.
+// The temp register is only required if the shift amount is a dynamical
+// value. If it is a constant, use a BogusTemp instead.
+class LSimdShift : public LInstructionHelper<1, 2, 1>
 {
   public:
     LIR_HEADER(SimdShift)
-    LSimdShift(const LAllocation& vec, const LAllocation& val) {
+    LSimdShift(const LAllocation& vec, const LAllocation& val, const LDefinition& temp) {
         setOperand(0, vec);
         setOperand(1, val);
+        setTemp(0, temp);
     }
     const LAllocation* vector() {
         return getOperand(0);
@@ -592,6 +596,9 @@ class LSimdShift : public LInstructionHelper<1, 2, 0>
     const LAllocation* value() {
         return getOperand(1);
     }
+    const LDefinition* temp() {
+        return getTemp(0);
+    }
     MSimdShift::Operation operation() const {
         return mir_->toSimdShift()->operation();
     }
@@ -3433,24 +3433,11 @@ CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
     FloatRegister out = ToFloatRegister(ins->output());
     MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0);
-    // If the shift count is greater than 31, this will just zero all lanes by
-    // default for lsh and ursh, and for rsh extend the sign bit to all bits,
-    // per the SIMD.js spec (as of March 19th 2015).
+    // If the shift count is out of range, only use the low 5 bits.
     const LAllocation* val = ins->value();
     if (val->isConstant()) {
-        uint32_t c = uint32_t(ToInt32(val));
-        if (c > 31) {
-            switch (ins->operation()) {
-              case MSimdShift::lsh:
-              case MSimdShift::ursh:
-                masm.zeroInt32x4(out);
-                return;
-              default:
-                c = 31;
-                break;
-            }
-        }
-        Imm32 count(c);
+        MOZ_ASSERT(ins->temp()->isBogusTemp());
+        Imm32 count(uint32_t(ToInt32(val)) % 32);
         switch (ins->operation()) {
           case MSimdShift::lsh:
             masm.packedLeftShiftByScalar(count, out);
@@ -3465,9 +3452,13 @@ CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
             MOZ_CRASH("unexpected SIMD bitwise op");
         }
+    // Truncate val to 5 bits. We should have a temp register for that.
     MOZ_ASSERT(val->isRegister());
+    Register count = ToRegister(ins->temp());
+    masm.mov(ToRegister(val), count);
+    masm.andl(Imm32(31), count);
     ScratchFloat32Scope scratch(masm);
-    masm.vmovd(ToRegister(val), scratch);
+    masm.vmovd(count, scratch);
     switch (ins->operation()) {
       case MSimdShift::lsh:
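
For context on why a dynamic shift amount needs the extra masking step: x86 packed shifts do not wrap the count, they zero the lanes once the count reaches the lane width (and arithmetic right shifts fill with the sign bit), which is the old saturating behavior rather than the new modulo behavior. A small standalone sketch using SSE2 intrinsics, assuming an x86 target (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>
#include <emmintrin.h>  // SSE2

// Shift four int32 lanes left by a variable count, as PSLLD does.
static void packedShiftLeft(const int32_t in[4], int32_t count, int32_t out[4]) {
    __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
    __m128i c = _mm_cvtsi32_si128(count);  // count in the low 32 bits
    __m128i r = _mm_sll_epi32(v, c);       // zeroes every lane if count >= 32
    _mm_storeu_si128(reinterpret_cast<__m128i*>(out), r);
}

int main() {
    int32_t in[4] = {1, 2, 3, 4};
    int32_t out[4];

    // Unmasked, a count of 33 zeroes every lane: the old behavior.
    packedShiftLeft(in, 33, out);
    assert(out[0] == 0 && out[3] == 0);

    // Masked to the low five bits, a count of 33 behaves like a count of 1,
    // matching the new SIMD.js semantics.
    packedShiftLeft(in, 33 & 31, out);
    assert(out[0] == 2 && out[3] == 8);
    return 0;
}

This is why the generated code above copies the count into a temp register, masks it with andl(Imm32(31), count), and only then moves it into the vector register with vmovd.
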
@@ -14,68 +14,68 @@ var Uint32x4 = SIMD.Uint32x4;
 // Int8 shifts.
 function lsh8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a << b) << 24 >> 24;
+    return (a << (b & 7)) << 24 >> 24;
 }
 function rsha8(a, b) {
-    return (a >> Math.min(b >>> 0, 7)) << 24 >> 24;
+    return (a >> (b & 7)) << 24 >> 24;
 }
 function rshl8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a >>> b) << 24 >> 24;
+    return (a >>> (b & 7)) << 24 >> 24;
 }
 // Int16 shifts.
 function lsh16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a << b) << 16 >> 16;
+    return (a << (b & 15)) << 16 >> 16;
 }
 function rsha16(a, b) {
-    return (a >> Math.min(b >>> 0, 15)) << 16 >> 16;
+    return (a >> (b & 15)) << 16 >> 16;
 }
 function rshl16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a >>> b) << 16 >> 16;
+    return (a >>> (b & 15)) << 16 >> 16;
 }
 // Int32 shifts.
 function lsh32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a << b) | 0;
+    return (a << (b & 31)) | 0;
 }
 function rsha32(a, b) {
-    return (a >> Math.min(b >>> 0, 31)) | 0;
+    return (a >> (b & 31)) | 0;
 }
 function rshl32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a >>> b) | 0;
+    return (a >>> (b & 31)) | 0;
 }
 // Uint8 shifts.
 function ulsh8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a << b) << 24 >>> 24;
+    return (a << (b & 7)) << 24 >>> 24;
 }
 function ursha8(a, b) {
-    return ((a << 24 >> 24) >> Math.min(b >>> 0, 7)) << 24 >>> 24;
+    return ((a << 24 >> 24) >> (b & 7)) << 24 >>> 24;
 }
 function urshl8(a, b) {
-    return (b >>> 0) >= 8 ? 0 : (a >>> b) << 24 >>> 24;
+    return (a >>> (b & 7)) << 24 >>> 24;
 }
 // Uint16 shifts.
 function ulsh16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a << b) << 16 >>> 16;
+    return (a << (b & 15)) << 16 >>> 16;
 }
 function ursha16(a, b) {
-    return ((a << 16 >> 16) >> Math.min(b >>> 0, 15)) << 16 >>> 16;
+    return ((a << 16 >> 16) >> (b & 15)) << 16 >>> 16;
 }
 function urshl16(a, b) {
-    return (b >>> 0) >= 16 ? 0 : (a >>> b) << 16 >>> 16;
+    return (a >>> (b & 15)) << 16 >>> 16;
 }
 // Uint32 shifts.
 function ulsh32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a << b) >>> 0;
+    return (a << (b & 31)) >>> 0;
 }
 function ursha32(a, b) {
-    return ((a | 0) >> Math.min(b >>> 0, 31)) >>> 0;
+    return ((a | 0) >> (b & 31)) >>> 0;
 }
 function urshl32(a, b) {
-    return (b >>> 0) >= 32 ? 0 : (a >>> b) >>> 0;
+    return (a >>> (b & 31)) >>> 0;
 }
 function test() {