Bug 1065339 - SpiderMonkey: VEX encodings for blendps and blendvps r=jandem

This commit is contained in:
Dan Gohman 2014-12-08 18:20:30 -08:00
Родитель 18c1c30005
Коммит 042838584e
3 изменённых файлов: 71 добавлений и 18 удалений

Просмотреть файл

@ -2217,18 +2217,18 @@ class AssemblerX86Shared : public AssemblerShared
// Packs four boolean lane selectors into a 4-bit value suitable for use as
// the mask argument of vblendps: x lands in bit 0, y in bit 1, z in bit 2
// and w in bit 3.
unsigned blendpsMask(bool x, bool y, bool z, bool w) {
    unsigned packed = 0;
    if (x)
        packed |= 1u << 0;
    if (y)
        packed |= 1u << 1;
    if (z)
        packed |= 1u << 2;
    if (w)
        packed |= 1u << 3;
    return packed;
}
void blendps(FloatRegister src, FloatRegister dest, unsigned mask) {
void vblendps(unsigned mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
masm.blendps_irr(mask, src.code(), dest.code());
masm.vblendps_irr(mask, src1.code(), src0.code(), dest.code());
}
void blendps(const Operand &src, FloatRegister dest, unsigned mask) {
void vblendps(unsigned mask, const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.blendps_irr(mask, src.fpu(), dest.code());
masm.vblendps_irr(mask, src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.blendps_imr(mask, src.disp(), src.base(), dest.code());
masm.vblendps_imr(mask, src1.disp(), src1.base(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");

Просмотреть файл

@ -394,20 +394,24 @@ private:
typedef enum {
OP3_ROUNDSS_VsdWsd = 0x0A,
OP3_ROUNDSD_VsdWsd = 0x0B,
OP3_BLENDVPS_VdqWdq = 0x14,
OP3_PEXTRD_EdVdqIb = 0x16,
OP3_BLENDPS_VpsWpsIb = 0x0C,
OP3_PTEST_VdVd = 0x17,
OP3_INSERTPS_VpsUps = 0x21,
OP3_PINSRD_VdqEdIb = 0x22
OP3_PINSRD_VdqEdIb = 0x22,
OP3_VBLENDVPS_VdqWdq = 0x4A
} ThreeByteOpcodeID;
typedef enum {
ESCAPE_BLENDVPS = 0x38,
ESCAPE_PTEST = 0x38,
ESCAPE_PINSRD = 0x3A,
ESCAPE_PEXTRD = 0x3A,
ESCAPE_ROUNDSD = 0x3A,
ESCAPE_INSERTPS = 0x3A,
ESCAPE_BLENDPS = 0x3A
ESCAPE_BLENDPS = 0x3A,
ESCAPE_VBLENDVPS = 0x3A
} ThreeByteEscape;
typedef enum {
@ -3765,25 +3769,26 @@ public:
m_formatter.immediate8(uint8_t(lane));
}
void blendps_irr(unsigned imm, XMMRegisterID src, XMMRegisterID dst)
void vblendps_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
MOZ_ASSERT(imm < 16);
spew("blendps $%x, %s, %s", imm, nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.threeByteOp(OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, (RegisterID)src, (RegisterID)dst);
// Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix.
threeByteOpSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, src1, src0, dst);
m_formatter.immediate8(uint8_t(imm));
}
void blendps_imr(unsigned imm, int offset, RegisterID base, XMMRegisterID dst)
void vblendps_imr(unsigned imm, int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
MOZ_ASSERT(imm < 16);
spew("blendps $%x, %s0x%x(%s), %s", imm, PRETTY_PRINT_OFFSET(offset), nameIReg(base),
nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.threeByteOp(OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, offset, base, (RegisterID)dst);
// Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix.
threeByteOpSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, offset, base, src0, dst);
m_formatter.immediate8(uint8_t(imm));
}
// Register-register form of the variable blend. All four operands (mask,
// src1 as the r/m operand, src0 and dst) are forwarded unchanged to
// vblendvOpSimd, which decides between the legacy SSE and VEX encodings.
void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
vblendvOpSimd(mask, src1, src0, dst);
}
void movsldup_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("movsldup %s, %s", nameFPReg(src), nameFPReg(dst));
@ -4227,6 +4232,20 @@ private:
return src0 == dst;
}
bool useLegacySSEEncodingForVblendv(XMMRegisterID mask, XMMRegisterID src0, XMMRegisterID dst)
{
    // Counterpart of useLegacySSEEncoding for vblendv. On top of the usual
    // "first source is also the destination" constraint, the legacy SSE
    // form only works when the mask lives in xmm0.
    if (useVEX_) {
        // VEX is available: fall back to the legacy form only when the
        // operands happen to satisfy both of its constraints.
        const bool destructive = (src0 == dst);
        const bool maskInXmm0 = (mask == X86Registers::xmm0);
        return destructive && maskInXmm0;
    }

    // Without VEX the legacy form is the only option, so the caller must
    // already have arranged the operands accordingly.
    MOZ_ASSERT(src0 == dst);
    MOZ_ASSERT(mask == X86Registers::xmm0);
    return true;
}
const char *legacySSEOpName(const char *name)
{
MOZ_ASSERT(name[0] == 'v');
@ -4359,6 +4378,25 @@ private:
m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
}
// Emits a variable blend. Blendv is a three-byte opcode whose VEX form uses
// a different opcode byte than its legacy SSE form, so it cannot go through
// the generic three-byte SIMD helper and gets its own emitter here.
void vblendvOpSimd(XMMRegisterID mask, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
{
    const bool legacy = useLegacySSEEncodingForVblendv(mask, src0, dst);

    if (!legacy) {
        spew("vblendvps %s, %s, %s, %s",
             nameFPReg(mask), nameFPReg(rm), nameFPReg(src0), nameFPReg(dst));
        // Although this is a "ps" instruction, it is encoded with the "pd" prefix.
        m_formatter.vblendvOpVex(VEX_PD, OP3_VBLENDVPS_VdqWdq, ESCAPE_VBLENDVPS,
                                 mask, (RegisterID)rm, src0, dst);
        return;
    }

    // Legacy SSE form: the mask operand is implicit, so only rm and src0
    // are named in the spew and in the encoding.
    spew("blendvps %s, %s", nameFPReg(rm), nameFPReg(src0));
    // Although this is a "ps" instruction, it is encoded with the "pd" prefix.
    m_formatter.legacySSEPrefix(VEX_PD);
    m_formatter.threeByteOp(OP3_BLENDVPS_VdqWdq, ESCAPE_BLENDVPS, (RegisterID)rm, src0);
}
#ifdef JS_CODEGEN_X64
void twoByteOpSimd64(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
@ -4680,6 +4718,21 @@ private:
memoryModRM(offset, base, reg);
}
// Writes the VEX form of a blendv-style instruction: the VEX prefix and
// opcode via threeOpVex, a register-direct ModRM byte for rm/reg, and a
// trailing immediate byte carrying the mask register number shifted into
// its upper four bits.
void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                  XMMRegisterID mask, RegisterID rm, XMMRegisterID src0, int reg)
{
    // Register-number bits above the low three are passed to threeOpVex
    // separately from the ModRM byte.
    int regExt = (reg >> 3);
    int indexExt = 0;
    int rmExt = (rm >> 3);

    int opcodeMap = 0;
    int wideBit = 0;
    int extraSrc = src0;
    int lengthBit = 0;

    // Pick the opcode-map selector that corresponds to the escape byte.
    if (escape == 0x38)
        opcodeMap = 2; // 0x0F 0x38
    else if (escape == 0x3A)
        opcodeMap = 3; // 0x0F 0x3A
    else
        MOZ_CRASH("unexpected escape");

    threeOpVex(ty, regExt, indexExt, rmExt, opcodeMap, wideBit, extraSrc, lengthBit, opcode);
    registerModRM(rm, reg);
    immediate8(mask << 4);
}
#ifdef JS_CODEGEN_X64
// Quad-word-sized operands:
//

Просмотреть файл

@ -2357,7 +2357,7 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
// If all values stay in their lane, this is a blend.
if (AssemblerX86Shared::HasSSE41()) {
if (x % 4 == 0 && y % 4 == 1 && z % 4 == 2 && w % 4 == 3) {
masm.blendps(rhs, out, masm.blendpsMask(x >= 4, y >= 4, z >= 4, w >= 4));
masm.vblendps(masm.blendpsMask(x >= 4, y >= 4, z >= 4, w >= 4), rhs, lhs, out);
return;
}
}