Bug 1115766 - SpiderMonkey: Eliminate a copy in shuffleFloat32 with AVX r=bbouvier

This commit is contained in:
Dan Gohman 2014-12-29 22:15:32 -08:00
Родитель 98e39c5921
Коммит 300a28fdf9
2 изменённых файлов: 5 добавлений и 7 удалений

Просмотреть файл

@ -2145,9 +2145,8 @@ CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4 *ins)
}
case MIRType_Float32x4: {
FloatRegister r = ToFloatRegister(ins->getOperand(0));
if (r != output)
masm.moveFloat32x4(r, output);
masm.vshufps(0, output, output, output);
FloatRegister rCopy = masm.reusedInputFloat32x4(r, output);
masm.vshufps(0, rCopy, rCopy, output);
break;
}
default:

Просмотреть файл

@ -1011,12 +1011,11 @@ class MacroAssemblerX86Shared : public Assembler
// clobber the output with the input and apply the instruction
// afterwards.
// Note: this is useAtStart-safe because src isn't read afterwards.
if (src != dest)
moveFloat32x4(src, dest);
vshufps(mask, dest, dest, dest);
FloatRegister srcCopy = reusedInputFloat32x4(src, dest);
vshufps(mask, srcCopy, srcCopy, dest);
}
void shuffleMix(uint32_t mask, const Operand &src, FloatRegister dest) {
// Emits a single vshufps with the given mask, passing src once and dest
// twice. NOTE(review): presumably dest serves as both the second input and
// the output register -- confirm against the vshufps declaration, which is
// not visible here.
// Note this uses vshufps, which incurs a cross-domain penalty on CPUs where
// that applies, but that's the way clang and gcc do it.
vshufps(mask, src, dest, dest);
}