Bug 916167 - IonMonkey: Explicitly break dependencies from partial-writes to xmm registers. r=jandem

This commit is contained in:
Dan Gohman 2013-09-17 15:27:27 -07:00
Родитель 0e1d6bc554
Коммит 93e8d15780
4 изменённых файла: 40 добавлений и 18 удалений

Просмотреть файл

@ -1365,7 +1365,7 @@ CodeGeneratorX86Shared::visitFloor(LFloor *lir)
return false;
// Test whether the input double was integer-valued.
masm.cvtsi2sd(output, scratch);
masm.convertInt32ToDouble(output, scratch);
masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
// Input is not integer-valued, so we rounded off-by-one in the
@ -1453,7 +1453,7 @@ CodeGeneratorX86Shared::visitRound(LRound *lir)
return false;
// Test whether the truncated double was integer-valued.
masm.cvtsi2sd(output, scratch);
masm.convertInt32ToDouble(output, scratch);
masm.branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
// Input is not integer-valued, so we rounded off-by-one in the

Просмотреть файл

@ -240,19 +240,38 @@ class MacroAssemblerX86Shared : public Assembler
}
// Emits code that converts the 32-bit integer in |src| to a double in |dest|.
// The destination is zeroed before the conversion; the order of the two
// emitted instructions matters.
void convertInt32ToDouble(const Register &src, const FloatRegister &dest) {
// cvtsi2sd and friends write only part of their output register, which
// causes slowdowns on out-of-order processors. Explicitly break
// dependencies with xorpd (and xorps elsewhere), which are handled
// specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
// 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner Fog's Microarchitecture
// document.
zeroDouble(dest);
cvtsi2sd(src, dest);
}
// Address overload: wraps |src| in an Operand and forwards to the Operand
// overload of convertInt32ToDouble.
void convertInt32ToDouble(const Address &src, FloatRegister dest) {
convertInt32ToDouble(Operand(src), dest);
}
// Emits code that converts the 32-bit integer described by |src| to a double
// in |dest|.
void convertInt32ToDouble(const Operand &src, FloatRegister dest) {
    // cvtsi2sd writes only part of its output register, so clear |dest|
    // first to break the dependency on its previous contents.
    zeroDouble(dest);
    // |src| is already an Operand; pass it through directly (as the float32
    // Operand overload does) instead of copy-constructing a temporary.
    cvtsi2sd(src, dest);
}
// Emits code that converts the 32-bit integer in |src| to a float32 in |dest|.
void convertInt32ToFloat32(const Register &src, const FloatRegister &dest) {
// cvtsi2ss writes only part of its output register, so clear |dest| first
// to break the dependency on its previous contents.
zeroFloat32(dest);
cvtsi2ss(src, dest);
}
// Address overload: wraps |src| in an Operand and forwards to the Operand
// overload of convertInt32ToFloat32, which clears |dest| before converting.
void convertInt32ToFloat32(const Address &src, FloatRegister dest) {
    // Do not emit a raw cvtsi2ss here as well: it would duplicate the
    // conversion and keep the partial-register-write dependency on |dest|.
    convertInt32ToFloat32(Operand(src), dest);
}
// Emits code that converts the 32-bit integer described by |src| to a float32
// in |dest|.
void convertInt32ToFloat32(const Operand &src, FloatRegister dest) {
// cvtsi2ss writes only part of its output register, so clear |dest| first
// to break the dependency on its previous contents.
zeroFloat32(dest);
cvtsi2ss(src, dest);
}
// Emits a comparison of |reg| against a zeroed ScratchFloatReg and returns the
// condition the caller should test: NonZero when |truthy| is set, Zero
// otherwise. Clobbers ScratchFloatReg.
Condition testDoubleTruthy(bool truthy, const FloatRegister &reg) {
    // Zero the scratch register exactly once; zeroDouble emits the xorpd, so
    // a separate raw xorpd would only duplicate the instruction.
    zeroDouble(ScratchFloatReg);
    ucomisd(ScratchFloatReg, reg);
    return truthy ? NonZero : Zero;
}
@ -325,6 +344,9 @@ class MacroAssemblerX86Shared : public Assembler
// Clears |reg| by xor-ing it with itself using xorpd (the double-precision
// packed form).
void zeroDouble(FloatRegister reg) {
xorpd(reg, reg);
}
// Clears |reg| by xor-ing it with itself using xorps (the single-precision
// packed form).
void zeroFloat32(FloatRegister reg) {
xorps(reg, reg);
}
void negateDouble(FloatRegister reg) {
// From MacroAssemblerX86Shared::maybeInlineDouble
pcmpeqw(ScratchFloatReg, ScratchFloatReg);

Просмотреть файл

@ -984,7 +984,7 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
if (dest.isFloat()) {
Label notInt32, end;
branchTestInt32(Assembler::NotEqual, src, &notInt32);
cvtsi2sd(src.valueReg(), dest.fpu());
convertInt32ToDouble(src.valueReg(), dest.fpu());
jump(&end);
bind(&notInt32);
unboxDouble(src, dest.fpu());
@ -996,17 +996,17 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
// These two functions use the low 32-bits of the full value register.
// Converts the boolean held in |operand|'s value register to a double in
// |dest|.
void boolValueToDouble(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToDouble only: it clears |dest| before the
    // cvtsi2sd, so an additional raw cvtsi2sd would be a redundant duplicate.
    convertInt32ToDouble(operand.valueReg(), dest);
}
// Converts the int32 held in |operand|'s value register to a double in |dest|.
void int32ValueToDouble(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToDouble only: it clears |dest| before the
    // cvtsi2sd, so an additional raw cvtsi2sd would be a redundant duplicate.
    convertInt32ToDouble(operand.valueReg(), dest);
}
// Converts the boolean held in |operand|'s value register to a float32 in
// |dest|.
void boolValueToFloat32(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToFloat32 only: it clears |dest| before the
    // cvtsi2ss, so an additional raw cvtsi2ss would be a redundant duplicate.
    convertInt32ToFloat32(operand.valueReg(), dest);
}
// Converts the int32 held in |operand|'s value register to a float32 in
// |dest|.
void int32ValueToFloat32(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToFloat32 only: it clears |dest| before the
    // cvtsi2ss, so an additional raw cvtsi2ss would be a redundant duplicate.
    convertInt32ToFloat32(operand.valueReg(), dest);
}
void loadConstantDouble(double d, const FloatRegister &dest);
@ -1053,7 +1053,7 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
void loadInt32OrDouble(const Operand &operand, const FloatRegister &dest) {
Label notInt32, end;
branchTestInt32(Assembler::NotEqual, operand, &notInt32);
cvtsi2sd(operand, dest);
convertInt32ToDouble(operand, dest);
jump(&end);
bind(&notInt32);
movsd(operand, dest);

Просмотреть файл

@ -808,7 +808,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
if (dest.isFloat()) {
Label notInt32, end;
branchTestInt32(Assembler::NotEqual, src, &notInt32);
cvtsi2sd(src.payloadReg(), dest.fpu());
convertInt32ToDouble(src.payloadReg(), dest.fpu());
jump(&end);
bind(&notInt32);
unboxDouble(src, dest.fpu());
@ -852,16 +852,16 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
}
// Converts the boolean held in |operand|'s payload register to a double in
// |dest|.
void boolValueToDouble(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToDouble only: it clears |dest| before the
    // cvtsi2sd, so an additional raw cvtsi2sd would be a redundant duplicate.
    convertInt32ToDouble(operand.payloadReg(), dest);
}
// Converts the boolean held in |operand|'s payload register to a float32 in
// |dest|.
void boolValueToFloat32(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToFloat32 only: it clears |dest| before the
    // cvtsi2ss, so an additional raw cvtsi2ss would be a redundant duplicate.
    convertInt32ToFloat32(operand.payloadReg(), dest);
}
// Converts the int32 held in |operand|'s payload register to a double in
// |dest|.
void int32ValueToDouble(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToDouble only: it clears |dest| before the
    // cvtsi2sd, so an additional raw cvtsi2sd would be a redundant duplicate.
    convertInt32ToDouble(operand.payloadReg(), dest);
}
// Converts the int32 held in |operand|'s payload register to a float32 in
// |dest|.
void int32ValueToFloat32(const ValueOperand &operand, const FloatRegister &dest) {
    // Go through convertInt32ToFloat32 only: it clears |dest| before the
    // cvtsi2ss, so an additional raw cvtsi2ss would be a redundant duplicate.
    convertInt32ToFloat32(operand.payloadReg(), dest);
}
void loadConstantDouble(double d, const FloatRegister &dest);
@ -903,7 +903,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
void loadInt32OrDouble(const Operand &operand, const FloatRegister &dest) {
Label notInt32, end;
branchTestInt32(Assembler::NotEqual, operand, &notInt32);
cvtsi2sd(ToPayload(operand), dest);
convertInt32ToDouble(ToPayload(operand), dest);
jump(&end);
bind(&notInt32);
movsd(operand, dest);
@ -953,7 +953,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
subl(Imm32(0x80000000), src);
// Now src is [-2^31, 2^31-1] - int range, but not the same value.
cvtsi2sd(src, dest);
convertInt32ToDouble(src, dest);
// dest is now a double with the int range.
// correct the double value by adding 0x80000000.
@ -966,7 +966,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
subl(Imm32(0x80000000), src);
// Do it the GCC way
cvtsi2ss(src, dest);
convertInt32ToFloat32(src, dest);
// dest is now a double with the int range.
// correct the double value by adding 0x80000000.