Bug 1141121 - Immediate operands to atomics, x86 and x64. r=h4writer

Lars T Hansen 2015-03-25 10:51:12 +01:00
Parent fb023ad02c
Commit 50332264f3
5 changed files with 101 additions and 48 deletions

View file

@@ -38,6 +38,18 @@ function f2(ia, k) {
Atomics.sub(ia, 2, 1);
}
function f4(ia, k) {
// For effect, variable value. The generated code on x86/x64
// should be one LOCK ORB. (On ARM, there should be no
// sign-extend of the current value in the cell, otherwise this is
// still a LDREX/STREX loop.)
Atomics.or(ia, 6, k);
// Ditto constant value. Here the LOCK ORB should have an
// immediate operand.
Atomics.or(ia, 6, 1);
}
function g(ia, k) {
// For its value, variable value. The generated code on x86/x64
// should be one LOCK XADDB.
@@ -61,6 +73,16 @@ function g2(ia, k) {
sum += Atomics.sub(ia, 3, 1);
}
function g4(ia, k) {
// For its value, variable value. The generated code on x86/x64
// should be a loop around ORB ; CMPXCHGB
sum += Atomics.or(ia, 7, k);
// Ditto constant value. Here the ORB in the loop should have
// an immediate operand.
sum += Atomics.or(ia, 7, 1);
}
function mod(stdlib, ffi, heap) {
"use asm";
@@ -92,6 +114,8 @@ for ( var i=0 ; i < 10000 ; i++ ) {
g2(i8a, i % 10);
f3(i % 10);
g3(i % 10);
f4(i8a, i % 10);
g4(i8a, i % 10);
}
assertEq(i8a[0], ((10000 + 10000*4.5) << 24) >> 24);
@@ -100,3 +124,5 @@ assertEq(i8a[2], ((-10000 + -10000*4.5) << 24) >> 24);
assertEq(i8a[3], ((-10000 + -10000*4.5) << 24) >> 24);
assertEq(i8a[4], ((10000 + 10000*4.5) << 24) >> 24);
assertEq(i8a[5], ((10000 + 10000*4.5) << 24) >> 24);
assertEq(i8a[6], 15);
assertEq(i8a[7], 15);
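
For reference (not part of the commit), the same pattern expressed with C++ std::atomic shows why the constant case can become an immediate: when the result is unused, a fetch_or on a byte cell can compile to a single LOCK ORB, and a constant operand can be folded into that instruction as an immediate instead of being loaded into a register first.

#include <atomic>
#include <cstdint>

std::atomic<uint8_t> cell{0};

void or_for_effect(uint8_t k) {
    cell.fetch_or(k);  // variable operand: LOCK ORB with a register source
    cell.fetch_or(1);  // constant operand: LOCK ORB with an immediate
}

uint8_t or_for_value(uint8_t k) {
    // When the old value is needed, OR cannot use XADD, so this typically
    // becomes a CMPXCHGB loop; the constant can still appear as an immediate
    // in the ORB inside the loop, which is what g4 above checks for.
    return cell.fetch_or(k) + cell.fetch_or(1);
}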

View file

@@ -5059,6 +5059,8 @@ class LAtomicTypedArrayElementBinop : public LInstructionHelper<1, 3, 2>
public:
LIR_HEADER(AtomicTypedArrayElementBinop)
static const int32_t valueOp = 2;
LAtomicTypedArrayElementBinop(const LAllocation &elements, const LAllocation &index,
const LAllocation &value, const LDefinition &temp1,
const LDefinition &temp2)
@@ -5077,6 +5079,7 @@ class LAtomicTypedArrayElementBinop : public LInstructionHelper<1, 3, 2>
return getOperand(1);
}
const LAllocation *value() {
MOZ_ASSERT(valueOp == 2);
return getOperand(2);
}
const LDefinition *temp1() {
@@ -6491,6 +6494,9 @@ class LAsmJSAtomicBinopHeap : public LInstructionHelper<1, 2, 2>
{
public:
LIR_HEADER(AsmJSAtomicBinopHeap);
static const int32_t valueOp = 1;
LAsmJSAtomicBinopHeap(const LAllocation &ptr, const LAllocation &value,
const LDefinition &temp)
{
@@ -6503,6 +6509,7 @@ class LAsmJSAtomicBinopHeap : public LInstructionHelper<1, 2, 2>
return getOperand(0);
}
const LAllocation *value() {
MOZ_ASSERT(valueOp == 1);
return getOperand(1);
}
const LDefinition *temp() {
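
The new valueOp constants give the lowering code a named index for the 'value' operand, and the asserts above keep that constant in sync with the getOperand() calls. A minimal sketch of how the constant is consumed (the actual call sites appear in the lowering changes below; this line is lifted from them):

// Tie the instruction's output to operand #valueOp, so the register holding
// 'value' is overwritten with the result (which is what XADD does anyway)
// instead of forcing the output into a fixed register such as eax/rax.
defineReuseInput(lir, ins, LAsmJSAtomicBinopHeap::valueOp);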

View file

@@ -434,16 +434,13 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
//
// We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
// LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.
//
// If the operand is 8-bit we shall need to use an 8-bit register
// for it on x86 systems.
if (!ins->hasUses()) {
LAllocation value;
if (useI386ByteRegisters && ins->isByteArray())
if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
value = useFixed(ins->value(), ebx);
else
value = useRegister(ins->value());
value = useRegisterOrConstant(ins->value());
LAtomicTypedArrayElementBinopForEffect *lir =
new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);
@@ -459,8 +456,7 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
// movl src, output
// lock xaddl output, mem
//
// For the 8-bit variants XADD needs a byte register for the
// output only.
// For the 8-bit variants XADD needs a byte register for the output.
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
//
@@ -484,23 +480,18 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
// - eax is the first temp
// - we also need a second temp
//
// For simplicity we force the 'value' into a byte register if the
// array has 1-byte elements, though that could be worked around.
//
// For simplicity we also choose fixed byte registers even when
// any available byte register would have been OK.
//
// There are optimization opportunities:
// - better register allocation and instruction selection, Bug #1077036.
// - better register allocation in the x86 8-bit case, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
bool fixedOutput = true;
bool reuseInput = false;
LDefinition tempDef1 = LDefinition::BogusTemp();
LDefinition tempDef2 = LDefinition::BogusTemp();
LAllocation value;
if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
value = useRegister(ins->value());
value = useRegisterOrConstant(ins->value());
fixedOutput = false;
if (bitOp) {
tempDef1 = tempFixed(eax);
@@ -509,13 +500,22 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
tempDef1 = temp();
}
} else if (useI386ByteRegisters && ins->isByteArray()) {
value = useFixed(ins->value(), ebx);
if (ins->value()->isConstant())
value = useRegisterOrConstant(ins->value());
else
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef1 = tempFixed(ecx);
} else if (bitOp) {
value = useRegisterOrConstant(ins->value());
tempDef1 = temp();
} else if (ins->value()->isConstant()) {
fixedOutput = false;
value = useRegisterOrConstant(ins->value());
} else {
value = useRegister(ins->value());
if (bitOp)
tempDef1 = temp();
fixedOutput = false;
reuseInput = true;
value = useRegisterAtStart(ins->value());
}
LAtomicTypedArrayElementBinop *lir =
@@ -523,6 +523,8 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
if (fixedOutput)
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
else if (reuseInput)
defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
else
define(lir, ins);
}
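
The reuseInput path leans on a property of XADD noted in the comments above: after LOCK XADD the source register holds the old contents of the memory cell, so when 'value' is already in a register it can double as the output. A hedged C++ illustration of the resulting single-instruction fetch-add (plain std::atomic, not SpiderMonkey code):

#include <atomic>
#include <cstdint>

// On x86/x64 this compiles to a single LOCK XADD; the register that carried
// 'value' comes back holding the old contents of 'mem', which is exactly the
// behaviour defineReuseInput exposes to the register allocator.
uint32_t fetch_add_via_xadd(std::atomic<uint32_t> &mem, uint32_t value) {
    return mem.fetch_add(value);
}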

View file

@@ -233,42 +233,56 @@ LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
if (!ins->hasUses()) {
LAsmJSAtomicBinopHeapForEffect *lir =
new(alloc()) LAsmJSAtomicBinopHeapForEffect(useRegister(ptr),
useRegister(ins->value()));
useRegisterOrConstant(ins->value()));
add(lir, ins);
return;
}
// Case 2: the result of the operation is used.
//
// For ADD and SUB we'll use XADD (with word and byte ops as appropriate):
// For ADD and SUB we'll use XADD with word and byte ops as
// appropriate. Any output register can be used and if value is a
// register it's best if it's the same as output:
//
// movl value, output
// movl value, output ; if value != output
// lock xaddl output, mem
//
// For AND/OR/XOR we need to use a CMPXCHG loop:
// For AND/OR/XOR we need to use a CMPXCHG loop, and the output is
// always in rax:
//
// movl *mem, eax
// L: mov eax, temp
// movl *mem, rax
// L: mov rax, temp
// andl value, temp
// lock cmpxchg temp, mem ; reads eax also
// lock cmpxchg temp, mem ; reads rax also
// jnz L
// ; result in eax
// ; result in rax
//
// Note the placement of L, cmpxchg will update eax with *mem if
// Note the placement of L, cmpxchg will update rax with *mem if
// *mem does not have the expected value, so reloading it at the
// top of the loop would be redundant.
//
// We want to fix eax as the output. We also need a temp for
// the intermediate value.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LAllocation value = useRegister(ins->value());
LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp();
bool reuseInput = false;
LAllocation value;
if (bitOp || ins->value()->isConstant()) {
value = useRegisterOrConstant(ins->value());
} else {
reuseInput = true;
value = useRegisterAtStart(ins->value());
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr),
value,
bitOp ? temp() : LDefinition::BogusTemp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
if (reuseInput)
defineReuseInput(lir, ins, LAsmJSAtomicBinopHeap::valueOp);
else if (bitOp)
defineFixed(lir, ins, LAllocation(AnyRegister(rax)));
else
define(lir, ins);
}
void
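
The CMPXCHG loop in the comment above has a direct C++ analogue using compare_exchange; the sketch below (not from the commit) mirrors its structure, including the detail that a failed CMPXCHG reloads the expected value, so the loop does not re-read memory at the top:

#include <atomic>
#include <cstdint>

uint32_t fetch_or_via_cmpxchg(std::atomic<uint32_t> &mem, uint32_t value) {
    uint32_t old = mem.load();                            // movl *mem, rax
    uint32_t desired;
    do {
        desired = old | value;                            // L: mov rax, temp ; orl value, temp
        // compare_exchange_weak writes the current *mem back into 'old' on
        // failure, just as CMPXCHG updates rax, so no explicit reload here.
    } while (!mem.compare_exchange_weak(old, desired));   // lock cmpxchg temp, mem ; jnz L
    return old;                                           // result in rax
}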

View file

@@ -317,13 +317,14 @@ LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
// Case 1: the result of the operation is not used.
//
// We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
// LOCK OR, or LOCK XOR.
//
// For the 8-bit variant the ops need a byte register for the
// value; just pin the value to ebx.
// LOCK OR, or LOCK XOR. These can all take an immediate.
if (!ins->hasUses()) {
LAllocation value = byteArray ? useFixed(ins->value(), ebx) : useRegister(ins->value());
LAllocation value;
if (byteArray && !ins->value()->isConstant())
value = useFixed(ins->value(), ebx);
else
value = useRegisterOrConstant(ins->value());
LAsmJSAtomicBinopHeapForEffect *lir =
new(alloc()) LAsmJSAtomicBinopHeapForEffect(useRegister(ptr), value);
lir->setAddrTemp(temp());
@@ -361,31 +362,34 @@ LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
// For the 8-bit variants the temp must have a byte register.
//
// There are optimization opportunities:
// - better register allocation and instruction selection, Bug #1077036.
// - better 8-bit register allocation and instruction selection, Bug #1077036.
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
LDefinition tempDef = LDefinition::BogusTemp();
LAllocation value;
// Optimization opportunity: "value" need not be pinned to something that
// has a byte register unless the back-end insists on using a byte move
// for the setup or the payload computation, which really it need not do.
if (byteArray) {
value = useFixed(ins->value(), ebx);
if (bitOp)
tempDef = tempFixed(ecx);
} else {
value = useRegister(ins->value());
} else if (bitOp || ins->value()->isConstant()) {
value = useRegisterOrConstant(ins->value());
if (bitOp)
tempDef = temp();
} else {
value = useRegisterAtStart(ins->value());
}
LAsmJSAtomicBinopHeap *lir =
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
lir->setAddrTemp(temp());
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
if (byteArray || bitOp)
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
else if (ins->value()->isConstant())
define(lir, ins);
else
defineReuseInput(lir, ins, LAsmJSAtomicBinopHeap::valueOp);
}
void
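
As a final illustration (again plain std::atomic, an assumption rather than the asm.js heap machinery), the 8-bit cases show why the byte-register pinning above exists: XADDB only needs a byte-addressable output, while the CMPXCHGB loop needs the accumulator and the temp to be byte-addressable too, which on x86-32 means fixing registers like ebx and ecx unless the operand is an immediate.

#include <atomic>
#include <cstdint>

std::atomic<uint8_t> byte_cell{0};

// One LOCK XADDB; only the output register must be byte-addressable.
uint8_t add8(uint8_t v) { return byte_cell.fetch_add(v); }

// A CMPXCHGB loop; the expected value lives in al and the new value also
// needs a byte register, hence the fixed ebx/ecx choices in the lowering.
uint8_t or8(uint8_t v) { return byte_cell.fetch_or(v); }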