зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1141121 - Immediate operands to atomics, x86 and x64. r=h4writer
This commit is contained in:
Родитель
fb023ad02c
Коммит
50332264f3
|
@ -38,6 +38,18 @@ function f2(ia, k) {
|
|||
Atomics.sub(ia, 2, 1);
|
||||
}
|
||||
|
||||
function f4(ia, k) {
|
||||
// Result discarded ("for effect"), operand is the variable k. On x86/x64
|
||||
// the generated code should be a single LOCK ORB. (On ARM there should be no
|
||||
// sign-extension of the current value in the cell; otherwise this is
|
||||
// still an LDREX/STREX loop.)
|
||||
Atomics.or(ia, 6, k);
|
||||
|
||||
// Same, but with a constant operand: here the LOCK ORB should take an
|
||||
// immediate operand.
|
||||
Atomics.or(ia, 6, 1);
|
||||
}
|
||||
|
||||
function g(ia, k) {
|
||||
// For its value, variable value. The generated code on x86/x64
|
||||
// should be one LOCK XADDB.
|
||||
|
@ -61,6 +73,16 @@ function g2(ia, k) {
|
|||
sum += Atomics.sub(ia, 3, 1);
|
||||
}
|
||||
|
||||
function g4(ia, k) {
|
||||
// Result used (accumulated into sum), operand is the variable k. On x86/x64
|
||||
// the generated code should be a loop around ORB ; CMPXCHGB.
|
||||
sum += Atomics.or(ia, 7, k);
|
||||
|
||||
// Same, but with a constant operand: here the ORB in the loop should take
|
||||
// an immediate operand.
|
||||
sum += Atomics.or(ia, 7, 1);
|
||||
}
|
||||
|
||||
function mod(stdlib, ffi, heap) {
|
||||
"use asm";
|
||||
|
||||
|
@ -92,6 +114,8 @@ for ( var i=0 ; i < 10000 ; i++ ) {
|
|||
g2(i8a, i % 10);
|
||||
f3(i % 10);
|
||||
g3(i % 10);
|
||||
f4(i8a, i % 10);
|
||||
g4(i8a, i % 10);
|
||||
}
|
||||
|
||||
assertEq(i8a[0], ((10000 + 10000*4.5) << 24) >> 24);
|
||||
|
@ -100,3 +124,5 @@ assertEq(i8a[2], ((-10000 + -10000*4.5) << 24) >> 24);
|
|||
assertEq(i8a[3], ((-10000 + -10000*4.5) << 24) >> 24);
|
||||
assertEq(i8a[4], ((10000 + 10000*4.5) << 24) >> 24);
|
||||
assertEq(i8a[5], ((10000 + 10000*4.5) << 24) >> 24);
|
||||
assertEq(i8a[6], 15);
|
||||
assertEq(i8a[7], 15);
|
||||
|
|
|
@ -5059,6 +5059,8 @@ class LAtomicTypedArrayElementBinop : public LInstructionHelper<1, 3, 2>
|
|||
public:
|
||||
LIR_HEADER(AtomicTypedArrayElementBinop)
|
||||
|
||||
static const int32_t valueOp = 2;
|
||||
|
||||
LAtomicTypedArrayElementBinop(const LAllocation &elements, const LAllocation &index,
|
||||
const LAllocation &value, const LDefinition &temp1,
|
||||
const LDefinition &temp2)
|
||||
|
@ -5077,6 +5079,7 @@ class LAtomicTypedArrayElementBinop : public LInstructionHelper<1, 3, 2>
|
|||
return getOperand(1);
|
||||
}
|
||||
const LAllocation *value() {
|
||||
MOZ_ASSERT(valueOp == 2);
|
||||
return getOperand(2);
|
||||
}
|
||||
const LDefinition *temp1() {
|
||||
|
@ -6491,6 +6494,9 @@ class LAsmJSAtomicBinopHeap : public LInstructionHelper<1, 2, 2>
|
|||
{
|
||||
public:
|
||||
LIR_HEADER(AsmJSAtomicBinopHeap);
|
||||
|
||||
static const int32_t valueOp = 1;
|
||||
|
||||
LAsmJSAtomicBinopHeap(const LAllocation &ptr, const LAllocation &value,
|
||||
const LDefinition &temp)
|
||||
{
|
||||
|
@ -6503,6 +6509,7 @@ class LAsmJSAtomicBinopHeap : public LInstructionHelper<1, 2, 2>
|
|||
return getOperand(0);
|
||||
}
|
||||
const LAllocation *value() {
|
||||
MOZ_ASSERT(valueOp == 1);
|
||||
return getOperand(1);
|
||||
}
|
||||
const LDefinition *temp() {
|
||||
|
|
|
@ -434,16 +434,13 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
|||
//
|
||||
// We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
|
||||
// LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.
|
||||
//
|
||||
// If the operand is 8-bit we shall need to use an 8-bit register
|
||||
// for it on x86 systems.
|
||||
|
||||
if (!ins->hasUses()) {
|
||||
LAllocation value;
|
||||
if (useI386ByteRegisters && ins->isByteArray())
|
||||
if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
|
||||
value = useFixed(ins->value(), ebx);
|
||||
else
|
||||
value = useRegister(ins->value());
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
|
||||
LAtomicTypedArrayElementBinopForEffect *lir =
|
||||
new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);
|
||||
|
@ -459,8 +456,7 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
|||
// movl src, output
|
||||
// lock xaddl output, mem
|
||||
//
|
||||
// For the 8-bit variants XADD needs a byte register for the
|
||||
// output only.
|
||||
// For the 8-bit variants XADD needs a byte register for the output.
|
||||
//
|
||||
// For AND/OR/XOR we need to use a CMPXCHG loop:
|
||||
//
|
||||
|
@ -484,23 +480,18 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
|||
// - eax is the first temp
|
||||
// - we also need a second temp
|
||||
//
|
||||
// For simplicity we force the 'value' into a byte register if the
|
||||
// array has 1-byte elements, though that could be worked around.
|
||||
//
|
||||
// For simplicity we also choose fixed byte registers even when
|
||||
// any available byte register would have been OK.
|
||||
//
|
||||
// There are optimization opportunities:
|
||||
// - better register allocation and instruction selection, Bug #1077036.
|
||||
// - better register allocation in the x86 8-bit case, Bug #1077036.
|
||||
|
||||
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
|
||||
bool fixedOutput = true;
|
||||
bool reuseInput = false;
|
||||
LDefinition tempDef1 = LDefinition::BogusTemp();
|
||||
LDefinition tempDef2 = LDefinition::BogusTemp();
|
||||
LAllocation value;
|
||||
|
||||
if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
|
||||
value = useRegister(ins->value());
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
fixedOutput = false;
|
||||
if (bitOp) {
|
||||
tempDef1 = tempFixed(eax);
|
||||
|
@ -509,13 +500,22 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
|||
tempDef1 = temp();
|
||||
}
|
||||
} else if (useI386ByteRegisters && ins->isByteArray()) {
|
||||
value = useFixed(ins->value(), ebx);
|
||||
if (ins->value()->isConstant())
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
else
|
||||
value = useFixed(ins->value(), ebx);
|
||||
if (bitOp)
|
||||
tempDef1 = tempFixed(ecx);
|
||||
} else if (bitOp) {
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
tempDef1 = temp();
|
||||
} else if (ins->value()->isConstant()) {
|
||||
fixedOutput = false;
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
} else {
|
||||
value = useRegister(ins->value());
|
||||
if (bitOp)
|
||||
tempDef1 = temp();
|
||||
fixedOutput = false;
|
||||
reuseInput = true;
|
||||
value = useRegisterAtStart(ins->value());
|
||||
}
|
||||
|
||||
LAtomicTypedArrayElementBinop *lir =
|
||||
|
@ -523,6 +523,8 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
|||
|
||||
if (fixedOutput)
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
else if (reuseInput)
|
||||
defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
|
||||
else
|
||||
define(lir, ins);
|
||||
}
|
||||
|
|
|
@ -233,42 +233,56 @@ LIRGeneratorX64::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
|
|||
if (!ins->hasUses()) {
|
||||
LAsmJSAtomicBinopHeapForEffect *lir =
|
||||
new(alloc()) LAsmJSAtomicBinopHeapForEffect(useRegister(ptr),
|
||||
useRegister(ins->value()));
|
||||
useRegisterOrConstant(ins->value()));
|
||||
add(lir, ins);
|
||||
return;
|
||||
}
|
||||
|
||||
// Case 2: the result of the operation is used.
|
||||
//
|
||||
// For ADD and SUB we'll use XADD (with word and byte ops as appropriate):
|
||||
// For ADD and SUB we'll use XADD with word and byte ops as
|
||||
// appropriate. Any output register can be used and if value is a
|
||||
// register it's best if it's the same as output:
|
||||
//
|
||||
// movl value, output
|
||||
// movl value, output ; if value != output
|
||||
// lock xaddl output, mem
|
||||
//
|
||||
// For AND/OR/XOR we need to use a CMPXCHG loop:
|
||||
// For AND/OR/XOR we need to use a CMPXCHG loop, and the output is
|
||||
// always in rax:
|
||||
//
|
||||
// movl *mem, eax
|
||||
// L: mov eax, temp
|
||||
// movl *mem, rax
|
||||
// L: mov rax, temp
|
||||
// andl value, temp
|
||||
// lock cmpxchg temp, mem ; reads eax also
|
||||
// lock cmpxchg temp, mem ; reads rax also
|
||||
// jnz L
|
||||
// ; result in eax
|
||||
// ; result in rax
|
||||
//
|
||||
// Note the placement of L, cmpxchg will update eax with *mem if
|
||||
// Note the placement of L, cmpxchg will update rax with *mem if
|
||||
// *mem does not have the expected value, so reloading it at the
|
||||
// top of the loop would be redundant.
|
||||
//
|
||||
// We want to fix eax as the output. We also need a temp for
|
||||
// the intermediate value.
|
||||
|
||||
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
|
||||
LAllocation value = useRegister(ins->value());
|
||||
LDefinition tempDef = bitOp ? temp() : LDefinition::BogusTemp();
|
||||
bool reuseInput = false;
|
||||
LAllocation value;
|
||||
|
||||
if (bitOp || ins->value()->isConstant()) {
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
} else {
|
||||
reuseInput = true;
|
||||
value = useRegisterAtStart(ins->value());
|
||||
}
|
||||
|
||||
LAsmJSAtomicBinopHeap *lir =
|
||||
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
|
||||
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr),
|
||||
value,
|
||||
bitOp ? temp() : LDefinition::BogusTemp());
|
||||
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
if (reuseInput)
|
||||
defineReuseInput(lir, ins, LAsmJSAtomicBinopHeap::valueOp);
|
||||
else if (bitOp)
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(rax)));
|
||||
else
|
||||
define(lir, ins);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -317,13 +317,14 @@ LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
|
|||
// Case 1: the result of the operation is not used.
|
||||
//
|
||||
// We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
|
||||
// LOCK OR, or LOCK XOR.
|
||||
//
|
||||
// For the 8-bit variant the ops need a byte register for the
|
||||
// value; just pin the value to ebx.
|
||||
// LOCK OR, or LOCK XOR. These can all take an immediate.
|
||||
|
||||
if (!ins->hasUses()) {
|
||||
LAllocation value = byteArray ? useFixed(ins->value(), ebx) : useRegister(ins->value());
|
||||
LAllocation value;
|
||||
if (byteArray && !ins->value()->isConstant())
|
||||
value = useFixed(ins->value(), ebx);
|
||||
else
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
LAsmJSAtomicBinopHeapForEffect *lir =
|
||||
new(alloc()) LAsmJSAtomicBinopHeapForEffect(useRegister(ptr), value);
|
||||
lir->setAddrTemp(temp());
|
||||
|
@ -361,31 +362,34 @@ LIRGeneratorX86::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
|
|||
// For the 8-bit variants the temp must have a byte register.
|
||||
//
|
||||
// There are optimization opportunities:
|
||||
// - better register allocation and instruction selection, Bug #1077036.
|
||||
// - better 8-bit register allocation and instruction selection, Bug #1077036.
|
||||
|
||||
bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
|
||||
LDefinition tempDef = LDefinition::BogusTemp();
|
||||
LAllocation value;
|
||||
|
||||
// Optimization opportunity: "value" need not be pinned to something that
|
||||
// has a byte register unless the back-end insists on using a byte move
|
||||
// for the setup or the payload computation, which really it need not do.
|
||||
|
||||
if (byteArray) {
|
||||
value = useFixed(ins->value(), ebx);
|
||||
if (bitOp)
|
||||
tempDef = tempFixed(ecx);
|
||||
} else {
|
||||
value = useRegister(ins->value());
|
||||
} else if (bitOp || ins->value()->isConstant()) {
|
||||
value = useRegisterOrConstant(ins->value());
|
||||
if (bitOp)
|
||||
tempDef = temp();
|
||||
} else {
|
||||
value = useRegisterAtStart(ins->value());
|
||||
}
|
||||
|
||||
LAsmJSAtomicBinopHeap *lir =
|
||||
new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
|
||||
|
||||
lir->setAddrTemp(temp());
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
if (byteArray || bitOp)
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
else if (ins->value()->isConstant())
|
||||
define(lir, ins);
|
||||
else
|
||||
defineReuseInput(lir, ins, LAsmJSAtomicBinopHeap::valueOp);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
Загрузка…
Ссылка в новой задаче