Mirror of https://github.com/mozilla/pjs.git
implement NJ_EXPANDED_LOADSTORE_SUPPORTED for x64 backend (r=edwsmith,r=nnethercote,bug=532240)
--HG-- extra : convert_revision : cd0b46495c9520568c6507766dfdcb5fbf24d482
This commit is contained in:
Parent 741731ac0b
Commit 41c2d79462
nanojit/NativeX64.cpp

@@ -99,6 +99,11 @@ namespace nanojit
     "ah", "ch", "dh", "bh"
 };
 
+const char *gpRegNames16[] = {
+    "ax", "cx", "dx", "bx", "spx", "bpx", "six", "dix",
+    "r8x", "r9x", "r10x", "r11x", "r12x", "r13x", "r14x", "r15x"
+};
+
 #ifdef _DEBUG
 #define TODO(x) todo(#x)
 static void todo(const char *s) {
@@ -240,6 +245,11 @@ namespace nanojit
         emit(rexprb(mod_rr(op, r, b), r, b));
     }
 
+    // disp32 modrm8 form, when the disp fits in the instruction (opcode is 1-3 bytes)
+    void Assembler::emitrm8(uint64_t op, Register r, int32_t d, Register b) {
+        emit(rexrb8(mod_disp32(op, r, b, d), r, b));
+    }
+
     // disp32 modrm form, when the disp fits in the instruction (opcode is 1-3 bytes)
     void Assembler::emitrm(uint64_t op, Register r, int32_t d, Register b) {
         emit(rexrb(mod_disp32(op, r, b, d), r, b));
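Aside: the byte store that the new emitrm8 path ultimately produces is the classic REX + 88 /r encoding with a disp32 addressing mode. A standalone sketch of that encoding, ours and purely for illustration (it ignores the SIB byte needed when the base register is RSP/R12):

    #include <cstdint>
    // Encode "mov byte ptr [base+disp32], src" by hand: REX prefix, opcode 88,
    // modrm 10sssbbb (mod=10 selects a disp32), then the little-endian displacement.
    static int enc_movb_mr(uint8_t *p, unsigned src, unsigned base, int32_t disp) {
        uint8_t rex = 0x40 | ((src >> 3) << 2) | (base >> 3);   // REX.R / REX.B
        int n = 0;
        p[n++] = rex;                                           // REX kept so encodings 4-7
                                                                // read as spl/bpl/sil/dil
        p[n++] = 0x88;                                          // MOV r/m8, r8
        p[n++] = uint8_t(0x80 | ((src & 7) << 3) | (base & 7)); // mod=10, reg, rm
        for (int i = 0; i < 4; i++)
            p[n++] = uint8_t(uint32_t(disp) >> (8 * i));        // disp32, little-endian
        return n;
    }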
@@ -343,6 +353,7 @@ namespace nanojit
     }
 
 #define RB(r)   gpRegNames8[(r)]
+#define RS(r)   gpRegNames16[(r)]
 #define RBhi(r) gpRegNames8hi[(r)]
 #define RL(r)   gpRegNames32[(r)]
 #define RQ(r)   gpn(r)
@@ -439,6 +450,8 @@ namespace nanojit
     void Assembler::SUBSD(   R l, R r)  { emitprr(X64_subsd,   l,r); asm_output("subsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::CVTSQ2SD(R l, R r)  { emitprr(X64_cvtsq2sd,l,r); asm_output("cvtsq2sd %s, %s",RQ(l),RQ(r)); }
     void Assembler::CVTSI2SD(R l, R r)  { emitprr(X64_cvtsi2sd,l,r); asm_output("cvtsi2sd %s, %s",RQ(l),RL(r)); }
+    void Assembler::CVTSS2SD(R l, R r)  { emitprr(X64_cvtss2sd,l,r); asm_output("cvtss2sd %s, %s",RQ(l),RL(r)); }
+    void Assembler::CVTSD2SS(R l, R r)  { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RL(l),RQ(r)); }
     void Assembler::UCOMISD( R l, R r)  { emitprr(X64_ucomisd, l,r); asm_output("ucomisd %s, %s", RQ(l),RQ(r)); }
     void Assembler::MOVQRX(  R l, R r)  { emitprr(X64_movqrx,  r,l); asm_output("movq %s, %s",    RQ(l),RQ(r)); } // Nb: r and l are deliberately reversed within the emitprr() call.
     void Assembler::MOVQXR(  R l, R r)  { emitprr(X64_movqxr,  l,r); asm_output("movq %s, %s",    RQ(l),RQ(r)); }
@@ -483,14 +496,21 @@ namespace nanojit
     void Assembler::LEAQRM(R r1, I d, R r2)   { emitrm(X64_leaqrm,r1,d,r2);   asm_output("leaq %s, %d(%s)",RQ(r1),d,RQ(r2)); }
     void Assembler::MOVLRM(R r1, I d, R r2)   { emitrm(X64_movlrm,r1,d,r2);   asm_output("movl %s, %d(%s)",RL(r1),d,RQ(r2)); }
     void Assembler::MOVQRM(R r1, I d, R r2)   { emitrm(X64_movqrm,r1,d,r2);   asm_output("movq %s, %d(%s)",RQ(r1),d,RQ(r2)); }
+    void Assembler::MOVBMR(R r1, I d, R r2)   { emitrm8(X64_movbmr,r1,d,r2);  asm_output("movb %d(%s), %s",d,RQ(r1),RB(r2)); }
+    void Assembler::MOVSMR(R r1, I d, R r2)   { emitprm(X64_movsmr,r1,d,r2);  asm_output("movs %d(%s), %s",d,RQ(r1),RS(r2)); }
     void Assembler::MOVLMR(R r1, I d, R r2)   { emitrm(X64_movlmr,r1,d,r2);   asm_output("movl %d(%s), %s",d,RQ(r1),RL(r2)); }
     void Assembler::MOVQMR(R r1, I d, R r2)   { emitrm(X64_movqmr,r1,d,r2);   asm_output("movq %d(%s), %s",d,RQ(r1),RQ(r2)); }
 
     void Assembler::MOVZX8M( R r1, I d, R r2) { emitrm_wide(X64_movzx8m, r1,d,r2); asm_output("movzxb %s, %d(%s)",RQ(r1),d,RQ(r2)); }
     void Assembler::MOVZX16M(R r1, I d, R r2) { emitrm_wide(X64_movzx16m,r1,d,r2); asm_output("movzxs %s, %d(%s)",RQ(r1),d,RQ(r2)); }
 
+    void Assembler::MOVSX8M( R r1, I d, R r2) { emitrm_wide(X64_movsx8m, r1,d,r2); asm_output("movsxb %s, %d(%s)",RQ(r1),d,RQ(r2)); }
+    void Assembler::MOVSX16M(R r1, I d, R r2) { emitrm_wide(X64_movsx16m,r1,d,r2); asm_output("movsxs %s, %d(%s)",RQ(r1),d,RQ(r2)); }
+
     void Assembler::MOVSDRM(R r1, I d, R r2)  { emitprm(X64_movsdrm,r1,d,r2); asm_output("movsd %s, %d(%s)",RQ(r1),d,RQ(r2)); }
     void Assembler::MOVSDMR(R r1, I d, R r2)  { emitprm(X64_movsdmr,r1,d,r2); asm_output("movsd %d(%s), %s",d,RQ(r1),RQ(r2)); }
+    void Assembler::MOVSSRM(R r1, I d, R r2)  { emitprm(X64_movssrm,r1,d,r2); asm_output("movss %s, %d(%s)",RQ(r1),d,RQ(r2)); }
+    void Assembler::MOVSSMR(R r1, I d, R r2)  { emitprm(X64_movssmr,r1,d,r2); asm_output("movss %d(%s), %s",d,RQ(r1),RQ(r2)); }
 
     void Assembler::JMP8(  S n, NIns* t)    { emit_target8(n, X64_jmp8,t);  asm_output("jmp %p", t); }
 
@@ -1339,58 +1359,62 @@ namespace nanojit
             // xmm <- xmm: use movaps. movsd r,r causes partial register stall
             MOVAPSR(d, s);
         } else {
             NanoAssert(IsFpReg(d) && !IsFpReg(s));
             // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
             MOVQXR(d, s);
         }
     }
 
-    void Assembler::regalloc_load(LIns *ins, Register &rr, int32_t &dr, Register &rb) {
+    void Assembler::regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &dr, Register &rb) {
         dr = ins->disp();
         LIns *base = ins->oprnd1();
         rb = getBaseReg(ins->opcode(), base, dr, BaseRegs);
-        if (ins->isUnusedOrHasUnknownReg()) {
-            // use a gpr in case we're copying a non-double
-            rr = prepResultReg(ins, GpRegs & ~rmask(rb));
+        if (ins->isUnusedOrHasUnknownReg() || !(allow & rmask(ins->getReg()))) {
+            rr = prepResultReg(ins, allow & ~rmask(rb));
         } else {
             // keep already assigned register
             rr = ins->getReg();
+            NanoAssert(allow & rmask(rr));
             freeRsrcOf(ins, false);
         }
     }
 
     void Assembler::asm_load64(LIns *ins) {
 
+        Register rr, rb;
+        int32_t dr;
         switch (ins->opcode()) {
             case LIR_ldq:
             case LIR_ldqc:
-                // handled by mainline code below for now
+                regalloc_load(ins, GpRegs, rr, dr, rb);
+                if (IsGpReg(rr)) {
+                    // general 64bit load, 32bit const displacement
+                    MOVQRM(rr, dr, rb);
+                } else {
+                    NanoAssert(IsFpReg(rr));
+                    // load 64bits into XMM. don't know if double or int64, assume double.
+                    MOVSDRM(rr, dr, rb);
+                }
                 break;
             case LIR_ld32f:
             case LIR_ldc32f:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
+                regalloc_load(ins, FpRegs, rr, dr, rb);
+                NanoAssert(IsFpReg(rr));
+                CVTSS2SD(rr, rr);
+                MOVSSRM(rr, dr, rb);
+                break;
             default:
                 NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
-                return;
-        }
-
-        Register rr, rb;
-        int32_t dr;
-        regalloc_load(ins, rr, dr, rb);
-        if (IsGpReg(rr)) {
-            // general 64bit load, 32bit const displacement
-            MOVQRM(rr, dr, rb);
-        } else {
-            // load 64bits into XMM. don't know if double or int64, assume double.
-            MOVSDRM(rr, dr, rb);
+                break;
         }
     }
 
     void Assembler::asm_load32(LIns *ins) {
         NanoAssert(!ins->isQuad());
         Register r, b;
         int32_t d;
-        regalloc_load(ins, r, d, b);
+        regalloc_load(ins, GpRegs, r, d, b);
         LOpcode op = ins->opcode();
         switch(op) {
             case LIR_ldzb:
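Aside: nanojit assembles code bottom-up, so in the LIR_ld32f case above CVTSS2SD is emitted before MOVSSRM even though at run time the load executes first. The net runtime effect of that case corresponds to the following C sketch (ours, for illustration only):

    #include <stdint.h>
    #include <string.h>
    // LIR_ld32f: read a 32-bit float at base+disp and widen it to the
    // 64-bit double that the rest of the FP pipeline expects.
    double ld32f(const char *base, int32_t disp) {
        float f;
        memcpy(&f, base + disp, sizeof f);  // movss xmm, disp(base)
        return (double)f;                   // cvtss2sd xmm, xmm
    }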
@@ -1406,40 +1430,32 @@ namespace nanojit
                 MOVLRM( r, d, b);
                 break;
             case LIR_ldsb:
-            case LIR_ldss:
             case LIR_ldcsb:
+                MOVSX8M( r, d, b);
+                break;
+            case LIR_ldss:
             case LIR_ldcss:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
+                MOVSX16M( r, d, b);
+                break;
             default:
                 NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
-                return;
+                break;
         }
     }
 
     void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
         NanoAssert(value->isQuad());
 
-        switch (op) {
-            case LIR_stqi:
-                // handled by mainline code below for now
-                break;
-            case LIR_st32f:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
-            default:
-                NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
-                return;
-        }
-
         Register b = getBaseReg(LIR_stqi, base, d, BaseRegs);
+        Register r;
 
         // if we have to choose a register, use a GPR, but not the base reg
-        Register r;
         if (value->isUnusedOrHasUnknownReg()) {
             RegisterMask allow;
+            // If op is LIR_st32f and we have no reg, prefer FPR over GPR: saves an instruction later,
+            // and the value is almost certainly going to be operated on as FP later anyway.
             // XXX: isFloat doesn't cover float/fmod! see bug 520208.
-            if (value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
+            if (op == LIR_st32f || value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
                 allow = FpRegs;
             } else {
                 allow = GpRegs;
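Aside: the new ldsb/ldss cases pair with the pre-existing ldzb/ldzs zero-extending loads. In C terms the four narrow-load flavors behave as follows (an illustration, not generated code):

    #include <stdint.h>
    uint32_t ldzb(const uint8_t  *p) { return *p; }  // movzx: zero-extend 8  -> 32
    uint32_t ldzs(const uint16_t *p) { return *p; }  // movzx: zero-extend 16 -> 32
    int32_t  ldsb(const int8_t   *p) { return *p; }  // movsx: sign-extend 8  -> 32
    int32_t  ldss(const int16_t  *p) { return *p; }  // movsx: sign-extend 16 -> 32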
@@ -1449,37 +1465,76 @@ namespace nanojit
             r = value->getReg();
         }
 
-        if (IsGpReg(r)) {
-            // gpr store
-            MOVQMR(r, d, b);
-        }
-        else {
-            // xmm store
-            MOVSDMR(r, d, b);
+        switch (op) {
+            case LIR_stqi:
+            {
+                if (IsGpReg(r)) {
+                    // gpr store
+                    MOVQMR(r, d, b);
+                }
+                else {
+                    // xmm store
+                    MOVSDMR(r, d, b);
+                }
+                break;
+            }
+            case LIR_st32f:
+            {
+                // need a scratch FPR reg
+                Register t = registerAllocTmp(FpRegs & ~rmask(r));
+
+                // store
+                MOVSSMR(t, d, b);
+
+                // cvt to single-precision
+                if (IsGpReg(r))
+                {
+                    CVTSD2SS(t, t);
+                    MOVQXR(t, r);   // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
+                }
+                else
+                {
+                    NanoAssert(IsFpReg(r));
+                    CVTSD2SS(t, r);
+                }
+                XORPS(t);           // break dependency chains
+                break;
+            }
+            default:
+                NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
+                break;
         }
     }
 
     void Assembler::asm_store32(LOpcode op, LIns *value, int d, LIns *base) {
 
-        switch (op) {
-            case LIR_sti:
-                // handled by mainline code below for now
-                break;
-            case LIR_stb:
-            case LIR_sts:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
-            default:
-                NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
-                return;
-        }
+        // quirk of x86-64: reg cannot appear to be ah/bh/ch/dh
+        // for single-byte stores with REX prefix
+        const RegisterMask SrcRegs =
+            (op == LIR_stb) ?
+            (GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI)) :
+            GpRegs;
 
         NanoAssert(!value->isQuad());
         Register b = getBaseReg(LIR_sti, base, d, BaseRegs);
-        Register r = findRegFor(value, GpRegs & ~rmask(b));
+        Register r = findRegFor(value, SrcRegs & ~rmask(b));
 
-        // store 32bits to 64bit addr. use rex so we can use all 16 regs
-        MOVLMR(r, d, b);
+        switch (op) {
+            case LIR_stb:
+                MOVBMR(r, d, b);
+                break;
+            case LIR_sts:
+                MOVSMR(r, d, b);
+                break;
+            case LIR_sti:
+                MOVLMR(r, d, b);
+                break;
+            default:
+                NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
+                break;
+        }
     }
 
     // generate a 64bit constant, must not affect condition codes!
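Aside: the SrcRegs mask above works around an x86-64 encoding wrinkle: in a byte-register operand, encodings 4-7 mean ah/ch/dh/bh without a REX prefix but spl/bpl/sil/dil with one, so the backend simply refuses to allocate RSP/RBP/RSI/RDI as byte-store sources. A toy check of the mask arithmetic (register numbers follow the x64 encoding order; everything else here is ours):

    #include <cstdint>
    enum { RSP = 4, RBP = 5, RSI = 6, RDI = 7 };  // x64 encoding order
    int main() {
        uint32_t GpRegs  = 0xFFFF;  // toy mask: rax..r15 all allocatable
        uint32_t SrcRegs = GpRegs & ~(1u << RSP | 1u << RBP | 1u << RSI | 1u << RDI);
        return SrcRegs == 0xFF0Fu ? 0 : 1;  // bits 4-7 (rsp/rbp/rsi/rdi) cleared
    }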
nanojit/NativeX64.h

@@ -61,7 +61,7 @@ namespace nanojit
 #define NJ_MAX_STACK_ENTRY 256
 #define NJ_ALIGN_STACK 16
 #define NJ_JTBL_SUPPORTED 1
-#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
+#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 
 enum Register {
     RAX = 0,    // 1st int return, # of sse varargs
@@ -191,6 +191,8 @@ namespace nanojit
         X64_cmpqr8  = 0x00F8834800000004LL, // 64bit compare r,int64(imm8)
         X64_cvtsi2sd= 0xC02A0F40F2000005LL, // convert int32 to double r = (double) b
         X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
+        X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
+        X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
         X64_divsd   = 0xC05E0F40F2000005LL, // divide scalar double r /= b
         X64_mulsd   = 0xC0590F40F2000005LL, // multiply scalar double r *= b
         X64_addsd   = 0xC0580F40F2000005LL, // add scalar double r += b
@@ -230,6 +232,8 @@ namespace nanojit
         X64_learm   = 0x00000000808D4007LL, // 32bit load effective addr reg <- disp32+base
         X64_learip  = 0x00000000058D4807LL, // 64bit RIP-relative lea. reg <- disp32+rip (modrm = 00rrr101 = 05)
         X64_movlr   = 0xC08B400000000003LL, // 32bit mov r <- b
+        X64_movbmr  = 0x0000000080884007LL, // 8bit store r -> [b+d32]
+        X64_movsmr  = 0x8089406600000004LL, // 16bit store r -> [b+d32]
         X64_movlmr  = 0x0000000080894007LL, // 32bit store r -> [b+d32]
         X64_movlrm  = 0x00000000808B4007LL, // 32bit load r <- [b+d32]
         X64_movqmr  = 0x0000000080894807LL, // 64bit store gpr -> [b+d32]
@@ -245,10 +249,14 @@ namespace nanojit
         X64_movsdrr = 0xC0100F40F2000005LL, // 64bit mov xmm-r <- xmm-b (upper 64bits unchanged)
         X64_movsdrm = 0x80100F40F2000005LL, // 64bit load xmm-r <- [b+d32] (upper 64 cleared)
         X64_movsdmr = 0x80110F40F2000005LL, // 64bit store xmm-r -> [b+d32]
+        X64_movssrm = 0x80100F40F3000005LL, // 32bit load xmm-r <- [b+d32] (upper 96 cleared)
+        X64_movssmr = 0x80110F40F3000005LL, // 32bit store xmm-r -> [b+d32]
         X64_movsxdr = 0xC063480000000003LL, // sign extend i32 to i64 r = (int64)(int32) b
         X64_movzx8  = 0xC0B60F4000000004LL, // zero extend i8 to i64 r = (uint64)(uint8) b
         X64_movzx8m = 0x80B60F4000000004LL, // zero extend i8 load to i32 r <- [b+d32]
         X64_movzx16m= 0x80B70F4000000004LL, // zero extend i16 load to i32 r <- [b+d32]
+        X64_movsx8m = 0x80BE0F4000000004LL, // sign extend i8 load to i32 r <- [b+d32]
+        X64_movsx16m= 0x80BF0F4000000004LL, // sign extend i16 load to i32 r <- [b+d32]
         X64_neg     = 0xD8F7400000000003LL, // 32bit two's complement b = -b
         X64_nop1    = 0x9000000000000001LL, // one byte NOP
         X64_nop2    = 0x9066000000000002LL, // two byte NOP
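Aside on reading these constants: the low nibble of each X64_ value appears to be the instruction length, with the instruction bytes packed into the high end of the word, first byte lowest (so X64_movlr = 0xC08B400000000003 unpacks to 40 8B C0: REX, opcode, modrm, and the trailing zero bytes of the disp32 forms are the displacement placeholder). A hedged decoder sketch under that assumption (ours, not nanojit's emit()):

    #include <cstdint>
    #include <cstddef>
    // Unpack a packed opcode constant into instruction bytes, assuming the
    // low-nibble-length / high-bytes layout described above.
    static size_t unpack_insn(uint64_t op, uint8_t out[8]) {
        size_t len = op & 0xF;                           // e.g. ...03 -> 3 bytes
        for (size_t i = 0; i < len; i++)
            out[i] = uint8_t(op >> (8 * (8 - len + i))); // top `len` bytes, in order
        return len;
    }
    // unpack_insn(0xC08B400000000003ULL, buf) -> {0x40, 0x8B, 0xC0}: mov r32, r32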
@@ -359,6 +367,7 @@ namespace nanojit
         void emitr(uint64_t op, Register b) { emitrr(op, (Register)0, b); }\
         void emitr8(uint64_t op, Register b) { emitrr8(op, (Register)0, b); }\
         void emitprr(uint64_t op, Register r, Register b);\
+        void emitrm8(uint64_t op, Register r, int32_t d, Register b);\
         void emitrm(uint64_t op, Register r, int32_t d, Register b);\
         void emitrm_wide(uint64_t op, Register r, int32_t d, Register b);\
         uint64_t emit_disp32(uint64_t op, int32_t d);\
@@ -380,7 +389,7 @@ namespace nanojit
         void asm_arith_imm(LIns*);\
         void regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
         void regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
-        void regalloc_load(LIns *ins, Register &rr, int32_t &d, Register &rb);\
+        void regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
         void dis(NIns *p, int bytes);\
         void asm_cmp(LIns*);\
         void asm_cmp_imm(LIns*);\
@@ -460,6 +469,8 @@ namespace nanojit
         void SUBSD(Register l, Register r);\
         void CVTSQ2SD(Register l, Register r);\
         void CVTSI2SD(Register l, Register r);\
+        void CVTSS2SD(Register l, Register r);\
+        void CVTSD2SS(Register l, Register r);\
         void UCOMISD(Register l, Register r);\
         void MOVQRX(Register l, Register r);\
         void MOVQXR(Register l, Register r);\
@@ -495,12 +506,18 @@ namespace nanojit
         void LEAQRM(Register r1, int d, Register r2);\
         void MOVLRM(Register r1, int d, Register r2);\
         void MOVQRM(Register r1, int d, Register r2);\
+        void MOVBMR(Register r1, int d, Register r2);\
+        void MOVSMR(Register r1, int d, Register r2);\
         void MOVLMR(Register r1, int d, Register r2);\
         void MOVQMR(Register r1, int d, Register r2);\
         void MOVZX8M(Register r1, int d, Register r2);\
         void MOVZX16M(Register r1, int d, Register r2);\
+        void MOVSX8M(Register r1, int d, Register r2);\
+        void MOVSX16M(Register r1, int d, Register r2);\
         void MOVSDRM(Register r1, int d, Register r2);\
         void MOVSDMR(Register r1, int d, Register r2);\
+        void MOVSSMR(Register r1, int d, Register r2);\
+        void MOVSSRM(Register r1, int d, Register r2);\
         void JMP8(size_t n, NIns* t);\
         void JMP32(size_t n, NIns* t);\
         void JMPX(Register indexreg, NIns** table);\