implement NJ_EXPANDED_LOADSTORE_SUPPORTED for x64 backend (r=edwsmith,r=nnethercote,bug=532240)

--HG--
extra : convert_revision : cd0b46495c9520568c6507766dfdcb5fbf24d482
This commit is contained in:
Steven Johnson 2009-12-11 16:22:23 -08:00
Родитель 741731ac0b
Коммит 41c2d79462
2 изменённых файлов: 133 добавлений и 61 удалений

Просмотреть файл

@ -99,6 +99,11 @@ namespace nanojit
"ah", "ch", "dh", "bh"
};
// 16-bit names of the general-purpose registers, indexed by register number.
// Consumed by the RS() macro when a 16-bit operand is printed.
// Legacy registers use their plain 16-bit names (sp/bp/si/di, not "spx" etc.);
// the extended registers take the "w" (word) suffix.
const char *gpRegNames16[] = {
    "ax",  "cx",  "dx",   "bx",   "sp",   "bp",   "si",   "di",
    "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
};
#ifdef _DEBUG
#define TODO(x) todo(#x)
static void todo(const char *s) {
@ -240,6 +245,11 @@ namespace nanojit
emit(rexprb(mod_rr(op, r, b), r, b));
}
// 8-bit-operand variant of the disp32 ModRM form: used when the displacement
// fits inside the instruction word (opcode is 1-3 bytes).
//   op - packed opcode template
//   r  - register operand
//   d  - 32-bit displacement
//   b  - base register
void Assembler::emitrm8(uint64_t op, Register r, int32_t d, Register b) {
    // Fold the registers and displacement into the opcode first ...
    const uint64_t withDisp = mod_disp32(op, r, b, d);
    // ... then apply the byte-register REX handling and emit the result.
    const uint64_t encoded = rexrb8(withDisp, r, b);
    emit(encoded);
}
// disp32 modrm form, when the disp fits in the instruction (opcode is 1-3 bytes)
void Assembler::emitrm(uint64_t op, Register r, int32_t d, Register b) {
emit(rexrb(mod_disp32(op, r, b, d), r, b));
@ -343,6 +353,7 @@ namespace nanojit
}
// Register-name helpers used in the asm_output() format arguments below.
// Each maps a register number to a printable name for one operand width.
// RB: 8-bit name table.
#define RB(r) gpRegNames8[(r)]
// RS: 16-bit name table.
#define RS(r) gpRegNames16[(r)]
// RBhi: high-byte name table (its visible tail lists "ah", "ch", "dh", "bh").
#define RBhi(r) gpRegNames8hi[(r)]
// RL: 32-bit name table.
#define RL(r) gpRegNames32[(r)]
// RQ: name returned by gpn(); also used for the XMM operands of the SSE helpers.
#define RQ(r) gpn(r)
@ -439,6 +450,8 @@ namespace nanojit
// Register-to-register SSE helpers: each emits its opcode through emitprr()
// and logs the mnemonic with both operand names via asm_output().
// CVTSI2SD prints its source with RL() (32-bit name); the others use RQ().
void Assembler::SUBSD( R l, R r) { emitprr(X64_subsd, l,r); asm_output("subsd %s, %s", RQ(l),RQ(r)); }
void Assembler::CVTSQ2SD(R l, R r) { emitprr(X64_cvtsq2sd,l,r); asm_output("cvtsq2sd %s, %s",RQ(l),RQ(r)); }
void Assembler::CVTSI2SD(R l, R r) { emitprr(X64_cvtsi2sd,l,r); asm_output("cvtsi2sd %s, %s",RQ(l),RL(r)); }
// cvtss2sd l, r: widen the scalar float in r to a double in l. The caller passes
// FP registers for both operands, so both are printed with RQ(); RL() indexes
// the 32-bit general-purpose name table and must not be given an FP register.
void Assembler::CVTSS2SD(R l, R r) { emitprr(X64_cvtss2sd,l,r); asm_output("cvtss2sd %s, %s",RQ(l),RQ(r)); }
// cvtsd2ss l, r: narrow the scalar double in r to a float in l. The destination
// is an FP register at every call site, so it is printed with RQ(); RL() indexes
// the 32-bit general-purpose name table and must not be given an FP register.
void Assembler::CVTSD2SS(R l, R r) { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RQ(l),RQ(r)); }
// UCOMISD and the two movq forms: each emits through emitprr() and logs both
// operand names with RQ(). MOVQRX passes its operands to emitprr() in swapped
// order (see the note on that line); the logged text keeps l first.
void Assembler::UCOMISD( R l, R r) { emitprr(X64_ucomisd, l,r); asm_output("ucomisd %s, %s", RQ(l),RQ(r)); }
void Assembler::MOVQRX( R l, R r) { emitprr(X64_movqrx, r,l); asm_output("movq %s, %s", RQ(l),RQ(r)); } // Nb: r and l are deliberately reversed within the emitprr() call.
void Assembler::MOVQXR( R l, R r) { emitprr(X64_movqxr, l,r); asm_output("movq %s, %s", RQ(l),RQ(r)); }
@ -483,14 +496,21 @@ namespace nanojit
// Register <- [base + disp32] forms: r1 is the destination, d the displacement,
// r2 the base register. Each emits via emitrm() and logs "op dest, disp(base)".
void Assembler::LEAQRM(R r1, I d, R r2) { emitrm(X64_leaqrm,r1,d,r2); asm_output("leaq %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVLRM(R r1, I d, R r2) { emitrm(X64_movlrm,r1,d,r2); asm_output("movl %s, %d(%s)",RL(r1),d,RQ(r2)); }
void Assembler::MOVQRM(R r1, I d, R r2) { emitrm(X64_movqrm,r1,d,r2); asm_output("movq %s, %d(%s)",RQ(r1),d,RQ(r2)); }
// 8-bit store: [r2 + d] <- low byte of r1. r1 is the value register and r2 the
// base (emitrm8 takes the base last), so the log prints "disp(base), value".
void Assembler::MOVBMR(R r1, I d, R r2) { emitrm8(X64_movbmr,r1,d,r2); asm_output("movb %d(%s), %s",d,RQ(r2),RB(r1)); }
// 16-bit store: [r2 + d] <- low word of r1. The opcode carries a 0x66 prefix,
// hence emitprm(). r1 is the value register and r2 the base, so the log prints
// "disp(base), value".
void Assembler::MOVSMR(R r1, I d, R r2) { emitprm(X64_movsmr,r1,d,r2); asm_output("movs %d(%s), %s",d,RQ(r2),RS(r1)); }
// 32-bit store: [r2 + d] <- r1. r1 is the value register and r2 the base
// (emitrm takes the base last), so the log prints "disp(base), value".
void Assembler::MOVLMR(R r1, I d, R r2) { emitrm(X64_movlmr,r1,d,r2); asm_output("movl %d(%s), %s",d,RQ(r2),RL(r1)); }
// 64-bit store: [r2 + d] <- r1. r1 is the value register and r2 the base
// (emitrm takes the base last), so the log prints "disp(base), value".
void Assembler::MOVQMR(R r1, I d, R r2) { emitrm(X64_movqmr,r1,d,r2); asm_output("movq %d(%s), %s",d,RQ(r2),RQ(r1)); }
// Extending loads from [r2 + d] into r1: MOVZX* zero-extend, MOVSX* sign-extend,
// from an 8-bit (…8M) or 16-bit (…16M) memory operand. All go through
// emitrm_wide() and log "op dest, disp(base)".
void Assembler::MOVZX8M( R r1, I d, R r2) { emitrm_wide(X64_movzx8m, r1,d,r2); asm_output("movzxb %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVZX16M(R r1, I d, R r2) { emitrm_wide(X64_movzx16m,r1,d,r2); asm_output("movzxs %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVSX8M( R r1, I d, R r2) { emitrm_wide(X64_movsx8m, r1,d,r2); asm_output("movsxb %s, %d(%s)",RQ(r1),d,RQ(r2)); }
void Assembler::MOVSX16M(R r1, I d, R r2) { emitrm_wide(X64_movsx16m,r1,d,r2); asm_output("movsxs %s, %d(%s)",RQ(r1),d,RQ(r2)); }
// Scalar-double load: r1 <- [r2 + d]. Emitted via emitprm(); logs "movsd dest, disp(base)".
void Assembler::MOVSDRM(R r1, I d, R r2) { emitprm(X64_movsdrm,r1,d,r2); asm_output("movsd %s, %d(%s)",RQ(r1),d,RQ(r2)); }
// Scalar-double store: [r2 + d] <- r1. r1 is the value register and r2 the base
// (emitprm takes the base last), so the log prints "disp(base), value".
void Assembler::MOVSDMR(R r1, I d, R r2) { emitprm(X64_movsdmr,r1,d,r2); asm_output("movsd %d(%s), %s",d,RQ(r2),RQ(r1)); }
// Scalar-single load: r1 <- [r2 + d]. Emitted via emitprm(); logs "movss dest, disp(base)".
void Assembler::MOVSSRM(R r1, I d, R r2) { emitprm(X64_movssrm,r1,d,r2); asm_output("movss %s, %d(%s)",RQ(r1),d,RQ(r2)); }
// Scalar-single store: [r2 + d] <- r1. r1 is the value register and r2 the base
// (emitprm takes the base last), so the log prints "disp(base), value".
void Assembler::MOVSSMR(R r1, I d, R r2) { emitprm(X64_movssmr,r1,d,r2); asm_output("movss %d(%s), %s",d,RQ(r2),RQ(r1)); }
// Jump to t using the X64_jmp8 opcode: emitted via emit_target8() and logged as "jmp <address>".
// NOTE(review): n is forwarded unchanged to emit_target8(); its meaning is not visible here -- confirm.
void Assembler::JMP8( S n, NIns* t) { emit_target8(n, X64_jmp8,t); asm_output("jmp %p", t); }
@ -1339,58 +1359,62 @@ namespace nanojit
// xmm <- xmm: use movaps. movsd r,r causes partial register stall
MOVAPSR(d, s);
} else {
NanoAssert(IsFpReg(d) && !IsFpReg(s));
// xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
MOVQXR(d, s);
}
}
// Pick the operands for a load instruction.
//   ins   - the load being assembled
//   allow - (RegisterMask form) set of registers the result may be placed in
//   rr    - out: register that will hold the loaded value
//   dr    - out: displacement, initialised from ins->disp()
//   rb    - out: base register obtained from getBaseReg() for ins->oprnd1()
void Assembler::regalloc_load(LIns *ins, Register &rr, int32_t &dr, Register &rb) {
void Assembler::regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &dr, Register &rb) {
dr = ins->disp();
LIns *base = ins->oprnd1();
// Resolve the base register first so it can be excluded from the result choice.
rb = getBaseReg(ins->opcode(), base, dr, BaseRegs);
if (ins->isUnusedOrHasUnknownReg()) {
// use a gpr in case we're copying a non-double
rr = prepResultReg(ins, GpRegs & ~rmask(rb));
// With an allow mask: also re-pick when the currently assigned register is
// not one of the allowed ones; the new register never equals the base.
if (ins->isUnusedOrHasUnknownReg() || !(allow & rmask(ins->getReg()))) {
rr = prepResultReg(ins, allow & ~rmask(rb));
} else {
// keep already assigned register
rr = ins->getReg();
NanoAssert(allow & rmask(rr));
freeRsrcOf(ins, false);
}
}
// Assemble a load that produces a 64-bit result.
//   LIR_ldq / LIR_ldqc     - 64-bit load into a GPR (movq) or an XMM register (movsd)
//   LIR_ld32f / LIR_ldc32f - 32-bit float load (movss) widened to double (cvtss2sd)
// Any other opcode trips an assertion.
void Assembler::asm_load64(LIns *ins) {
Register rr, rb;
int32_t dr;
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
// handled by mainline code below for now
break;
case LIR_ld32f:
case LIR_ldc32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
return;
}
Register rr, rb;
int32_t dr;
regalloc_load(ins, rr, dr, rb);
// Quad loads request a GPR; an already-assigned register is kept only if
// regalloc_load finds it inside the allow mask.
regalloc_load(ins, GpRegs, rr, dr, rb);
if (IsGpReg(rr)) {
// general 64bit load, 32bit const displacement
MOVQRM(rr, dr, rb);
} else {
NanoAssert(IsFpReg(rr));
// load 64bits into XMM. don't know if double or int64, assume double.
MOVSDRM(rr, dr, rb);
}
break;
case LIR_ld32f:
case LIR_ldc32f:
// Float loads must land in an FP register so the value can be widened in place.
regalloc_load(ins, FpRegs, rr, dr, rb);
NanoAssert(IsFpReg(rr));
// NOTE(review): the convert is issued before the load in source order; this
// presumably relies on the assembler generating code in reverse -- confirm.
CVTSS2SD(rr, rr);
MOVSSRM(rr, dr, rb);
break;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
break;
}
}
void Assembler::asm_load32(LIns *ins) {
NanoAssert(!ins->isQuad());
Register r, b;
int32_t d;
regalloc_load(ins, r, d, b);
regalloc_load(ins, GpRegs, r, d, b);
LOpcode op = ins->opcode();
switch(op) {
case LIR_ldzb:
@ -1406,40 +1430,32 @@ namespace nanojit
MOVLRM( r, d, b);
break;
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
MOVSX8M( r, d, b);
break;
case LIR_ldss:
case LIR_ldcss:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
MOVSX16M( r, d, b);
break;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
break;
}
}
void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
NanoAssert(value->isQuad());
switch (op) {
case LIR_stqi:
// handled by mainline code below for now
break;
case LIR_st32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
return;
}
Register b = getBaseReg(LIR_stqi, base, d, BaseRegs);
Register r;
// if we have to choose a register, use a GPR, but not the base reg
Register r;
if (value->isUnusedOrHasUnknownReg()) {
RegisterMask allow;
// If op is LIR_st32f and we have no reg, prefer FPR over GPR: saves an instruction later,
// and the value is almost certainly going to be operated on as FP later anyway.
// XXX: isFloat doesn't cover float/fmod! see bug 520208.
if (value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
if (op == LIR_st32f || value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
allow = FpRegs;
} else {
allow = GpRegs;
@ -1449,6 +1465,9 @@ namespace nanojit
r = value->getReg();
}
switch (op) {
case LIR_stqi:
{
if (IsGpReg(r)) {
// gpr store
MOVQMR(r, d, b);
@ -1457,29 +1476,65 @@ namespace nanojit
// xmm store
MOVSDMR(r, d, b);
}
break;
}
case LIR_st32f:
{
// need a scratch FPR reg
Register t = registerAllocTmp(FpRegs & ~rmask(r));
// store
MOVSSMR(t, d, b);
// cvt to single-precision
if (IsGpReg(r))
{
CVTSD2SS(t, t);
MOVQXR(t, r); // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
}
else
{
NanoAssert(IsFpReg(r));
CVTSD2SS(t, r);
}
XORPS(t); // break dependency chains
break;
}
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
break;
}
}
// Assemble a store of a non-quad value to [base + d].
//   op    - LIR_stb (8-bit), LIR_sts (16-bit) or LIR_sti (32-bit); anything else asserts
//   value - instruction producing the value to store (must not be a quad)
//   d     - displacement from the base
//   base  - instruction producing the base address
void Assembler::asm_store32(LOpcode op, LIns *value, int d, LIns *base) {
switch (op) {
case LIR_sti:
// handled by mainline code below for now
break;
case LIR_stb:
case LIR_sts:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
return;
}
// quirk of x86-64: reg cannot appear to be ah/bh/ch/dh
// for single-byte stores with REX prefix
// For LIR_stb the value register is therefore never RSP, RBP, RSI or RDI;
// the wider stores may use any GPR.
const RegisterMask SrcRegs =
(op == LIR_stb) ?
(GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI)) :
GpRegs;
NanoAssert(!value->isQuad());
Register b = getBaseReg(LIR_sti, base, d, BaseRegs);
Register r = findRegFor(value, GpRegs & ~rmask(b));
// The value register is drawn from SrcRegs and is never the base register.
Register r = findRegFor(value, SrcRegs & ~rmask(b));
// store 32bits to 64bit addr. use rex so we can use all 16 regs
// Select the store width from the opcode; arguments are (value, disp, base).
switch (op) {
case LIR_stb:
MOVBMR(r, d, b);
break;
case LIR_sts:
MOVSMR(r, d, b);
break;
case LIR_sti:
MOVLMR(r, d, b);
break;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
break;
}
}
// generate a 64bit constant, must not affect condition codes!

Просмотреть файл

@ -61,7 +61,7 @@ namespace nanojit
#define NJ_MAX_STACK_ENTRY 256
#define NJ_ALIGN_STACK 16
#define NJ_JTBL_SUPPORTED 1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
enum Register {
RAX = 0, // 1st int return, # of sse varargs
@ -191,6 +191,8 @@ namespace nanojit
X64_cmpqr8 = 0x00F8834800000004LL, // 64bit compare r,int64(imm8)
X64_cvtsi2sd= 0xC02A0F40F2000005LL, // convert int32 to double r = (double) b
X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
X64_divsd = 0xC05E0F40F2000005LL, // divide scalar double r /= b
X64_mulsd = 0xC0590F40F2000005LL, // multiply scalar double r *= b
X64_addsd = 0xC0580F40F2000005LL, // add scalar double r += b
@ -230,6 +232,8 @@ namespace nanojit
X64_learm = 0x00000000808D4007LL, // 32bit load effective addr reg <- disp32+base
X64_learip = 0x00000000058D4807LL, // 64bit RIP-relative lea. reg <- disp32+rip (modrm = 00rrr101 = 05)
X64_movlr = 0xC08B400000000003LL, // 32bit mov r <- b
X64_movbmr = 0x0000000080884007LL, // 8bit store r -> [b+d32]
X64_movsmr = 0x8089406600000004LL, // 16bit store r -> [b+d32]
X64_movlmr = 0x0000000080894007LL, // 32bit store r -> [b+d32]
X64_movlrm = 0x00000000808B4007LL, // 32bit load r <- [b+d32]
X64_movqmr = 0x0000000080894807LL, // 64bit store gpr -> [b+d32]
@ -245,10 +249,14 @@ namespace nanojit
X64_movsdrr = 0xC0100F40F2000005LL, // 64bit mov xmm-r <- xmm-b (upper 64bits unchanged)
X64_movsdrm = 0x80100F40F2000005LL, // 64bit load xmm-r <- [b+d32] (upper 64 cleared)
X64_movsdmr = 0x80110F40F2000005LL, // 64bit store xmm-r -> [b+d32]
X64_movssrm = 0x80100F40F3000005LL, // 32bit load xmm-r <- [b+d32] (upper 96 cleared)
X64_movssmr = 0x80110F40F3000005LL, // 32bit store xmm-r -> [b+d32]
X64_movsxdr = 0xC063480000000003LL, // sign extend i32 to i64 r = (int64)(int32) b
X64_movzx8 = 0xC0B60F4000000004LL, // zero extend i8 to i64 r = (uint64)(uint8) b
X64_movzx8m = 0x80B60F4000000004LL, // zero extend i8 load to i32 r <- [b+d32]
X64_movzx16m= 0x80B70F4000000004LL, // zero extend i16 load to i32 r <- [b+d32]
X64_movsx8m = 0x80BE0F4000000004LL, // sign extend i8 load to i32 r <- [b+d32]
X64_movsx16m= 0x80BF0F4000000004LL, // sign extend i16 load to i32 r <- [b+d32]
X64_neg = 0xD8F7400000000003LL, // 32bit two's complement b = -b
X64_nop1 = 0x9000000000000001LL, // one byte NOP
X64_nop2 = 0x9066000000000002LL, // two byte NOP
@ -359,6 +367,7 @@ namespace nanojit
void emitr(uint64_t op, Register b) { emitrr(op, (Register)0, b); }\
void emitr8(uint64_t op, Register b) { emitrr8(op, (Register)0, b); }\
void emitprr(uint64_t op, Register r, Register b);\
void emitrm8(uint64_t op, Register r, int32_t d, Register b);\
void emitrm(uint64_t op, Register r, int32_t d, Register b);\
void emitrm_wide(uint64_t op, Register r, int32_t d, Register b);\
uint64_t emit_disp32(uint64_t op, int32_t d);\
@ -380,7 +389,7 @@ namespace nanojit
void asm_arith_imm(LIns*);\
void regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
void regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
void regalloc_load(LIns *ins, Register &rr, int32_t &d, Register &rb);\
void regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
void dis(NIns *p, int bytes);\
void asm_cmp(LIns*);\
void asm_cmp_imm(LIns*);\
@ -460,6 +469,8 @@ namespace nanojit
void SUBSD(Register l, Register r);\
void CVTSQ2SD(Register l, Register r);\
void CVTSI2SD(Register l, Register r);\
void CVTSS2SD(Register l, Register r);\
void CVTSD2SS(Register l, Register r);\
void UCOMISD(Register l, Register r);\
void MOVQRX(Register l, Register r);\
void MOVQXR(Register l, Register r);\
@ -495,12 +506,18 @@ namespace nanojit
void LEAQRM(Register r1, int d, Register r2);\
void MOVLRM(Register r1, int d, Register r2);\
void MOVQRM(Register r1, int d, Register r2);\
void MOVBMR(Register r1, int d, Register r2);\
void MOVSMR(Register r1, int d, Register r2);\
void MOVLMR(Register r1, int d, Register r2);\
void MOVQMR(Register r1, int d, Register r2);\
void MOVZX8M(Register r1, int d, Register r2);\
void MOVZX16M(Register r1, int d, Register r2);\
void MOVSX8M(Register r1, int d, Register r2);\
void MOVSX16M(Register r1, int d, Register r2);\
void MOVSDRM(Register r1, int d, Register r2);\
void MOVSDMR(Register r1, int d, Register r2);\
void MOVSSMR(Register r1, int d, Register r2);\
void MOVSSRM(Register r1, int d, Register r2);\
void JMP8(size_t n, NIns* t);\
void JMP32(size_t n, NIns* t);\
void JMPX(Register indexreg, NIns** table);\