From 88f0a9bfc3883205539ae9f8850b2124b7e35ef1 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 6 Apr 2010 15:55:43 -0700 Subject: [PATCH] Bug 557483 - nanojit: convert i386 codegen macros to functions. r=edwsmith. --HG-- extra : convert_revision : 4062fae8baf26ee9fcdf5c8d1125b2a1fa806515 --- js/src/nanojit/NativeX64.cpp | 10 +- js/src/nanojit/Nativei386.cpp | 783 ++++++++++++++++++++++++- js/src/nanojit/Nativei386.h | 1042 ++++++++------------------------- 3 files changed, 1033 insertions(+), 802 deletions(-) diff --git a/js/src/nanojit/NativeX64.cpp b/js/src/nanojit/NativeX64.cpp index ca9658d66373..a08b0b0a151b 100644 --- a/js/src/nanojit/NativeX64.cpp +++ b/js/src/nanojit/NativeX64.cpp @@ -366,11 +366,11 @@ namespace nanojit #define RL(r) gpRegNames32[(r)] #define RQ(r) gpn(r) -#define R Register -#define I int -#define I32 int32_t -#define U64 uint64_t -#define S size_t + typedef Register R; + typedef int I; + typedef int32_t I32; + typedef uint64_t U64; + typedef size_t S; void Assembler::PUSHR(R r) { emitr(X64_pushr,r); asm_output("push %s", RQ(r)); } void Assembler::POPR( R r) { emitr(X64_popr, r); asm_output("pop %s", RQ(r)); } diff --git a/js/src/nanojit/Nativei386.cpp b/js/src/nanojit/Nativei386.cpp index db7873b7e20e..5e847a581427 100644 --- a/js/src/nanojit/Nativei386.cpp +++ b/js/src/nanojit/Nativei386.cpp @@ -70,6 +70,783 @@ namespace nanojit 0 /* ABI_CDECL */ }; + typedef Register R; + typedef int32_t I32; + + // XXX rearrange NanoAssert() expression to workaround apparent gcc 4.3 bug: + // XXX "error: logical && with non-zero constant will always evaluate as true" + // underrunProtect(6) is necessary for worst-case + inline void Assembler::MODRMs(I32 r, I32 d, R b, I32 l, I32 i) { + NanoAssert(unsigned(i)<8 && unsigned(b)<8 && unsigned(r)<8); + if (d == 0 && b != EBP) { + _nIns -= 2; + _nIns[0] = (uint8_t) ( 0<<6 | r<<3 | 4); + _nIns[1] = (uint8_t) ( l<<6 | i<<3 | b); + } else if (isS8(d)) { + _nIns -= 3; + _nIns[0] = (uint8_t) ( 1<<6 | r<<3 | 4 ); + _nIns[1] = (uint8_t) ( l<<6 | i<<3 | b ); + _nIns[2] = (uint8_t) d; + } else { + IMM32(d); + *(--_nIns) = (uint8_t) ( l<<6 | i<<3 | b ); + *(--_nIns) = (uint8_t) ( 2<<6 | r<<3 | 4 ); + } + } + + // underrunProtect(6) is necessary for worst-case + inline void Assembler::MODRMm(I32 r, I32 d, R b) { + NanoAssert(unsigned(r)<8 && ((b)==UnspecifiedReg || unsigned(b)<8)); + if ((b) == UnspecifiedReg) { + IMM32(d); + *(--_nIns) = (uint8_t) (0<<6 | (r)<<3 | 5); + } else if ((b) == ESP) { + MODRMs(r, d, b, 0, (Register)4); + } else if ( (d) == 0 && (b) != EBP) { + *(--_nIns) = (uint8_t) ( 0<<6 | r<<3 | b ); + } else if (isS8(d)) { + *(--_nIns) = (uint8_t) (d); + *(--_nIns) = (uint8_t) ( 1<<6 | r<<3 | b ); + } else { + IMM32(d); + *(--_nIns) = (uint8_t) ( 2<<6 | r<<3 | b ); + } + } + + inline void Assembler::MODRMSIB(R reg, R base, I32 index, I32 scale, I32 disp) { + if (disp != 0 || base == EBP) { + if (isS8(disp)) { + *(--_nIns) = int8_t(disp); + } else { + IMM32(disp); + } + } + *(--_nIns) = uint8_t( scale<<6 | index<<3 | base ); + if (disp == 0 && base != EBP) { + *(--_nIns) = uint8_t( (reg<<3) | 4); + } else if (isS8(disp)) { + *(--_nIns) = uint8_t( (1<<6) | (reg<<3) | 4 ); + } else { + *(--_nIns) = uint8_t( (2<<6) | (reg<<3) | 4 ); + } + } + + inline void Assembler::MODRMdm(I32 r, I32 addr) { + NanoAssert(unsigned(r)<8); + IMM32(addr); + *(--_nIns) = (uint8_t)( r<<3 | 5 ); + } + + inline void Assembler::ALU0(I32 o) { + underrunProtect(1); + *(--_nIns) = uint8_t(o); + } + + inline void Assembler::ALUm(I32 c, I32 r, I32 
d, R b) { + underrunProtect(8); + MODRMm(r, d, b); + *(--_nIns) = uint8_t(c); + } + + inline void Assembler::ALUdm(I32 c, I32 r, I32 addr) { + underrunProtect(6); + MODRMdm(r, addr); + *(--_nIns) = uint8_t(c); + } + + inline void Assembler::ALUsib(I32 c, R r, R base, I32 index, I32 scale, I32 disp) { + underrunProtect(7); + MODRMSIB(r, base, index, scale, disp); + *(--_nIns) = uint8_t(c); + } + + inline void Assembler::ALUm16(I32 c, I32 r, I32 d, R b) { + underrunProtect(9); + MODRMm(r, d, b); + *(--_nIns) = uint8_t(c); + *(--_nIns) = 0x66; + } + + inline void Assembler::ALU2dm(I32 c, I32 r, I32 addr) { + underrunProtect(7); + MODRMdm(r, addr); + *(--_nIns) = uint8_t(c); + *(--_nIns) = uint8_t(c>>8); + } + + inline void Assembler::ALU2m(I32 c, I32 r, I32 d, R b) { + underrunProtect(9); + MODRMm(r, d, b); + *(--_nIns) = uint8_t(c); + *(--_nIns) = uint8_t(c>>8); + } + + inline void Assembler::ALU2sib(I32 c, Register r, R base, I32 index, I32 scale, I32 disp) { + underrunProtect(8); + MODRMSIB(r, base, index, scale, disp); + *(--_nIns) = uint8_t(c); + *(--_nIns) = uint8_t(c>>8); + } + + inline void Assembler::ALUi(I32 c, I32 r, I32 i) { + underrunProtect(6); + NanoAssert(unsigned(r)<8); + if (isS8(i)) { + *(--_nIns) = uint8_t(i); + MODRM(c>>3, r); + *(--_nIns) = uint8_t(0x83); + } else { + IMM32(i); + if ( r == EAX) { + *(--_nIns) = uint8_t(c); + } else { + MODRM((c>>3),(r)); + *(--_nIns) = uint8_t(0x81); + } + } + } + + inline void Assembler::ALUmi(I32 c, I32 d, Register b, I32 i) { + underrunProtect(10); + NanoAssert(((unsigned)b)<8); + if (isS8(i)) { + *(--_nIns) = uint8_t(i); + MODRMm(c>>3, d, b); + *(--_nIns) = uint8_t(0x83); + } else { + IMM32(i); + MODRMm(c>>3, d, b); + *(--_nIns) = uint8_t(0x81); + } + } + + inline void Assembler::ALU2(I32 c, I32 d, I32 s) { + underrunProtect(3); + MODRM((d),(s)); + _nIns -= 2; + _nIns[0] = uint8_t(c>>8); + _nIns[1] = uint8_t(c); + } + + inline void Assembler::LAHF() { count_alu(); ALU0(0x9F); asm_output("lahf"); } + inline void Assembler::SAHF() { count_alu(); ALU0(0x9E); asm_output("sahf"); } + inline void Assembler::OR(R l, R r) { count_alu(); ALU(0x0b, (l),(r)); asm_output("or %s,%s",gpn(l),gpn(r)); } + inline void Assembler::AND(R l, R r) { count_alu(); ALU(0x23, (l),(r)); asm_output("and %s,%s",gpn(l),gpn(r)); } + inline void Assembler::XOR(R l, R r) { count_alu(); ALU(0x33, (l),(r)); asm_output("xor %s,%s",gpn(l),gpn(r)); } + inline void Assembler::ADD(R l, R r) { count_alu(); ALU(0x03, (l),(r)); asm_output("add %s,%s",gpn(l),gpn(r)); } + inline void Assembler::SUB(R l, R r) { count_alu(); ALU(0x2b, (l),(r)); asm_output("sub %s,%s",gpn(l),gpn(r)); } + inline void Assembler::MUL(R l, R r) { count_alu(); ALU2(0x0faf,(l),(r)); asm_output("mul %s,%s",gpn(l),gpn(r)); } + inline void Assembler::DIV(R r) { count_alu(); ALU(0xf7, (Register)7,(r)); asm_output("idiv edx:eax, %s",gpn(r)); } + inline void Assembler::NOT(R r) { count_alu(); ALU(0xf7, (Register)2,(r)); asm_output("not %s",gpn(r)); } + inline void Assembler::NEG(R r) { count_alu(); ALU(0xf7, (Register)3,(r)); asm_output("neg %s",gpn(r)); } + inline void Assembler::SHR(R r, R s) { count_alu(); ALU(0xd3, (Register)5,(r)); asm_output("shr %s,%s",gpn(r),gpn(s)); } + inline void Assembler::SAR(R r, R s) { count_alu(); ALU(0xd3, (Register)7,(r)); asm_output("sar %s,%s",gpn(r),gpn(s)); } + inline void Assembler::SHL(R r, R s) { count_alu(); ALU(0xd3, (Register)4,(r)); asm_output("shl %s,%s",gpn(r),gpn(s)); } + + inline void Assembler::SHIFT(I32 c, R r, I32 i) { + underrunProtect(3); + *--_nIns = 
(uint8_t)(i); + MODRM((Register)c,r); + *--_nIns = 0xc1; + } + + inline void Assembler::SHLi(R r, I32 i) { count_alu(); SHIFT(4,r,i); asm_output("shl %s,%d", gpn(r),i); } + inline void Assembler::SHRi(R r, I32 i) { count_alu(); SHIFT(5,r,i); asm_output("shr %s,%d", gpn(r),i); } + inline void Assembler::SARi(R r, I32 i) { count_alu(); SHIFT(7,r,i); asm_output("sar %s,%d", gpn(r),i); } + + inline void Assembler::MOVZX8(R d, R s) { count_alu(); ALU2(0x0fb6,d,s); asm_output("movzx %s,%s", gpn(d),gpn(s)); } + + inline void Assembler::SUBi(R r, I32 i) { count_alu(); ALUi(0x2d,r,i); asm_output("sub %s,%d",gpn(r),i); } + inline void Assembler::ADDi(R r, I32 i) { count_alu(); ALUi(0x05,r,i); asm_output("add %s,%d",gpn(r),i); } + inline void Assembler::ANDi(R r, I32 i) { count_alu(); ALUi(0x25,r,i); asm_output("and %s,%d",gpn(r),i); } + inline void Assembler::ORi(R r, I32 i) { count_alu(); ALUi(0x0d,r,i); asm_output("or %s,%d",gpn(r),i); } + inline void Assembler::XORi(R r, I32 i) { count_alu(); ALUi(0x35,r,i); asm_output("xor %s,%d",gpn(r),i); } + + inline void Assembler::ADDmi(I32 d, R b, I32 i) { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } + + inline void Assembler::TEST(R d, R s) { count_alu(); ALU(0x85,d,s); asm_output("test %s,%s",gpn(d),gpn(s)); } + inline void Assembler::CMP(R l, R r) { count_alu(); ALU(0x3b,l,r); asm_output("cmp %s,%s",gpn(l),gpn(r)); } + inline void Assembler::CMPi(R r, I32 i) { count_alu(); ALUi(0x3d,r,i); asm_output("cmp %s,%d",gpn(r),i); } + + inline void Assembler::LEA(R r, I32 d, R b) { count_alu(); ALUm(0x8d, r,d,b); asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); } + // lea %r, d(%i*4) + // This addressing mode is not supported by the MODRMSIB macro. + inline void Assembler::LEAmi4(R r, I32 d, I32 i) { + count_alu(); + IMM32(int32_t(d)); + *(--_nIns) = (2<<6) | ((uint8_t)i<<3) | 5; + *(--_nIns) = (0<<6) | ((uint8_t)r<<3) | 4; + *(--_nIns) = 0x8d; + asm_output("lea %s, %p(%s*4)", gpn(r), (void*)d, gpn(i)); + } + + inline void Assembler::CDQ() { SARi(EDX, 31); MR(EDX, EAX); } + + inline void Assembler::INCLi(I32 p) { + count_alu(); + underrunProtect(6); + IMM32((uint32_t)(ptrdiff_t)p); *(--_nIns) = 0x05; *(--_nIns) = 0xFF; + asm_output("incl (%p)", (void*)p); + } + + inline void Assembler::SETE( R r) { count_alu(); ALU2(0x0f94,(r),(r)); asm_output("sete %s", gpn(r)); } + inline void Assembler::SETNP(R r) { count_alu(); ALU2(0x0f9B,(r),(r)); asm_output("setnp %s",gpn(r)); } + inline void Assembler::SETL( R r) { count_alu(); ALU2(0x0f9C,(r),(r)); asm_output("setl %s", gpn(r)); } + inline void Assembler::SETLE(R r) { count_alu(); ALU2(0x0f9E,(r),(r)); asm_output("setle %s",gpn(r)); } + inline void Assembler::SETG( R r) { count_alu(); ALU2(0x0f9F,(r),(r)); asm_output("setg %s", gpn(r)); } + inline void Assembler::SETGE(R r) { count_alu(); ALU2(0x0f9D,(r),(r)); asm_output("setge %s",gpn(r)); } + inline void Assembler::SETB( R r) { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("setb %s", gpn(r)); } + inline void Assembler::SETBE(R r) { count_alu(); ALU2(0x0f96,(r),(r)); asm_output("setbe %s",gpn(r)); } + inline void Assembler::SETA( R r) { count_alu(); ALU2(0x0f97,(r),(r)); asm_output("seta %s", gpn(r)); } + inline void Assembler::SETAE(R r) { count_alu(); ALU2(0x0f93,(r),(r)); asm_output("setae %s",gpn(r)); } + inline void Assembler::SETO( R r) { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("seto %s", gpn(r)); } + + inline void Assembler::MREQ(R d, R s) { count_alu(); ALU2(0x0f44,d,s); asm_output("cmove %s,%s", gpn(d),gpn(s)); } + 
inline void Assembler::MRNE(R d, R s) { count_alu(); ALU2(0x0f45,d,s); asm_output("cmovne %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRL( R d, R s) { count_alu(); ALU2(0x0f4C,d,s); asm_output("cmovl %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRLE(R d, R s) { count_alu(); ALU2(0x0f4E,d,s); asm_output("cmovle %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRG( R d, R s) { count_alu(); ALU2(0x0f4F,d,s); asm_output("cmovg %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRGE(R d, R s) { count_alu(); ALU2(0x0f4D,d,s); asm_output("cmovge %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRB( R d, R s) { count_alu(); ALU2(0x0f42,d,s); asm_output("cmovb %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRBE(R d, R s) { count_alu(); ALU2(0x0f46,d,s); asm_output("cmovbe %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRA( R d, R s) { count_alu(); ALU2(0x0f47,d,s); asm_output("cmova %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRAE(R d, R s) { count_alu(); ALU2(0x0f43,d,s); asm_output("cmovae %s,%s", gpn(d),gpn(s)); } + inline void Assembler::MRNO(R d, R s) { count_alu(); ALU2(0x0f41,d,s); asm_output("cmovno %s,%s", gpn(d),gpn(s)); } + + // these aren't currently used but left in for reference + //#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) + //#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) + + inline void Assembler::LD(R reg, I32 disp, R base) { + count_ld(); + ALUm(0x8b,reg,disp,base); + asm_output("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); + } + + inline void Assembler::LDdm(R reg, I32 addr) { + count_ld(); + ALUdm(0x8b,reg,addr); + asm_output("mov %s,0(%lx)",gpn(reg),(unsigned long)addr); + } + +#define SIBIDX(n) "1248"[n] + + inline void Assembler::LDsib(R reg, I32 disp, R base, I32 index, I32 scale) { + count_ld(); + ALUsib(0x8b, reg, base, index, scale, disp); + asm_output("mov %s,%d(%s+%s*%c)",gpn(reg),disp,gpn(base),gpn(index),SIBIDX(scale)); + } + + // note: movzx/movsx are being output with an 8/16 suffix to indicate the + // size being loaded. This doesn't really match standard intel format + // (though is arguably terser and more obvious in this case) and would + // probably be nice to fix. (Likewise, the 8/16 bit stores being output + // as "mov8" and "mov16" respectively.) + + // Load 16-bit, sign extend. + inline void Assembler::LD16S(R r, I32 d, R b) { + count_ld(); + ALU2m(0x0fbf, r, d, b); + asm_output("movsx16 %s,%d(%s)", gpn(r),d,gpn(b)); + } + + inline void Assembler::LD16Sdm(R r, I32 addr) { + count_ld(); + ALU2dm(0x0fbf, r, addr); + asm_output("movsx16 %s,0(%lx)", gpn(r),(unsigned long)addr); + } + + inline void Assembler::LD16Ssib(R r, I32 disp, R base, I32 index, I32 scale) { + count_ld(); + ALU2sib(0x0fbf, r, base, index, scale, disp); + asm_output("movsx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); + } + + // Load 16-bit, zero extend. + inline void Assembler::LD16Z(R r, I32 d, R b) { + count_ld(); + ALU2m(0x0fb7, r, d, b); + asm_output("movzx16 %s,%d(%s)", gpn(r),d,gpn(b)); + } + + inline void Assembler::LD16Zdm(R r, I32 addr) { + count_ld(); + ALU2dm(0x0fb7, r, addr); + asm_output("movzx16 %s,0(%lx)", gpn(r),(unsigned long)addr); + } + + inline void Assembler::LD16Zsib(R r, I32 disp, R base, I32 index, I32 scale) { + count_ld(); + ALU2sib(0x0fb7, r, base, index, scale, disp); + asm_output("movzx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); + } + + // Load 8-bit, zero extend. 
+ inline void Assembler::LD8Z(R r, I32 d, R b) { + count_ld(); + ALU2m(0x0fb6, r, d, b); + asm_output("movzx8 %s,%d(%s)", gpn(r),d,gpn(b)); + } + + inline void Assembler::LD8Zdm(R r, I32 addr) { + count_ld(); + ALU2dm(0x0fb6, r, addr); + asm_output("movzx8 %s,0(%lx)", gpn(r),(long unsigned)addr); + } + + inline void Assembler::LD8Zsib(R r, I32 disp, R base, I32 index, I32 scale) { + count_ld(); + ALU2sib(0x0fb6, r, base, index, scale, disp); + asm_output("movzx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); + } + + // Load 8-bit, sign extend. + inline void Assembler::LD8S(R r, I32 d, R b) { + count_ld(); + ALU2m(0x0fbe, r, d, b); + asm_output("movsx8 %s,%d(%s)", gpn(r),d,gpn(b)); + } + + inline void Assembler::LD8Sdm(R r, I32 addr) { + count_ld(); + ALU2dm(0x0fbe, r, addr); + asm_output("movsx8 %s,0(%lx)", gpn(r),(long unsigned)addr); + } + + inline void Assembler::LD8Ssib(R r, I32 disp, R base, I32 index, I32 scale) { + count_ld(); + ALU2sib(0x0fbe, r, base, index, scale, disp); + asm_output("movsx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); + } + + inline void Assembler::LDi(R r, I32 i) { + count_ld(); + underrunProtect(5); + IMM32(i); + NanoAssert(((unsigned)r)<8); + *(--_nIns) = (uint8_t) ( 0xb8 | r ); + asm_output("mov %s,%d",gpn(r),i); + } + + // Quirk of x86-32: reg must be a/b/c/d for byte stores here. + inline void Assembler::ST8(R base, I32 disp, R reg) { + count_st(); + NanoAssert(((unsigned)reg)<4); + ALUm(0x88, reg, disp, base); + asm_output("mov8 %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg)); + } + + inline void Assembler::ST16(R base, I32 disp, R reg) { + count_st(); + ALUm16(0x89, reg, disp, base); + asm_output("mov16 %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg)); + } + + inline void Assembler::ST(R base, I32 disp, R reg) { + count_st(); + ALUm(0x89, reg, disp, base); + asm_output("mov %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg)); + } + + inline void Assembler::ST8i(R base, I32 disp, I32 imm) { + count_st(); + underrunProtect(8); + IMM8(imm); + MODRMm(0, disp, base); + *(--_nIns) = 0xc6; + asm_output("mov8 %d(%s),%d",disp,gpn(base),imm); + } + + inline void Assembler::ST16i(R base, I32 disp, I32 imm) { + count_st(); + underrunProtect(10); + IMM16(imm); + MODRMm(0, disp, base); + *(--_nIns) = 0xc7; + *(--_nIns) = 0x66; + asm_output("mov16 %d(%s),%d",disp,gpn(base),imm); + } + + inline void Assembler::STi(R base, I32 disp, I32 imm) { + count_st(); + underrunProtect(11); + IMM32(imm); + MODRMm(0, disp, base); + *(--_nIns) = 0xc7; + asm_output("mov %d(%s),%d",disp,gpn(base),imm); + } + + inline void Assembler::RET() { count_ret(); ALU0(0xc3); asm_output("ret"); } + inline void Assembler::NOP() { count_alu(); ALU0(0x90); asm_output("nop"); } + inline void Assembler::INT3() { ALU0(0xcc); asm_output("int3"); } + + inline void Assembler::PUSHi(I32 i) { + count_push(); + if (isS8(i)) { + underrunProtect(2); + _nIns-=2; _nIns[0] = 0x6a; _nIns[1] = uint8_t(i); + asm_output("push %d",i); + } else { + PUSHi32(i); + } + } + + inline void Assembler::PUSHi32(I32 i) { + count_push(); + underrunProtect(5); + IMM32(i); + *(--_nIns) = 0x68; + asm_output("push %d",i); + } + + inline void Assembler::PUSHr(R r) { + count_push(); + underrunProtect(1); + NanoAssert(((unsigned)r)<8); + *(--_nIns) = (uint8_t) ( 0x50 | r ); + asm_output("push %s",gpn(r)); + } + + inline void Assembler::PUSHm(I32 d, R b) { + count_pushld(); + ALUm(0xff, 6, d, b); + asm_output("push %d(%s)",d,gpn(b)); + } + + inline void 
Assembler::POPr(R r) { + count_pop(); + underrunProtect(1); + NanoAssert(((unsigned)r)<8); + *(--_nIns) = (uint8_t) ( 0x58 | (r) ); + asm_output("pop %s",gpn(r)); + } + +#define JCC32 0x0f +#define JMP8 0xeb +#define JMP32 0xe9 + + inline void Assembler::JCC(I32 o, NIns* t, const char* n) { + count_jcc(); + underrunProtect(6); + intptr_t tt = (intptr_t)t - (intptr_t)_nIns; + if (isS8(tt)) { + verbose_only( NIns* next = _nIns; (void)next; ) + _nIns -= 2; + _nIns[0] = uint8_t( 0x70 | o ); + _nIns[1] = uint8_t(tt); + asm_output("%-5s %p",n,next+tt); + } else { + verbose_only( NIns* next = _nIns; ) + IMM32(tt); + _nIns -= 2; + _nIns[0] = JCC32; + _nIns[1] = (uint8_t) ( 0x80 | o ); + asm_output("%-5s %p",n,next+tt); + } + } + + inline void Assembler::JMP_long(NIns* t) { + count_jmp(); + underrunProtect(5); + intptr_t tt = (intptr_t)t - (intptr_t)_nIns; + JMP_long_nochk_offset(tt); + verbose_only( verbose_outputf("%010lx:", (unsigned long)_nIns); ) + } + + inline void Assembler::JMP_indirect(R r) { + underrunProtect(2); + MODRMm(4, 0, r); + *(--_nIns) = 0xff; + asm_output("jmp *(%s)", gpn(r)); + } + + inline void Assembler::JMP_indexed(Register x, I32 ss, NIns** addr) { + underrunProtect(7); + IMM32(int32_t(addr)); + _nIns -= 3; + _nIns[0] = (NIns) 0xff; /* jmp */ + _nIns[1] = (NIns) (0<<6 | 4<<3 | 4); /* modrm: base=sib + disp32 */ + _nIns[2] = (NIns) (ss<<6 | (x)<<3 | 5); /* sib: x<>16) & 0xff); + _nIns[1] = uint8_t((c>>8) & 0xff); + _nIns[2] = uint8_t(c&0xff); + } + + inline void Assembler::SSEm(I32 c, I32 r, I32 d, R b) { + underrunProtect(9); + MODRMm(r, d, b); + _nIns -= 3; + _nIns[0] = uint8_t((c>>16) & 0xff); + _nIns[1] = uint8_t((c>>8) & 0xff); + _nIns[2] = uint8_t(c & 0xff); + } + + inline void Assembler::LDSDm(R r, const double* addr) { + count_ldq(); + underrunProtect(8); + IMM32(int32_t(addr)); + *(--_nIns) = uint8_t(((r)&7)<<3|5); + *(--_nIns) = 0x10; + *(--_nIns) = 0x0f; + *(--_nIns) = 0xf2; + asm_output("movsd %s,(%p) // =%f",gpn(r),(void*)addr,*addr); + } + + inline void Assembler::SSE_LDSD(R r, I32 d, R b) { count_ldq(); SSEm(0xf20f10, r&7, d, b); asm_output("movsd %s,%d(%s)",gpn(r),(d),gpn(b)); } + inline void Assembler::SSE_LDQ( R r, I32 d, R b) { count_ldq(); SSEm(0xf30f7e, r&7, d, b); asm_output("movq %s,%d(%s)",gpn(r),d,gpn(b)); } + inline void Assembler::SSE_LDSS(R r, I32 d, R b) { count_ld(); SSEm(0xf30f10, r&7, d, b); asm_output("movss %s,%d(%s)",gpn(r),d,gpn(b)); } + inline void Assembler::SSE_STSD(I32 d, R b, R r) { count_stq(); SSEm(0xf20f11, r&7, d, b); asm_output("movsd %d(%s),%s",(d),gpn(b),gpn(r)); } + inline void Assembler::SSE_STQ( I32 d, R b, R r) { count_stq(); SSEm(0x660fd6, r&7, d, b); asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); } + inline void Assembler::SSE_STSS(I32 d, R b, R r) { count_st(); SSEm(0xf30f11, r&7, d, b); asm_output("movss %d(%s),%s",(d),gpn(b),gpn(r)); } + + inline void Assembler::SSE_CVTSI2SD(R xr, R gr) { count_fpu(); SSE(0xf20f2a, xr&7, gr&7); asm_output("cvtsi2sd %s,%s",gpn(xr),gpn(gr)); } + inline void Assembler::SSE_CVTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2d, gr&7, xr&7); asm_output("cvtsd2si %s,%s",gpn(gr),gpn(xr)); } + inline void Assembler::SSE_CVTSD2SS(R xr, R gr) { count_fpu(); SSE(0xf20f5a, xr&7, gr&7); asm_output("cvtsd2ss %s,%s",gpn(xr),gpn(gr)); } + inline void Assembler::SSE_CVTSS2SD(R xr, R gr) { count_fpu(); SSE(0xf30f5a, xr&7, gr&7); asm_output("cvtss2sd %s,%s",gpn(xr),gpn(gr)); } + inline void Assembler::SSE_CVTDQ2PD(R d, R r) { count_fpu(); SSE(0xf30fe6, d&7, r&7); asm_output("cvtdq2pd %s,%s",gpn(d),gpn(r)); } 
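(Editorial aside, not part of the patch.) All of the emitters above write code backwards: `_nIns` starts at the end of the code buffer, `underrunProtect()` reserves worst-case space, and each helper lays down its bytes with `*(--_nIns)`, so immediates and ModRM bytes land in the buffer before the opcode that precedes them in memory. A minimal standalone sketch of that scheme, using my own simplified buffer and helper names rather than nanojit's, for the `mov r32, imm32` form that `Assembler::LDi` emits:

```cpp
#include <cstdint>
#include <cstdio>

static uint8_t  buf[32];
static uint8_t* nIns = buf + sizeof(buf);      // plays the role of Assembler::_nIns

static void IMM32(int32_t i) {                 // same shape as the patch's IMM32
    nIns -= 4;
    *(int32_t*)nIns = i;
}

// "mov r32, imm32" is 0xB8+reg followed by the immediate, so the immediate
// goes into the buffer first and the opcode byte is written just below it.
static void emit_mov_ri(unsigned reg, int32_t imm) {
    IMM32(imm);
    *(--nIns) = uint8_t(0xB8 | reg);
}

int main() {
    emit_mov_ri(0 /* eax */, 42);
    for (uint8_t* p = nIns; p != buf + sizeof(buf); ++p)
        printf("%02x ", *p);                   // prints: b8 2a 00 00 00
    printf("\n");
}
```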
+ + // Move and zero-extend GP reg to XMM reg. + inline void Assembler::SSE_MOVD(R d, R s) { + count_mov(); + if (_is_xmm_reg_(s)) { + NanoAssert(_is_gp_reg_(d)); + SSE(0x660f7e, s&7, d&7); + } else { + NanoAssert(_is_gp_reg_(s)); + NanoAssert(_is_xmm_reg_(d)); + SSE(0x660f6e, d&7, s&7); + } + asm_output("movd %s,%s",gpn(d),gpn(s)); + } + + inline void Assembler::SSE_MOVSD(R rd, R rs) { + count_mov(); + NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs)); + SSE(0xf20f10, rd&7, rs&7); + asm_output("movsd %s,%s",gpn(rd),gpn(rs)); + } + + inline void Assembler::SSE_MOVDm(R d, R b, R xrs) { + count_st(); + NanoAssert(_is_xmm_reg_(xrs) && (_is_gp_reg_(b) || b==FP)); + SSEm(0x660f7e, xrs&7, d, b); + asm_output("movd %d(%s),%s", d, gpn(b), gpn(xrs)); + } + + inline void Assembler::SSE_ADDSD(R rd, R rs) { + count_fpu(); + NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs)); + SSE(0xf20f58, rd&7, rs&7); + asm_output("addsd %s,%s",gpn(rd),gpn(rs)); + } + + inline void Assembler::SSE_ADDSDm(R r, const double* addr) { + count_fpuld(); + underrunProtect(8); + NanoAssert(_is_xmm_reg_(r)); + const double* daddr = addr; + IMM32(int32_t(daddr)); + *(--_nIns) = uint8_t((r&7)<<3 | 5); + *(--_nIns) = 0x58; + *(--_nIns) = 0x0f; + *(--_nIns) = 0xf2; + asm_output("addsd %s,%p // =%f",gpn(r),(void*)daddr,*daddr); + } + + inline void Assembler::SSE_SUBSD(R rd, R rs) { + count_fpu(); + NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs)); + SSE(0xf20f5c, rd&7, rs&7); + asm_output("subsd %s,%s",gpn(rd),gpn(rs)); + } + + inline void Assembler::SSE_MULSD(R rd, R rs) { + count_fpu(); + NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs)); + SSE(0xf20f59, rd&7, rs&7); + asm_output("mulsd %s,%s",gpn(rd),gpn(rs)); + } + + inline void Assembler::SSE_DIVSD(R rd, R rs) { + count_fpu(); + NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs)); + SSE(0xf20f5e, rd&7, rs&7); + asm_output("divsd %s,%s",gpn(rd),gpn(rs)); + } + + inline void Assembler::SSE_UCOMISD(R rl, R rr) { + count_fpu(); + NanoAssert(_is_xmm_reg_(rl) && _is_xmm_reg_(rr)); + SSE(0x660f2e, rl&7, rr&7); + asm_output("ucomisd %s,%s",gpn(rl),gpn(rr)); + } + + inline void Assembler::SSE_CVTSI2SDm(R xr, R d, R b) { + count_fpu(); + NanoAssert(_is_xmm_reg_(xr) && _is_gp_reg_(b)); + SSEm(0xf20f2a, xr&7, d, b); + asm_output("cvtsi2sd %s,%d(%s)",gpn(xr),d,gpn(b)); + } + + inline void Assembler::SSE_XORPD(R r, const uint32_t* maskaddr) { + count_fpuld(); + underrunProtect(8); + IMM32(int32_t(maskaddr)); + *(--_nIns) = uint8_t((r&7)<<3 | 5); + *(--_nIns) = 0x57; + *(--_nIns) = 0x0f; + *(--_nIns) = 0x66; + asm_output("xorpd %s,[%p]",gpn(r),(void*)maskaddr); + } + + inline void Assembler::SSE_XORPDr(R rd, R rs) { + count_fpu(); + SSE(0x660f57, rd&7, rs&7); + asm_output("xorpd %s,%s",gpn(rd),gpn(rs)); + } + + // floating point unit + inline void Assembler::FPUc(I32 o) { + underrunProtect(2); + *(--_nIns) = (uint8_t)(o & 0xff); + *(--_nIns) = (uint8_t)((o>>8) & 0xff); + } + + inline void Assembler::FPUm(I32 o, I32 d, R b) { + underrunProtect(7); + MODRMm(uint8_t(o), d, b); + *(--_nIns) = (uint8_t)(o>>8); + } + + inline void Assembler::FPUdm(I32 o, const double* const m) { + underrunProtect(6); + MODRMdm(uint8_t(o), int32_t(m)); + *(--_nIns) = uint8_t(o>>8); + } + + inline void Assembler::TEST_AH(I32 i) { + count_alu(); + underrunProtect(3); + *(--_nIns) = uint8_t(i); + *(--_nIns) = 0xc4; + *(--_nIns) = 0xf6; + asm_output("test ah, %d",i); + } + + inline void Assembler::TEST_AX(I32 i) { + count_fpu(); + underrunProtect(5); + *(--_nIns) = 0; + *(--_nIns) = uint8_t(i); + *(--_nIns) = 
uint8_t((i)>>8); + *(--_nIns) = 0; + *(--_nIns) = 0xa9; + asm_output("test ax, %d",i); + } + + inline void Assembler::FNSTSW_AX() { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } + inline void Assembler::FCHS() { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } + inline void Assembler::FLD1() { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } + inline void Assembler::FLDZ() { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } + + inline void Assembler::FFREE(R r) { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",gpn(r)); } + + inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|p, d, b); asm_output("fst%s32 %d(%s)",(p?"p":""),d,gpn(b)); if (p) fpu_pop(); } + inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|p, d, b); asm_output("fst%sq %d(%s)",(p?"p":""),d,gpn(b)); if (p) fpu_pop(); } + + inline void Assembler::FSTPQ(I32 d, R b) { FSTQ(1, d, b); } + + inline void Assembler::FCOM(bool p, I32 d, R b) { count_fpuld(); FPUm(0xdc02|p, d, b); asm_output("fcom%s %d(%s)",(p?"p":""),d,gpn(b)); if (p) fpu_pop(); } + inline void Assembler::FCOMdm(bool p, const double* dm) { + count_fpuld(); + FPUdm(0xdc02|p, dm); + asm_output("fcom%s (%p)",(p?"p":""),(void*)dm); + if (p) fpu_pop(); + } + + inline void Assembler::FLD32(I32 d, R b) { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)",d,gpn(b)); fpu_push();} + inline void Assembler::FLDQ(I32 d, R b) { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} + inline void Assembler::FLDQdm(const double* dm) { count_ldq(); FPUdm(0xdd00, dm); asm_output("fldq (%p)",(void*)dm); fpu_push();} + inline void Assembler::FILDQ(I32 d, R b) { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } + inline void Assembler::FILD(I32 d, R b) { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } + + inline void Assembler::FIST(bool p, I32 d, R b) { count_fpu(); FPUm(0xdb02|p, d, b); asm_output("fist%s %d(%s)",(p?"p":""),d,gpn(b)); if(p) fpu_pop(); } + + inline void Assembler::FADD( I32 d, R b) { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)", d,gpn(b)); } + inline void Assembler::FSUB( I32 d, R b) { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)", d,gpn(b)); } + inline void Assembler::FSUBR(I32 d, R b) { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)",d,gpn(b)); } + inline void Assembler::FMUL( I32 d, R b) { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)", d,gpn(b)); } + inline void Assembler::FDIV( I32 d, R b) { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)", d,gpn(b)); } + inline void Assembler::FDIVR(I32 d, R b) { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)",d,gpn(b)); } + + inline void Assembler::FADDdm( const double *dm) { count_ldq(); FPUdm(0xdc00, dm); asm_output("fadd (%p)", (void*)dm); } + inline void Assembler::FSUBRdm(const double* dm) { count_ldq(); FPUdm(0xdc05, dm); asm_output("fsubr (%p)",(void*)dm); } + inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); } + inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)",(void*)dm); } + + inline void Assembler::FINCSTP() { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); } + + inline void Assembler::FCOMP() { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} + inline void Assembler::FCOMPP() { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); 
fpu_pop();fpu_pop();} + inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0,r); asm_output("fld %s",gpn(r)); fpu_push(); } + inline void Assembler::EMMS() { count_fpu(); FPUc(0x0f77); asm_output("emms"); } + + // standard direct call + inline void Assembler::CALL(const CallInfo* ci) { + count_call(); + underrunProtect(5); + int offset = (ci->_address) - ((int)_nIns); + IMM32( (uint32_t)offset ); + *(--_nIns) = 0xE8; + verbose_only(asm_output("call %s",(ci->_name));) + debug_only(if (ci->returnType()==ARGTYPE_F) fpu_push();) + } + + // indirect call thru register + inline void Assembler::CALLr(const CallInfo* ci, Register r) { + count_calli(); + underrunProtect(2); + ALU(0xff, 2, (r)); + verbose_only(asm_output("call %s",gpn(r));) + debug_only(if (ci->returnType()==ARGTYPE_F) fpu_push();) + } + void Assembler::nInit(AvmCore*) { } @@ -122,7 +899,7 @@ namespace nanojit _epilogue = genEpilogue(); emitJumpTable(si, _epilogue); JMP_indirect(r); - LEAmi4(r, si->table, r); + LEAmi4(r, int32_t(si->table), r); } else { // If the guard already exists, use a simple jump. if (destKnown) { @@ -140,7 +917,7 @@ namespace nanojit // profiling for the exit verbose_only( if (_logc->lcbits & LC_FragProfile) { - INCLi( &guard->record()->profCount ); + INCLi( int32_t(&guard->record()->profCount) ); } ) @@ -1910,7 +2687,7 @@ namespace nanojit verbose_only( void Assembler::asm_inc_m32(uint32_t* pCtr) { - INCLi(pCtr); + INCLi(int32_t(pCtr)); } ) diff --git a/js/src/nanojit/Nativei386.h b/js/src/nanojit/Nativei386.h index 5bf19769fa30..017b0e7ca6f0 100644 --- a/js/src/nanojit/Nativei386.h +++ b/js/src/nanojit/Nativei386.h @@ -112,7 +112,7 @@ namespace nanojit typedef uint8_t NIns; - // Bytes of icache to flush after Assembler::patch + // Bytes of icache to flush after patch const size_t LARGEST_BRANCH_PATCH = 16 * sizeof(NIns); // These are used as register numbers in various parts of the code @@ -191,801 +191,255 @@ namespace nanojit void asm_cmp(LIns *cond); \ void asm_div_mod(LIns *cond); \ void asm_load(int d, Register r); \ - void asm_immf(Register r, uint64_t q, double d, bool canClobberCCs); - - #define IMM8(i) \ - _nIns -= 1; \ - *((int8_t*)_nIns) = (int8_t)(i) - - #define IMM16(i) \ - _nIns -= 2; \ - *((int16_t*)_nIns) = (int16_t)(i) - -#define IMM32(i) \ - _nIns -= 4; \ - *((int32_t*)_nIns) = (int32_t)(i) - -// XXX rearrange NanoAssert() expression to workaround apparent gcc 4.3 bug: -// XXX "error: logical && with non-zero constant will always evaluate as true" -// underrunProtect(6) is necessary for worst-case -#define MODRMs(r,d,b,l,i) \ - NanoAssert(unsigned(i)<8 && unsigned(b)<8 && unsigned(r)<8); \ - if ((d) == 0 && (b) != EBP) { \ + void asm_immf(Register r, uint64_t q, double d, bool canClobberCCs); \ + void IMM8(int32_t i) { \ + _nIns -= 1; \ + *((int8_t*)_nIns) = (int8_t)(i); \ + }; \ + void IMM16(int32_t i) { \ _nIns -= 2; \ - _nIns[0] = (uint8_t) ( 0<<6 | (r)<<3 | 4); \ - _nIns[1] = (uint8_t) ((l)<<6 | (i)<<3 | (b)); \ - } else if (isS8(d)) { \ - _nIns -= 3; \ - _nIns[0] = (uint8_t) ( 1<<6 | (r)<<3 | 4 ); \ - _nIns[1] = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \ - _nIns[2] = (uint8_t) (d); \ - } else { \ - IMM32(d); \ - *(--_nIns) = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \ - *(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | 4 ); \ - } - -// underrunProtect(6) is necessary for worst-case -#define MODRMm(r,d,b) \ - NanoAssert(unsigned(r)<8 && ((b)==UnspecifiedReg || unsigned(b)<8)); \ - if ((b) == UnspecifiedReg) {\ - IMM32(d);\ - *(--_nIns) = (uint8_t) (0<<6 | (r)<<3 | 5);\ - } else if ((b) == ESP) { \ 
- MODRMs(r, d, b, 0, (Register)4); \ - } \ - else if ( (d) == 0 && (b) != EBP) { \ - *(--_nIns) = (uint8_t) ( 0<<6 | (r)<<3 | (b) ); \ - } else if (isS8(d)) { \ - *(--_nIns) = (uint8_t) (d); \ - *(--_nIns) = (uint8_t) ( 1<<6 | (r)<<3 | (b) ); \ - } else { \ - IMM32(d); \ - *(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | (b) ); \ - } - -#define MODRMSIB(reg,base,index,scale,disp) \ - if (disp != 0 || base == EBP) { \ - if (isS8(disp)) { \ - *(--_nIns) = int8_t(disp); \ - } else { \ - IMM32(disp); \ - } \ - } \ - *(--_nIns) = uint8_t((scale)<<6|(index)<<3|(base)); \ - if (disp == 0 && base != EBP) { \ - *(--_nIns) = uint8_t(((reg)<<3)|4); \ - } else { \ - if (isS8(disp)) \ - *(--_nIns) = uint8_t((1<<6)|(reg<<3)|4); \ - else \ - *(--_nIns) = uint8_t((2<<6)|(reg<<3)|4); \ - } - -#define MODRMdm(r,addr) \ - NanoAssert(unsigned(r)<8); \ - IMM32(addr); \ - *(--_nIns) = (uint8_t)( (r)<<3 | 5 ); - -#define MODRM(d,s) \ - NanoAssert(((unsigned)(d))<8 && ((unsigned)(s))<8); \ - *(--_nIns) = (uint8_t) ( 3<<6|(d)<<3|(s) ) - -#define ALU0(o) \ - underrunProtect(1);\ - *(--_nIns) = (uint8_t) (o) - -#define ALUm(c,r,d,b) \ - underrunProtect(8); \ - MODRMm(r,d,b); \ - *(--_nIns) = uint8_t(c) - -#define ALUdm(c,r,addr) \ - underrunProtect(6); \ - MODRMdm(r,addr); \ - *(--_nIns) = uint8_t(c) - -#define ALUsib(c,r,base,index,scale,disp) \ - underrunProtect(7); \ - MODRMSIB(r,base,index,scale,disp); \ - *(--_nIns) = uint8_t(c) - -#define ALUm16(c,r,d,b) \ - underrunProtect(9); \ - MODRMm(r,d,b); \ - *(--_nIns) = uint8_t(c);\ - *(--_nIns) = 0x66 - -#define ALU2dm(c,r,addr) \ - underrunProtect(7); \ - MODRMdm(r,addr); \ - *(--_nIns) = (uint8_t) (c);\ - *(--_nIns) = (uint8_t) ((c)>>8) - -#define ALU2m(c,r,d,b) \ - underrunProtect(9); \ - MODRMm(r,d,b); \ - *(--_nIns) = (uint8_t) (c);\ - *(--_nIns) = (uint8_t) ((c)>>8) - -#define ALU2sib(c,r,base,index,scale,disp) \ - underrunProtect(8); \ - MODRMSIB(r,base,index,scale,disp); \ - *(--_nIns) = (uint8_t) (c); \ - *(--_nIns) = (uint8_t) ((c)>>8) - -#define ALU(c,d,s) \ - underrunProtect(2);\ - MODRM(d,s); \ - *(--_nIns) = (uint8_t) (c) - -#define ALUi(c,r,i) \ - underrunProtect(6); \ - NanoAssert(unsigned(r)<8);\ - if (isS8(i)) { \ - *(--_nIns) = uint8_t(i); \ - MODRM((c>>3),(r)); \ - *(--_nIns) = uint8_t(0x83); \ - } else { \ - IMM32(i); \ - if ( (r) == EAX) { \ - *(--_nIns) = (uint8_t) (c); \ + *((int16_t*)_nIns) = (int16_t)(i); \ + }; \ + void IMM32(int32_t i) { \ + _nIns -= 4; \ + *((int32_t*)_nIns) = (int32_t)(i); \ + }; \ + void MODRMs(int32_t r, int32_t d, Register b, int32_t l, int32_t i); \ + void MODRMm(int32_t r, int32_t d, Register b); \ + void MODRMSIB(Register reg, Register base, int32_t index, int32_t scale, int32_t disp); \ + void MODRMdm(int32_t r, int32_t addr); \ + void MODRM(int32_t d, int32_t s) { \ + NanoAssert((unsigned(d))<8 && (unsigned(s))<8); \ + *(--_nIns) = (uint8_t) ( 3<<6 | d<<3 | s ); \ + }; \ + void ALU0(int32_t o); \ + void ALUm(int32_t c, int32_t r, int32_t d, Register b); \ + void ALUdm(int32_t c, int32_t r, int32_t addr); \ + void ALUsib(int32_t c, Register r, Register base, int32_t index, int32_t scale, int32_t disp); \ + void ALUm16(int32_t c, int32_t r, int32_t d, Register b); \ + void ALU2dm(int32_t c, int32_t r, int32_t addr); \ + void ALU2m(int32_t c, int32_t r, int32_t d, Register b); \ + void ALU2sib(int32_t c, Register r, Register base, int32_t index, int32_t scale, int32_t disp); \ + void ALU(int32_t c, int32_t d, int32_t s) { \ + underrunProtect(2); \ + MODRM(d,s); \ + *(--_nIns) = uint8_t(c); \ + }; \ + void ALUi(int32_t c, 
int32_t r, int32_t i); \ + void ALUmi(int32_t c, int32_t d, Register b, int32_t i); \ + void ALU2(int32_t c, int32_t d, int32_t s); \ + void LAHF(); \ + void SAHF(); \ + void OR(Register l, Register r); \ + void AND(Register l, Register r); \ + void XOR(Register l, Register r); \ + void ADD(Register l, Register r); \ + void SUB(Register l, Register r); \ + void MUL(Register l, Register r); \ + void DIV(Register r); \ + void NOT(Register r); \ + void NEG(Register r); \ + void SHR(Register r, Register s); \ + void SAR(Register r, Register s); \ + void SHL(Register r, Register s); \ + void SHIFT(int32_t c, Register r, int32_t i); \ + void SHLi(Register r, int32_t i); \ + void SHRi(Register r, int32_t i); \ + void SARi(Register r, int32_t i); \ + void MOVZX8(Register d, Register s); \ + void SUBi(Register r, int32_t i); \ + void ADDi(Register r, int32_t i); \ + void ANDi(Register r, int32_t i); \ + void ORi(Register r, int32_t i); \ + void XORi(Register r, int32_t i); \ + void ADDmi(int32_t d, Register b, int32_t i); \ + void TEST(Register d, Register s); \ + void CMP(Register l, Register r); \ + void CMPi(Register r, int32_t i); \ + void MR(Register d, Register s) { \ + count_mov(); \ + ALU(0x8b,d,s); \ + asm_output("mov %s,%s",gpn(d),gpn(s)); \ + }; \ + void LEA(Register r, int32_t d, Register b); \ + void LEAmi4(Register r, int32_t d, int32_t i); \ + void CDQ(); \ + void INCLi(int32_t p); \ + void SETE( Register r); \ + void SETNP(Register r); \ + void SETL( Register r); \ + void SETLE(Register r); \ + void SETG( Register r); \ + void SETGE(Register r); \ + void SETB( Register r); \ + void SETBE(Register r); \ + void SETA( Register r); \ + void SETAE(Register r); \ + void SETO( Register r); \ + void MREQ(Register d, Register s); \ + void MRNE(Register d, Register s); \ + void MRL( Register d, Register s); \ + void MRLE(Register d, Register s); \ + void MRG( Register d, Register s); \ + void MRGE(Register d, Register s); \ + void MRB( Register d, Register s); \ + void MRBE(Register d, Register s); \ + void MRA( Register d, Register s); \ + void MRAE(Register d, Register s); \ + void MRNO(Register d, Register s); \ + void LD(Register reg, int32_t disp, Register base); \ + void LDdm(Register reg, int32_t addr); \ + void LDsib(Register reg, int32_t disp, Register base, int32_t index, int32_t scale); \ + void LD16S(Register r, int32_t d, Register b); \ + void LD16Sdm(Register r, int32_t addr); \ + void LD16Ssib(Register r, int32_t disp, Register base, int32_t index, int32_t scale); \ + void LD16Z(Register r, int32_t d, Register b); \ + void LD16Zdm(Register r, int32_t addr); \ + void LD16Zsib(Register r, int32_t disp, Register base, int32_t index, int32_t scale); \ + void LD8Z(Register r, int32_t d, Register b); \ + void LD8Zdm(Register r, int32_t addr); \ + void LD8Zsib(Register r, int32_t disp, Register base, int32_t ndex, int32_t scale); \ + void LD8S(Register r, int32_t d, Register b); \ + void LD8Sdm(Register r, int32_t addr); \ + void LD8Ssib(Register r, int32_t disp, Register base, int32_t index, int32_t scale); \ + void LDi(Register r, int32_t i); \ + void ST8(Register base, int32_t disp, Register reg); \ + void ST16(Register base, int32_t disp, Register reg); \ + void ST(Register base, int32_t disp, Register reg); \ + void ST8i(Register base, int32_t disp, int32_t imm); \ + void ST16i(Register base, int32_t disp, int32_t imm); \ + void STi(Register base, int32_t disp, int32_t imm); \ + void RET(); \ + void NOP(); \ + void INT3(); \ + void PUSHi(int32_t i); \ + void PUSHi32(int32_t i); \ 
+ void PUSHr(Register r); \ + void PUSHm(int32_t d, Register b); \ + void POPr(Register r); \ + void JCC(int32_t o, NIns* t, const char* n); \ + void JMP_long(NIns* t); \ + void JMP(NIns* t) { \ + count_jmp(); \ + underrunProtect(5); \ + intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \ + if (isS8(tt)) { \ + verbose_only( NIns* next = _nIns; (void)next; ) \ + _nIns -= 2; \ + _nIns[0] = /*JMP8*/0xeb; \ + _nIns[1] = uint8_t(tt & 0xff); \ + asm_output("jmp %p",next+tt); \ } else { \ - MODRM((c>>3),(r)); \ - *(--_nIns) = uint8_t(0x81); \ + JMP_long_nochk_offset(tt); \ } \ - } - -#define ALUmi(c,d,b,i) \ - underrunProtect(10); \ - NanoAssert(((unsigned)b)<8); \ - if (isS8(i)) { \ - *(--_nIns) = uint8_t(i); \ - MODRMm((c>>3),(d),(b)); \ - *(--_nIns) = uint8_t(0x83); \ - } else { \ - IMM32(i); \ - MODRMm((c>>3),(d),(b)); \ - *(--_nIns) = uint8_t(0x81); \ - } - -#define ALU2(c,d,s) \ - underrunProtect(3); \ - MODRM((d),(s)); \ - _nIns -= 2; \ - _nIns[0] = (uint8_t) ( ((c)>>8) ); \ - _nIns[1] = (uint8_t) ( (c) ) - -#define LAHF() do { count_alu(); ALU0(0x9F); asm_output("lahf"); } while(0) -#define SAHF() do { count_alu(); ALU0(0x9E); asm_output("sahf"); } while(0) -#define OR(l,r) do { count_alu(); ALU(0x0b, (l),(r)); asm_output("or %s,%s",gpn(l),gpn(r)); } while(0) -#define AND(l,r) do { count_alu(); ALU(0x23, (l),(r)); asm_output("and %s,%s",gpn(l),gpn(r)); } while(0) -#define XOR(l,r) do { count_alu(); ALU(0x33, (l),(r)); asm_output("xor %s,%s",gpn(l),gpn(r)); } while(0) -#define ADD(l,r) do { count_alu(); ALU(0x03, (l),(r)); asm_output("add %s,%s",gpn(l),gpn(r)); } while(0) -#define SUB(l,r) do { count_alu(); ALU(0x2b, (l),(r)); asm_output("sub %s,%s",gpn(l),gpn(r)); } while(0) -#define MUL(l,r) do { count_alu(); ALU2(0x0faf,(l),(r)); asm_output("mul %s,%s",gpn(l),gpn(r)); } while(0) -#define DIV(r) do { count_alu(); ALU(0xf7, (Register)7,(r)); asm_output("idiv edx:eax, %s",gpn(r)); } while(0) -#define NOT(r) do { count_alu(); ALU(0xf7, (Register)2,(r)); asm_output("not %s",gpn(r)); } while(0) -#define NEG(r) do { count_alu(); ALU(0xf7, (Register)3,(r)); asm_output("neg %s",gpn(r)); } while(0) -#define SHR(r,s) do { count_alu(); ALU(0xd3, (Register)5,(r)); asm_output("shr %s,%s",gpn(r),gpn(s)); } while(0) -#define SAR(r,s) do { count_alu(); ALU(0xd3, (Register)7,(r)); asm_output("sar %s,%s",gpn(r),gpn(s)); } while(0) -#define SHL(r,s) do { count_alu(); ALU(0xd3, (Register)4,(r)); asm_output("shl %s,%s",gpn(r),gpn(s)); } while(0) - -#define SHIFT(c,r,i) \ - underrunProtect(3);\ - *--_nIns = (uint8_t)(i);\ - MODRM((Register)c,r);\ - *--_nIns = 0xc1; - -#define SHLi(r,i) do { count_alu(); SHIFT(4,r,i); asm_output("shl %s,%d", gpn(r),i); } while(0) -#define SHRi(r,i) do { count_alu(); SHIFT(5,r,i); asm_output("shr %s,%d", gpn(r),i); } while(0) -#define SARi(r,i) do { count_alu(); SHIFT(7,r,i); asm_output("sar %s,%d", gpn(r),i); } while(0) - -#define MOVZX8(d,s) do { count_alu(); ALU2(0x0fb6,d,s); asm_output("movzx %s,%s", gpn(d),gpn(s)); } while(0) - -#define SUBi(r,i) do { count_alu(); ALUi(0x2d,r,i); asm_output("sub %s,%d",gpn(r),i); } while(0) -#define ADDi(r,i) do { count_alu(); ALUi(0x05,r,i); asm_output("add %s,%d",gpn(r),i); } while(0) -#define ANDi(r,i) do { count_alu(); ALUi(0x25,r,i); asm_output("and %s,%d",gpn(r),i); } while(0) -#define ORi(r,i) do { count_alu(); ALUi(0x0d,r,i); asm_output("or %s,%d",gpn(r),i); } while(0) -#define XORi(r,i) do { count_alu(); ALUi(0x35,r,i); asm_output("xor %s,%d",gpn(r),i); } while(0) - -#define ADDmi(d,b,i) do { count_alust(); ALUmi(0x05, d, b, i); 
asm_output("add %d(%s), %d", d, gpn(b), i); } while(0) - -#define TEST(d,s) do { count_alu(); ALU(0x85,d,s); asm_output("test %s,%s",gpn(d),gpn(s)); } while(0) -#define CMP(l,r) do { count_alu(); ALU(0x3b, (l),(r)); asm_output("cmp %s,%s",gpn(l),gpn(r)); } while(0) -#define CMPi(r,i) do { count_alu(); ALUi(0x3d,r,i); asm_output("cmp %s,%d",gpn(r),i); } while(0) - -#define MR(d,s) do { count_mov(); ALU(0x8b,d,s); asm_output("mov %s,%s",gpn(d),gpn(s)); } while(0) -#define LEA(r,d,b) do { count_alu(); ALUm(0x8d, r,d,b); asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); } while(0) -// lea %r, d(%i*4) -// This addressing mode is not supported by the MODRMSIB macro. -#define LEAmi4(r,d,i) do { count_alu(); IMM32(d); *(--_nIns) = (2<<6)|((uint8_t)i<<3)|5; *(--_nIns) = (0<<6)|((uint8_t)r<<3)|4; *(--_nIns) = 0x8d; asm_output("lea %s, %p(%s*4)", gpn(r), (void*)d, gpn(i)); } while(0) - -#define CDQ() do { SARi(EDX, 31); MR(EDX, EAX); } while(0) - -#define INCLi(p) do { count_alu(); \ - underrunProtect(6); \ - IMM32((uint32_t)(ptrdiff_t)p); *(--_nIns) = 0x05; *(--_nIns) = 0xFF; \ - asm_output("incl (%p)", (void*)p); } while (0) - -#define SETE(r) do { count_alu(); ALU2(0x0f94,(r),(r)); asm_output("sete %s",gpn(r)); } while(0) -#define SETNP(r) do { count_alu(); ALU2(0x0f9B,(r),(r)); asm_output("setnp %s",gpn(r)); } while(0) -#define SETL(r) do { count_alu(); ALU2(0x0f9C,(r),(r)); asm_output("setl %s",gpn(r)); } while(0) -#define SETLE(r) do { count_alu(); ALU2(0x0f9E,(r),(r)); asm_output("setle %s",gpn(r)); } while(0) -#define SETG(r) do { count_alu(); ALU2(0x0f9F,(r),(r)); asm_output("setg %s",gpn(r)); } while(0) -#define SETGE(r) do { count_alu(); ALU2(0x0f9D,(r),(r)); asm_output("setge %s",gpn(r)); } while(0) -#define SETB(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("setb %s",gpn(r)); } while(0) -#define SETBE(r) do { count_alu(); ALU2(0x0f96,(r),(r)); asm_output("setbe %s",gpn(r)); } while(0) -#define SETA(r) do { count_alu(); ALU2(0x0f97,(r),(r)); asm_output("seta %s",gpn(r)); } while(0) -#define SETAE(r) do { count_alu(); ALU2(0x0f93,(r),(r)); asm_output("setae %s",gpn(r)); } while(0) -#define SETO(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("seto %s",gpn(r)); } while(0) - -#define MREQ(dr,sr) do { count_alu(); ALU2(0x0f44,dr,sr); asm_output("cmove %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRNE(dr,sr) do { count_alu(); ALU2(0x0f45,dr,sr); asm_output("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRL(dr,sr) do { count_alu(); ALU2(0x0f4C,dr,sr); asm_output("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRLE(dr,sr) do { count_alu(); ALU2(0x0f4E,dr,sr); asm_output("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRG(dr,sr) do { count_alu(); ALU2(0x0f4F,dr,sr); asm_output("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRGE(dr,sr) do { count_alu(); ALU2(0x0f4D,dr,sr); asm_output("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRB(dr,sr) do { count_alu(); ALU2(0x0f42,dr,sr); asm_output("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRBE(dr,sr) do { count_alu(); ALU2(0x0f46,dr,sr); asm_output("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRA(dr,sr) do { count_alu(); ALU2(0x0f47,dr,sr); asm_output("cmova %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRAE(dr,sr) do { count_alu(); ALU2(0x0f43,dr,sr); asm_output("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRNO(dr,sr) do { count_alu(); ALU2(0x0f41,dr,sr); asm_output("cmovno %s,%s", gpn(dr),gpn(sr)); } while(0) - -// these aren't currently used but left in for reference -//#define 
LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) -//#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) - -#define LD(reg,disp,base) do { \ - count_ld();\ - ALUm(0x8b,reg,disp,base); \ - asm_output("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); } while(0) - -#define LDdm(reg,addr) do { \ - count_ld(); \ - ALUdm(0x8b,reg,addr); \ - asm_output("mov %s,0(%lx)",gpn(reg),(unsigned long)addr); \ - } while (0) - - -#define SIBIDX(n) "1248"[n] - -#define LDsib(reg,disp,base,index,scale) do { \ - count_ld(); \ - ALUsib(0x8b,reg,base,index,scale,disp); \ - asm_output("mov %s,%d(%s+%s*%c)",gpn(reg),disp,gpn(base),gpn(index),SIBIDX(scale)); \ - } while (0) - -// note: movzx/movsx are being output with an 8/16 suffix to indicate the size -// being loaded. this doesn't really match standard intel format (though is arguably -// terser and more obvious in this case) and would probably be nice to fix. -// (likewise, the 8/16 bit stores being output as "mov8" and "mov16" respectively.) - -// load 16-bit, sign extend -#define LD16S(r,d,b) do { count_ld(); ALU2m(0x0fbf,r,d,b); asm_output("movsx16 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) - -#define LD16Sdm(r,addr) do { count_ld(); ALU2dm(0x0fbf,r,addr); asm_output("movsx16 %s,0(%lx)", gpn(r),(unsigned long)addr); } while (0) - -#define LD16Ssib(r,disp,base,index,scale) do { \ - count_ld(); \ - ALU2sib(0x0fbf,r,base,index,scale,disp); \ - asm_output("movsx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \ - } while (0) - -// load 16-bit, zero extend -#define LD16Z(r,d,b) do { count_ld(); ALU2m(0x0fb7,r,d,b); asm_output("movzx16 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) - -#define LD16Zdm(r,addr) do { count_ld(); ALU2dm(0x0fb7,r,addr); asm_output("movzx16 %s,0(%lx)", gpn(r),(unsigned long)addr); } while (0) - -#define LD16Zsib(r,disp,base,index,scale) do { \ - count_ld(); \ - ALU2sib(0x0fb7,r,base,index,scale,disp); \ - asm_output("movzx16 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \ - } while (0) - -// load 8-bit, zero extend -#define LD8Z(r,d,b) do { count_ld(); ALU2m(0x0fb6,r,d,b); asm_output("movzx8 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) - -#define LD8Zdm(r,addr) do { \ - count_ld(); \ - ALU2dm(0x0fb6,r,addr); \ - asm_output("movzx8 %s,0(%lx)", gpn(r),(long unsigned)addr); \ - } while(0) - -#define LD8Zsib(r,disp,base,index,scale) do { \ - count_ld(); \ - ALU2sib(0x0fb6,r,base,index,scale,disp); \ - asm_output("movzx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \ - } while(0) - -// load 8-bit, sign extend -#define LD8S(r,d,b) do { count_ld(); ALU2m(0x0fbe,r,d,b); asm_output("movsx8 %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) - -#define LD8Sdm(r,addr) do { \ - count_ld(); \ - ALU2dm(0x0fbe,r,addr); \ - asm_output("movsx8 %s,0(%lx)", gpn(r),(long unsigned)addr); \ - } while(0) - -#define LD8Ssib(r,disp,base,index,scale) do { \ - count_ld(); \ - ALU2sib(0x0fbe,r,base,index,scale,disp); \ - asm_output("movsx8 %s,%d(%s+%s*%c)",gpn(r),disp,gpn(base),gpn(index),SIBIDX(scale)); \ - } while(0) - -#define LDi(r,i) do { \ - count_ld();\ - underrunProtect(5); \ - IMM32(i); \ - NanoAssert(((unsigned)r)<8); \ - *(--_nIns) = (uint8_t) (0xb8 | (r) ); \ - asm_output("mov %s,%d",gpn(r),i); } while(0) - -// quirk of x86-32: reg must be a/b/c/d for byte stores here -#define ST8(base,disp,reg) do { \ - count_st();\ - NanoAssert(((unsigned)reg)<4); \ - ALUm(0x88,reg,disp,base); \ - asm_output("mov8 
%d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg)); } while(0) - -#define ST16(base,disp,reg) do { \ - count_st();\ - ALUm16(0x89,reg,disp,base); \ - asm_output("mov16 %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg)); } while(0) - -#define ST(base,disp,reg) do { \ - count_st();\ - ALUm(0x89,reg,disp,base); \ - asm_output("mov %d(%s),%s",disp,base==UnspecifiedReg?"0":gpn(base),gpn(reg)); } while(0) - -#define ST8i(base,disp,imm) do { \ - count_st();\ - underrunProtect(8); \ - IMM8(imm); \ - MODRMm(0, disp, base); \ - *(--_nIns) = 0xc6; \ - asm_output("mov8 %d(%s),%d",disp,gpn(base),imm); } while(0) - -#define ST16i(base,disp,imm) do { \ - count_st();\ - underrunProtect(10); \ - IMM16(imm); \ - MODRMm(0, disp, base); \ - *(--_nIns) = 0xc7; \ - *(--_nIns) = 0x66; \ - asm_output("mov16 %d(%s),%d",disp,gpn(base),imm); } while(0) - -#define STi(base,disp,imm) do { \ - count_st();\ - underrunProtect(11); \ - IMM32(imm); \ - MODRMm(0, disp, base); \ - *(--_nIns) = 0xc7; \ - asm_output("mov %d(%s),%d",disp,gpn(base),imm); } while(0) - -#define RET() do { count_ret(); ALU0(0xc3); asm_output("ret"); } while(0) -#define NOP() do { count_alu(); ALU0(0x90); asm_output("nop"); } while(0) -#define INT3() do { ALU0(0xcc); asm_output("int3"); } while(0) - -#define PUSHi(i) do { \ - count_push();\ - if (isS8(i)) { \ - underrunProtect(2); \ - _nIns-=2; _nIns[0] = 0x6a; _nIns[1] = (uint8_t)(i); \ - asm_output("push %d",i); \ - } else \ - { PUSHi32(i); } } while(0) - -#define PUSHi32(i) do { \ - count_push();\ - underrunProtect(5); \ - IMM32(i); \ - *(--_nIns) = 0x68; \ - asm_output("push %d",i); } while(0) - -#define PUSHr(r) do { \ - count_push();\ - underrunProtect(1); \ - NanoAssert(((unsigned)r)<8); \ - *(--_nIns) = (uint8_t) ( 0x50 | (r) ); \ - asm_output("push %s",gpn(r)); } while(0) - -#define PUSHm(d,b) do { \ - count_pushld();\ - ALUm(0xff, 6, d, b); \ - asm_output("push %d(%s)",d,gpn(b)); } while(0) - -#define POPr(r) do { \ - count_pop();\ - underrunProtect(1); \ - NanoAssert(((unsigned)r)<8); \ - *(--_nIns) = (uint8_t) ( 0x58 | (r) ); \ - asm_output("pop %s",gpn(r)); } while(0) - -#define JCC32 0x0f -#define JMP8 0xeb -#define JMP32 0xe9 - -#define JCC(o,t,n) do { \ - count_jcc();\ - underrunProtect(6); \ - intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \ - if (isS8(tt)) { \ - verbose_only( NIns* next = _nIns; (void)next; ) \ - _nIns -= 2; \ - _nIns[0] = (uint8_t) ( 0x70 | (o) ); \ - _nIns[1] = (uint8_t) (tt); \ - asm_output("%-5s %p",(n),(next+tt)); \ - } else { \ - verbose_only( NIns* next = _nIns; ) \ - IMM32(tt); \ - _nIns -= 2; \ - _nIns[0] = JCC32; \ - _nIns[1] = (uint8_t) ( 0x80 | (o) ); \ - asm_output("%-5s %p",(n),(next+tt)); \ - } } while(0) - -#define JMP_long(t) do { \ - count_jmp();\ - underrunProtect(5); \ - intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \ - JMP_long_nochk_offset(tt); \ - verbose_only( verbose_outputf("%010lx:", (unsigned long)_nIns); ) \ - } while(0) - -#define JMP(t) do { \ - count_jmp();\ - underrunProtect(5); \ - intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \ - if (isS8(tt)) { \ - verbose_only( NIns* next = _nIns; (void)next; ) \ - _nIns -= 2; \ - _nIns[0] = JMP8; \ - _nIns[1] = (uint8_t) ( (tt)&0xff ); \ - asm_output("jmp %p",(next+tt)); \ - } else { \ - JMP_long_nochk_offset(tt); \ - } } while(0) - -// this should only be used when you can guarantee there is enough room on the page -#define JMP_long_nochk_offset(o) do {\ - verbose_only( NIns* next = _nIns; (void)next; ) \ - IMM32((o)); \ - *(--_nIns) = JMP32; \ - asm_output("jmp 
%p",(next+(o))); } while(0) - -#define JMP_indirect(r) do { \ - underrunProtect(2); \ - MODRMm(4, 0, r); \ - *(--_nIns) = 0xff; \ - asm_output("jmp *(%s)", gpn(r)); } while (0) - -#define JMP_indexed(x, ss, addr) do { \ - underrunProtect(7); \ - IMM32(addr); \ - _nIns -= 3;\ - _nIns[0] = (NIns) 0xff; /* jmp */ \ - _nIns[1] = (NIns) (0<<6 | 4<<3 | 4); /* modrm: base=sib + disp32 */ \ - _nIns[2] = (NIns) ((ss)<<6 | (x)<<3 | 5); /* sib: x<>16)&0xff); \ - _nIns[1] = (uint8_t)(((c)>>8)&0xff); \ - _nIns[2] = (uint8_t)((c)&0xff) - -#define SSEm(c,r,d,b) \ - underrunProtect(9); \ - MODRMm((r),(d),(b)); \ - _nIns -= 3; \ - _nIns[0] = (uint8_t)(((c)>>16)&0xff); \ - _nIns[1] = (uint8_t)(((c)>>8)&0xff); \ - _nIns[2] = (uint8_t)((c)&0xff) - -#define LDSD(r,d,b)do { \ - count_ldq();\ - SSEm(0xf20f10, (r)&7, (d), (b)); \ - asm_output("movsd %s,%d(%s)",gpn(r),(d),gpn(b)); \ - } while(0) - -#define LDSDm(r,addr)do { \ - count_ldq();\ - underrunProtect(8); \ - const double* daddr = addr; \ - IMM32(int32_t(daddr));\ - *(--_nIns) = uint8_t(((r)&7)<<3|5); \ - *(--_nIns) = 0x10;\ - *(--_nIns) = 0x0f;\ - *(--_nIns) = 0xf2;\ - asm_output("movsd %s,(%p) // =%f",gpn(r),(void*)daddr,*daddr); \ - } while(0) - -#define STSD(d,b,r)do { \ - count_stq();\ - SSEm(0xf20f11, (r)&7, (d), (b)); \ - asm_output("movsd %d(%s),%s",(d),gpn(b),gpn(r)); \ - } while(0) - -#define SSE_LDQ(r,d,b)do { \ - count_ldq();\ - SSEm(0xf30f7e, (r)&7, (d), (b)); \ - asm_output("movq %s,%d(%s)",gpn(r),d,gpn(b)); \ - } while(0) - -#define SSE_STQ(d,b,r)do { \ - count_stq();\ - SSEm(0x660fd6, (r)&7, (d), (b)); \ - asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); \ - } while(0) - -#define SSE_LDSS(r,d,b)do { \ - count_ld();\ - SSEm(0xf30f10, (r)&7, (d), (b)); \ - asm_output("movss %s,%d(%s)",gpn(r),d,gpn(b)); \ - } while(0) - -#define SSE_STSS(d,b,r)do { \ - count_st();\ - SSEm(0xf30f11, (r)&7, (d), (b)); \ - asm_output("movss %d(%s),%s",(d),gpn(b),gpn(r)); \ - } while(0) - -#define SSE_CVTSI2SD(xr,gr) do{ \ - count_fpu();\ - SSE(0xf20f2a, (xr)&7, (gr)&7); \ - asm_output("cvtsi2sd %s,%s",gpn(xr),gpn(gr)); \ - } while(0) - - #define SSE_CVTSD2SI(gr,xr) do{ \ - count_fpu();\ - SSE(0xf20f2d, (gr)&7, (xr)&7); \ - asm_output("cvtsd2si %s,%s",gpn(gr),gpn(xr)); \ - } while(0) - -#define SSE_CVTSD2SS(xr,gr) do{ \ - count_fpu();\ - SSE(0xf20f5a, (xr)&7, (gr)&7); \ - asm_output("cvtsd2ss %s,%s",gpn(xr),gpn(gr)); \ - } while(0) - -#define SSE_CVTSS2SD(xr,gr) do{ \ - count_fpu();\ - SSE(0xf30f5a, (xr)&7, (gr)&7); \ - asm_output("cvtss2sd %s,%s",gpn(xr),gpn(gr)); \ - } while(0) - -#define CVTDQ2PD(dstr,srcr) do{ \ - count_fpu();\ - SSE(0xf30fe6, (dstr)&7, (srcr)&7); \ - asm_output("cvtdq2pd %s,%s",gpn(dstr),gpn(srcr)); \ - } while(0) - -// move and zero-extend gpreg to xmm reg -#define SSE_MOVD(d,s) do{ \ - count_mov();\ - if (_is_xmm_reg_(s)) { \ - NanoAssert(_is_gp_reg_(d)); \ - SSE(0x660f7e, (s)&7, (d)&7); \ - } else { \ - NanoAssert(_is_gp_reg_(s)); \ - NanoAssert(_is_xmm_reg_(d)); \ - SSE(0x660f6e, (d)&7, (s)&7); \ - } \ - asm_output("movd %s,%s",gpn(d),gpn(s)); \ - } while(0) - -#define SSE_MOVSD(rd,rs) do{ \ - count_mov();\ - NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\ - SSE(0xf20f10, (rd)&7, (rs)&7); \ - asm_output("movsd %s,%s",gpn(rd),gpn(rs)); \ - } while(0) - -#define SSE_MOVDm(d,b,xrs) do {\ - count_st();\ - NanoAssert(_is_xmm_reg_(xrs) && (_is_gp_reg_(b) || b==FP));\ - SSEm(0x660f7e, (xrs)&7, d, b);\ - asm_output("movd %d(%s),%s", d, gpn(b), gpn(xrs));\ - } while(0) - -#define SSE_ADDSD(rd,rs) do{ \ - count_fpu();\ - 
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\ - SSE(0xf20f58, (rd)&7, (rs)&7); \ - asm_output("addsd %s,%s",gpn(rd),gpn(rs)); \ - } while(0) - -#define SSE_ADDSDm(r,addr)do { \ - count_fpuld();\ - underrunProtect(8); \ - NanoAssert(_is_xmm_reg_(r));\ - const double* daddr = addr; \ - IMM32(int32_t(daddr));\ - *(--_nIns) = uint8_t(((r)&7)<<3|5); \ - *(--_nIns) = 0x58;\ - *(--_nIns) = 0x0f;\ - *(--_nIns) = 0xf2;\ - asm_output("addsd %s,%p // =%f",gpn(r),(void*)daddr,*daddr); \ - } while(0) - -#define SSE_SUBSD(rd,rs) do{ \ - count_fpu();\ - NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\ - SSE(0xf20f5c, (rd)&7, (rs)&7); \ - asm_output("subsd %s,%s",gpn(rd),gpn(rs)); \ - } while(0) -#define SSE_MULSD(rd,rs) do{ \ - count_fpu();\ - NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\ - SSE(0xf20f59, (rd)&7, (rs)&7); \ - asm_output("mulsd %s,%s",gpn(rd),gpn(rs)); \ - } while(0) -#define SSE_DIVSD(rd,rs) do{ \ - count_fpu();\ - NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\ - SSE(0xf20f5e, (rd)&7, (rs)&7); \ - asm_output("divsd %s,%s",gpn(rd),gpn(rs)); \ - } while(0) -#define SSE_UCOMISD(rl,rr) do{ \ - count_fpu();\ - NanoAssert(_is_xmm_reg_(rl) && _is_xmm_reg_(rr));\ - SSE(0x660f2e, (rl)&7, (rr)&7); \ - asm_output("ucomisd %s,%s",gpn(rl),gpn(rr)); \ - } while(0) - -#define CVTSI2SDm(xr,d,b) do{ \ - count_fpu();\ - NanoAssert(_is_xmm_reg_(xr) && _is_gp_reg_(b));\ - SSEm(0xf20f2a, (xr)&7, (d), (b)); \ - asm_output("cvtsi2sd %s,%d(%s)",gpn(xr),(d),gpn(b)); \ - } while(0) - -#define SSE_XORPD(r, maskaddr) do {\ - count_fpuld();\ - underrunProtect(8); \ - IMM32(maskaddr);\ - *(--_nIns) = uint8_t(((r)&7)<<3|5); \ - *(--_nIns) = 0x57;\ - *(--_nIns) = 0x0f;\ - *(--_nIns) = 0x66;\ - asm_output("xorpd %s,[%p]",gpn(r),(void*)(maskaddr));\ - } while(0) - -#define SSE_XORPDr(rd,rs) do{ \ - count_fpu();\ - SSE(0x660f57, (rd)&7, (rs)&7); \ - asm_output("xorpd %s,%s",gpn(rd),gpn(rs)); \ - } while(0) - -// floating point unit -#define FPUc(o) \ - underrunProtect(2); \ - *(--_nIns) = ((uint8_t)(o)&0xff); \ - *(--_nIns) = (uint8_t)(((o)>>8)&0xff) - -#define FPU(o,r) \ - underrunProtect(2); \ - *(--_nIns) = uint8_t(((uint8_t)(o)&0xff) | (r&7));\ - *(--_nIns) = (uint8_t)(((o)>>8)&0xff) - -#define FPUm(o,d,b) \ - underrunProtect(7); \ - MODRMm((uint8_t)(o), d, b); \ - *(--_nIns) = (uint8_t)((o)>>8) - -#define FPUdm(o, m) \ - underrunProtect(6); \ - MODRMdm((uint8_t)(o), m); \ - *(--_nIns) = (uint8_t)((o)>>8) - -#define TEST_AH(i) do { \ - count_alu();\ - underrunProtect(3); \ - *(--_nIns) = ((uint8_t)(i)); \ - *(--_nIns) = 0xc4; \ - *(--_nIns) = 0xf6; \ - asm_output("test ah, %d",i); } while(0) - -#define TEST_AX(i) do { \ - count_fpu();\ - underrunProtect(5); \ - *(--_nIns) = (0); \ - *(--_nIns) = ((uint8_t)(i)); \ - *(--_nIns) = ((uint8_t)((i)>>8)); \ - *(--_nIns) = (0); \ - *(--_nIns) = 0xa9; \ - asm_output("test ax, %d",i); } while(0) - -#define FNSTSW_AX() do { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } while(0) -#define FCHS() do { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } while(0) -#define FLD1() do { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } while(0) -#define FLDZ() do { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } while(0) -#define FFREE(r) do { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",gpn(r)); } while(0) -#define FST32(p,d,b) do { count_stq(); FPUm(0xd902|(p), d, b); asm_output("fst%s32 %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0) -#define FSTQ(p,d,b) do { count_stq(); FPUm(0xdd02|(p), d, b); asm_output("fst%sq 
%d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0) -#define FSTPQ(d,b) FSTQ(1,d,b) -#define FCOM(p,d,b) do { count_fpuld(); FPUm(0xdc02|(p), d, b); asm_output("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0) -#define FCOMdm(p,m) do { const double* const dm = m; \ - count_fpuld(); FPUdm(0xdc02|(p), dm); asm_output("fcom%s (%p)",((p)?"p":""),(void*)dm); if (p) fpu_pop(); } while(0) -#define FLD32(d,b) do { count_ldq(); FPUm(0xd900, d, b); asm_output("fld32 %d(%s)",d,gpn(b)); fpu_push();} while(0) -#define FLDQ(d,b) do { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0) -#define FLDQdm(m) do { const double* const dm = m; \ - count_ldq(); FPUdm(0xdd00, dm); asm_output("fldq (%p)",(void*)dm); fpu_push();} while(0) -#define FILDQ(d,b) do { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0) -#define FILD(d,b) do { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0) -#define FIST(p,d,b) do { count_fpu(); FPUm(0xdb02|(p), d, b); asm_output("fist%s %d(%s)",((p)?"p":""),d,gpn(b)); if(p) fpu_pop(); } while(0) -#define FADD(d,b) do { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)",d,gpn(b)); } while(0) -#define FADDdm(m) do { const double* const dm = m; \ - count_ldq(); FPUdm(0xdc00, dm); asm_output("fadd (%p)",(void*)dm); } while(0) -#define FSUB(d,b) do { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)",d,gpn(b)); } while(0) -#define FSUBR(d,b) do { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)",d,gpn(b)); } while(0) -#define FSUBRdm(m) do { const double* const dm = m; \ - count_ldq(); FPUdm(0xdc05, dm); asm_output("fsubr (%p)",(void*)dm); } while(0) -#define FMUL(d,b) do { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)",d,gpn(b)); } while(0) -#define FMULdm(m) do { const double* const dm = m; \ - count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)",(void*)dm); } while(0) -#define FDIV(d,b) do { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)",d,gpn(b)); } while(0) -#define FDIVR(d,b) do { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)",d,gpn(b)); } while(0) -#define FDIVRdm(m) do { const double* const dm = m; \ - count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)",(void*)dm); } while(0) -#define FINCSTP() do { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); } while(0) -#define FSTP(r) do { count_fpu(); FPU(0xddd8, r&7); asm_output("fstp %s",gpn(r)); fpu_pop();} while(0) -#define FCOMP() do { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} while(0) -#define FCOMPP() do { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} while(0) -#define FLDr(r) do { count_ldq(); FPU(0xd9c0,r); asm_output("fld %s",gpn(r)); fpu_push(); } while(0) -#define EMMS() do { count_fpu(); FPUc(0x0f77); asm_output("emms"); } while (0) - -// standard direct call -#define CALL(ci) do { \ - count_call();\ - underrunProtect(5); \ - int offset = (ci->_address) - ((int)_nIns); \ - IMM32( (uint32_t)offset ); \ - *(--_nIns) = 0xE8; \ - verbose_only(asm_output("call %s",(ci->_name));) \ - debug_only(if (ci->returnType()==ARGTYPE_F) fpu_push();)\ -} while (0) - -// indirect call thru register -#define CALLr(ci,r) do { \ - count_calli();\ - underrunProtect(2);\ - ALU(0xff, 2, (r));\ - verbose_only(asm_output("call %s",gpn(r));) \ - debug_only(if (ci->returnType()==ARGTYPE_F) fpu_push();)\ -} while (0) - + }; \ + /* This should only be used when you can guarantee there is enough room 
+        void JMP_long_nochk_offset(int32_t o) { \
+            verbose_only( NIns* next = _nIns; (void)next; ) \
+            IMM32(o); \
+            *(--_nIns) = /*JMP32*/0xe9; \
+            asm_output("jmp %p",next+o); \
+        }; \
+        void JMP_indirect(Register r); \
+        void JMP_indexed(Register x, int32_t ss, NIns** addr); \
+        void JE(NIns* t); \
+        void JNE(NIns* t); \
+        void JP(NIns* t); \
+        void JNP(NIns* t); \
+        void JB(NIns* t); \
+        void JNB(NIns* t); \
+        void JBE(NIns* t); \
+        void JNBE(NIns* t); \
+        void JA(NIns* t); \
+        void JNA(NIns* t); \
+        void JAE(NIns* t); \
+        void JNAE(NIns* t); \
+        void JL(NIns* t); \
+        void JNL(NIns* t); \
+        void JLE(NIns* t); \
+        void JNLE(NIns* t); \
+        void JG(NIns* t); \
+        void JNG(NIns* t); \
+        void JGE(NIns* t); \
+        void JNGE(NIns* t); \
+        void JO(NIns* t); \
+        void JNO(NIns* t); \
+        void SSE(int32_t c, int32_t d, int32_t s); \
+        void SSEm(int32_t c, int32_t r, int32_t d, Register b); \
+        void LDSDm(Register r, const double* addr); \
+        void SSE_LDSD(Register r, int32_t d, Register b); \
+        void SSE_LDQ( Register r, int32_t d, Register b); \
+        void SSE_LDSS(Register r, int32_t d, Register b); \
+        void SSE_STSD(int32_t d, Register b, Register r); \
+        void SSE_STQ( int32_t d, Register b, Register r); \
+        void SSE_STSS(int32_t d, Register b, Register r); \
+        void SSE_CVTSI2SD(Register xr, Register gr); \
+        void SSE_CVTSD2SI(Register gr, Register xr); \
+        void SSE_CVTSD2SS(Register xr, Register gr); \
+        void SSE_CVTSS2SD(Register xr, Register gr); \
+        void SSE_CVTDQ2PD(Register d, Register r); \
+        void SSE_MOVD(Register d, Register s); \
+        void SSE_MOVSD(Register rd, Register rs); \
+        void SSE_MOVDm(Register d, Register b, Register xrs); \
+        void SSE_ADDSD(Register rd, Register rs); \
+        void SSE_ADDSDm(Register r, const double* addr); \
+        void SSE_SUBSD(Register rd, Register rs); \
+        void SSE_MULSD(Register rd, Register rs); \
+        void SSE_DIVSD(Register rd, Register rs); \
+        void SSE_UCOMISD(Register rl, Register rr); \
+        void SSE_CVTSI2SDm(Register xr, Register d, Register b); \
+        void SSE_XORPD(Register r, const uint32_t* maskaddr); \
+        void SSE_XORPDr(Register rd, Register rs); \
+        void FPUc(int32_t o); \
+        void FPU(int32_t o, Register r) { \
+            underrunProtect(2); \
+            *(--_nIns) = uint8_t(((uint8_t)(o)&0xff) | (r&7)); \
+            *(--_nIns) = (uint8_t)((o>>8)&0xff); \
+        }; \
+        void FPUm(int32_t o, int32_t d, Register b); \
+        void FPUdm(int32_t o, const double* const m); \
+        void TEST_AH(int32_t i); \
+        void TEST_AX(int32_t i); \
+        void FNSTSW_AX(); \
+        void FCHS(); \
+        void FLD1(); \
+        void FLDZ(); \
+        void FFREE(Register r); \
+        void FST32(bool p, int32_t d, Register b); \
+        void FSTQ(bool p, int32_t d, Register b); \
+        void FSTPQ(int32_t d, Register b); \
+        void FCOM(bool p, int32_t d, Register b); \
+        void FCOMdm(bool p, const double* dm); \
+        void FLD32(int32_t d, Register b); \
+        void FLDQ(int32_t d, Register b); \
+        void FLDQdm(const double* dm); \
+        void FILDQ(int32_t d, Register b); \
+        void FILD(int32_t d, Register b); \
+        void FIST(bool p, int32_t d, Register b); \
+        void FADD( int32_t d, Register b); \
+        void FSUB( int32_t d, Register b); \
+        void FSUBR(int32_t d, Register b); \
+        void FMUL( int32_t d, Register b); \
+        void FDIV( int32_t d, Register b); \
+        void FDIVR(int32_t d, Register b); \
+        void FADDdm( const double *dm); \
+        void FSUBRdm(const double* dm); \
+        void FMULdm( const double* dm); \
+        void FDIVRdm(const double* dm); \
+        void FINCSTP(); \
+        void FSTP(Register r) { \
+            count_fpu(); \
+            FPU(0xddd8, r); \
+            asm_output("fstp %s",gpn(r)); fpu_pop(); \
+        }; \
+        void FCOMP(); \
+        void FCOMPP(); \
+        void FLDr(Register r); \
+        void EMMS(); \
+        void CALL(const CallInfo* ci); \
+        void CALLr(const CallInfo* ci, Register r); }
+
+
 #endif // __nanojit_Nativei386__
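
For readers skimming the patch, the following standalone sketch (not part of the change; ToyEmitter and TOY_PUSHR are made-up names) illustrates the general pattern applied throughout: a statement-style byte-poking macro becomes an inline member function with a typed, asserted argument, while the bytes it emits stay identical.

    // Illustrative sketch only; ToyEmitter/TOY_PUSHR are hypothetical, not nanojit code.
    #include <cassert>
    #include <cstdint>

    struct ToyEmitter {
        uint8_t  buf[64];
        uint8_t* ins;                                 // like _nIns: code is emitted backwards
        ToyEmitter() : ins(buf + sizeof(buf)) {}

        // New style: an inline member function with an assertion and a typed parameter.
        void PUSHR(int r) {
            assert(unsigned(r) < 8);
            *(--ins) = uint8_t(0x50 | (r & 7));       // single-byte "push r32" encoding
        }
    };

    // Old style: a macro that pokes the same byte directly.
    #define TOY_PUSHR(e, r)  do { *(--(e).ins) = uint8_t(0x50 | ((r) & 7)); } while (0)

    int main() {
        ToyEmitter a, b;
        TOY_PUSHR(a, 3);                              // macro form
        b.PUSHR(3);                                   // function form
        assert(*a.ins == *b.ins && *a.ins == 0x53);   // both emit "push ebx"
        return 0;
    }

The usual payoff of this conversion is type checking on register arguments, a place to put assertions, easier debugging (functions show up in stack traces and can be stepped into), and less preprocessor expansion in every caller.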