From 18402713d98644ea6471ef1b0a9f0418e3cd3377 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Wed, 1 Dec 2010 14:23:44 -0800
Subject: [PATCH] Bug 580515 - TM: LIR_cmovd mishandled with X86_FORCE_SSE2=no. r=edwsmith.

--HG--
extra : convert_revision : 4effe362e918583ec7b98b08da24f02c0833d306
---
 js/src/nanojit/NativeX64.cpp  | 125 ++++++++++++++++++----------
 js/src/nanojit/NativeX64.h    |   9 ++-
 js/src/nanojit/Nativei386.cpp | 107 +++++++++++++++++------------
 js/src/nanojit/Nativei386.h   |   9 +--
 4 files changed, 142 insertions(+), 108 deletions(-)

diff --git a/js/src/nanojit/NativeX64.cpp b/js/src/nanojit/NativeX64.cpp
index e33fe748a0a1..8b0e77616237 100644
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@@ -1173,20 +1173,24 @@ namespace nanojit
         Register rf = findRegFor(iffalse, allow & ~rmask(rr));
 
         if (ins->isop(LIR_cmovd)) {
+            // See Nativei386.cpp:asm_cmov() for an explanation of the subtleties here.
             NIns* target = _nIns;
             asm_nongp_copy(rr, rf);
-            asm_branch(false, cond, target);
+            asm_branch_helper(false, cond, target);
 
             // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
             Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
 
             if (rr != rt)
                 asm_nongp_copy(rr, rt);
+
             freeResourcesOf(ins);
             if (!iftrue->isInReg()) {
                 NanoAssert(rt == rr);
                 findSpecificRegForUnallocated(iftrue, rr);
             }
+
+            asm_cmp(cond);
             return;
         }
@@ -1194,8 +1198,8 @@ namespace nanojit
         Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
 
         // WARNING: We cannot generate any code that affects the condition
-        // codes between the MRcc generation here and the asm_cmp() call
-        // below. See asm_cmp() for more details.
+        // codes between the MRcc generation here and the asm_cmpi() call
+        // below. See asm_cmpi() for more details.
         LOpcode condop = cond->opcode();
         if (ins->isop(LIR_cmovi)) {
             switch (condop) {
@@ -1234,30 +1238,36 @@ namespace nanojit
             findSpecificRegForUnallocated(iftrue, rr);
         }
 
-        asm_cmp(cond);
+        asm_cmpi(cond);
     }
 
-    NIns* Assembler::asm_branch(bool onFalse, LIns *cond, NIns *target) {
-        NanoAssert(cond->isCmp());
-        LOpcode condop = cond->opcode();
+    NIns* Assembler::asm_branch(bool onFalse, LIns* cond, NIns* target) {
+        NIns* patch = asm_branch_helper(onFalse, cond, target);
+        asm_cmp(cond);
+        return patch;
+    }
 
+    NIns* Assembler::asm_branch_helper(bool onFalse, LIns *cond, NIns *target) {
         if (target && !isTargetWithinS32(target)) {
-            // conditional jumps beyond 32bit range, so invert the branch/compare
-            // and emit an unconditional jump to the target
+            // A conditional jump beyond 32-bit range, so invert the
+            // branch/compare and emit an unconditional jump to the target:
             //   j(inverted) B1
             //   jmp target
             //  B1:
             NIns* shortTarget = _nIns;
             JMP(target);
             target = shortTarget;
 
             onFalse = !onFalse;
         }
-        if (isCmpDOpcode(condop))
-            return asm_branchd(onFalse, cond, target);
+        return isCmpDOpcode(cond->opcode())
+               ? asm_branchd_helper(onFalse, cond, target)
+               : asm_branchi_helper(onFalse, cond, target);
+    }
 
+    NIns* Assembler::asm_branchi_helper(bool onFalse, LIns *cond, NIns *target) {
         // We must ensure there's room for the instruction before calculating
         // the offset. And the offset determines the opcode (8bit or 32bit).
+        LOpcode condop = cond->opcode();
         if (target && isTargetWithinS8(target)) {
             if (onFalse) {
                 switch (condop) {
@@ -1315,9 +1325,7 @@ namespace nanojit
                 }
             }
         }
-        NIns *patch = _nIns;    // address of instruction to patch
-        asm_cmp(cond);
-        return patch;
+        return _nIns;           // address of instruction to patch
     }
 
     NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) {
@@ -1334,13 +1342,17 @@ namespace nanojit
         return _nIns;
     }
 
+    void Assembler::asm_cmp(LIns *cond) {
+        isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond);
+    }
+
     // WARNING: this function cannot generate code that will affect the
     // condition codes prior to the generation of the test/cmp. See
-    // Nativei386.cpp:asm_cmp() for details.
-    void Assembler::asm_cmp(LIns *cond) {
+    // Nativei386.cpp:asm_cmpi() for details.
+    void Assembler::asm_cmpi(LIns *cond) {
         LIns *b = cond->oprnd2();
         if (isImm32(b)) {
-            asm_cmp_imm(cond);
+            asm_cmpi_imm(cond);
             return;
         }
         LIns *a = cond->oprnd1();
@@ -1361,7 +1373,7 @@ namespace nanojit
         }
     }
 
-    void Assembler::asm_cmp_imm(LIns *cond) {
+    void Assembler::asm_cmpi_imm(LIns *cond) {
         LOpcode condop = cond->opcode();
         LIns *a = cond->oprnd1();
         LIns *b = cond->oprnd2();
@@ -1399,11 +1411,9 @@ namespace nanojit
     //  LIR_jt   jae   ja   swap+jae   swap+ja   jp over je
     //  LIR_jf   jb    jbe  swap+jb    swap+jbe  jne+jp
 
-    NIns* Assembler::asm_branchd(bool onFalse, LIns *cond, NIns *target) {
+    NIns* Assembler::asm_branchd_helper(bool onFalse, LIns *cond, NIns *target) {
        LOpcode condop = cond->opcode();
        NIns *patch;
-        LIns *a = cond->oprnd1();
-        LIns *b = cond->oprnd2();
        if (condop == LIR_eqd) {
            if (onFalse) {
                // branch if unordered or !=
@@ -1422,34 +1432,23 @@ namespace nanojit
            }
        }
        else {
-            if (condop == LIR_ltd) {
-                condop = LIR_gtd;
-                LIns *t = a; a = b; b = t;
-            } else if (condop == LIR_led) {
-                condop = LIR_ged;
-                LIns *t = a; a = b; b = t;
-            }
-            if (condop == LIR_gtd) {
-                if (onFalse)
-                    JBE(8, target);
-                else
-                    JA(8, target);
-            } else { // LIR_ged
-                if (onFalse)
-                    JB(8, target);
-                else
-                    JAE(8, target);
+            // LIR_ltd and LIR_gtd are handled by the same case because
+            // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise for
+            // LIR_led/LIR_ged.
+            switch (condop) {
+            case LIR_ltd:
+            case LIR_gtd: if (onFalse) JBE(8, target); else JA(8, target);  break;
+            case LIR_led:
+            case LIR_ged: if (onFalse) JB(8, target);  else JAE(8, target); break;
+            default:      NanoAssert(0);                                    break;
            }
            patch = _nIns;
        }
-        asm_cmpd(a, b);
        return patch;
    }
 
    void Assembler::asm_condd(LIns *ins) {
        LOpcode op = ins->opcode();
-        LIns *a = ins->oprnd1();
-        LIns *b = ins->oprnd2();
        if (op == LIR_eqd) {
            // result = ZF & !PF, must do logic on flags
            // r = al|bl|cl|dl, can only use rh without rex prefix
@@ -1460,30 +1459,40 @@ namespace nanojit
            X86_SETNP(r);       // setnp rh    rh = !PF
            X86_SETE(r);        // sete  rl    rl = ZF
        } else {
-            if (op == LIR_ltd) {
-                op = LIR_gtd;
-                LIns *t = a; a = b; b = t;
-            } else if (op == LIR_led) {
-                op = LIR_ged;
-                LIns *t = a; a = b; b = t;
-            }
+            // LIR_ltd and LIR_gtd are handled by the same case because
+            // asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise for
+            // LIR_led/LIR_ged.
            Register r = prepareResultReg(ins, GpRegs); // x64 can use any GPR as setcc target
            MOVZX8(r, r);
-            if (op == LIR_gtd)
-                SETA(r);
-            else
-                SETAE(r);
+            switch (op) {
+            case LIR_ltd:
+            case LIR_gtd: SETA(r);       break;
+            case LIR_led:
+            case LIR_ged: SETAE(r);      break;
+            default:      NanoAssert(0); break;
+            }
        }
        freeResourcesOf(ins);
 
-        asm_cmpd(a, b);
+        asm_cmpd(ins);
    }
 
    // WARNING: This function cannot generate any code that will affect the
-    // condition codes prior to the generation of the ucomisd. See asm_cmp()
+    // condition codes prior to the generation of the ucomisd. See asm_cmpi()
    // for more details.
-    void Assembler::asm_cmpd(LIns *a, LIns *b) {
+    void Assembler::asm_cmpd(LIns *cond) {
+        LOpcode opcode = cond->opcode();
+        LIns* a = cond->oprnd1();
+        LIns* b = cond->oprnd2();
+        // First, we convert (a < b) into (b > a), and (a <= b) into (b >= a).
+        if (opcode == LIR_ltd) {
+            opcode = LIR_gtd;
+            LIns* t = a; a = b; b = t;
+        } else if (opcode == LIR_led) {
+            opcode = LIR_ged;
+            LIns* t = a; a = b; b = t;
+        }
        Register ra, rb;
        findRegFor2(FpRegs, a, ra, FpRegs, b, rb);
        UCOMISD(ra, rb);
@@ -1518,7 +1527,7 @@ namespace nanojit
    }
 
    // WARNING: the code generated by this function must not affect the
-    // condition codes. See asm_cmp() for details.
+    // condition codes. See asm_cmpi() for details.
    void Assembler::asm_restore(LIns *ins, Register r) {
        if (ins->isop(LIR_allocp)) {
            int d = arDisp(ins);
@@ -1587,7 +1596,7 @@ namespace nanojit
        }
 
        freeResourcesOf(ins);
-        asm_cmp(ins);
+        asm_cmpi(ins);
    }
 
    void Assembler::asm_ret(LIns *ins) {
diff --git a/js/src/nanojit/NativeX64.h b/js/src/nanojit/NativeX64.h
index 37dbc5de4a9d..872b5f20ee1a 100644
--- a/js/src/nanojit/NativeX64.h
+++ b/js/src/nanojit/NativeX64.h
@@ -423,9 +423,12 @@ namespace nanojit
        void endLoadRegs(LIns *ins);\
        void dis(NIns *p, int bytes);\
        void asm_cmp(LIns*);\
-        void asm_cmp_imm(LIns*);\
-        void asm_cmpd(LIns*, LIns*);\
-        NIns* asm_branchd(bool, LIns*, NIns*);\
+        void asm_cmpi(LIns*);\
+        void asm_cmpi_imm(LIns*);\
+        void asm_cmpd(LIns*);\
+        NIns* asm_branch_helper(bool, LIns*, NIns*);\
+        NIns* asm_branchi_helper(bool, LIns*, NIns*);\
+        NIns* asm_branchd_helper(bool, LIns*, NIns*);\
        void asm_div(LIns *ins);\
        void asm_div_mod(LIns *ins);\
        int max_stk_used;\
diff --git a/js/src/nanojit/Nativei386.cpp b/js/src/nanojit/Nativei386.cpp
index 289a1f752614..d5df6de7bb3c 100644
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -854,8 +854,6 @@ namespace nanojit
    inline void Assembler::FLD1()  { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); }
    inline void Assembler::FLDZ()  { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); }
 
-    inline void Assembler::FFREE(R r) { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",gpn(r)); }
-
    inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
    inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
@@ -894,8 +892,6 @@ namespace nanojit
    inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); }
    inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)", (void*)dm); }
 
-    inline void Assembler::FINCSTP() { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); fpu_pop(); }
-
    inline void Assembler::FCOMP()   { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();}
    inline void Assembler::FCOMPP()  { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();}
    inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0, r); asm_output("fld %s", gpn(r)); fpu_push(); }
@@ -1208,7 +1204,7 @@ namespace nanojit
    }
 
    // WARNING: the code generated by this function must not affect the
-    // condition codes. See asm_cmp().
+    // condition codes. See asm_cmpi().
    void Assembler::asm_restore(LIns* ins, Register r)
    {
        NanoAssert(ins->getReg() == r);
@@ -1521,19 +1517,18 @@ namespace nanojit
        }
    }
 
-    NIns* Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
+    NIns* Assembler::asm_branch_helper(bool branchOnFalse, LIns* cond, NIns* targ)
    {
-        LOpcode condop = cond->opcode();
-        NanoAssert(cond->isCmp());
-
-        // Handle float conditions separately.
-        if (isCmpDOpcode(condop)) {
-            return asm_branchd(branchOnFalse, cond, targ);
-        }
+        return isCmpDOpcode(cond->opcode())
+               ? asm_branchd_helper(branchOnFalse, cond, targ)
+               : asm_branchi_helper(branchOnFalse, cond, targ);
+    }
 
+    NIns* Assembler::asm_branchi_helper(bool branchOnFalse, LIns* cond, NIns* targ)
+    {
        if (branchOnFalse) {
            // op == LIR_xf/LIR_jf
-            switch (condop) {
+            switch (cond->opcode()) {
            case LIR_eqi:   JNE(targ);      break;
            case LIR_lti:   JNL(targ);      break;
            case LIR_lei:   JNLE(targ);     break;
@@ -1547,7 +1542,7 @@ namespace nanojit
            }
        } else { // op == LIR_xt/LIR_jt
-            switch (condop) {
+            switch (cond->opcode()) {
            case LIR_eqi:   JE(targ);       break;
            case LIR_lti:   JL(targ);       break;
            case LIR_lei:   JLE(targ);      break;
@@ -1560,7 +1555,12 @@ namespace nanojit
            default:        NanoAssert(0);  break;
            }
        }
-        NIns* at = _nIns;
+        return _nIns;
+    }
+
+    NIns* Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
+    {
+        NIns* at = asm_branch_helper(branchOnFalse, cond, targ);
        asm_cmp(cond);
        return at;
    }
@@ -1584,6 +1584,11 @@ namespace nanojit
        JMP_indexed(indexreg, 2, table);
    }
 
+    void Assembler::asm_cmp(LIns *cond)
+    {
+        isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond);
+    }
+
    // This generates a 'test' or 'cmp' instruction for a condition, which
    // causes the condition codes to be set appropriately. It's used with
    // conditional branches, conditional moves, and when generating
@@ -1623,7 +1628,7 @@ namespace nanojit
    // asm_restore(), that means that asm_restore() cannot generate code which
    // affects the condition codes.
    //
-    void Assembler::asm_cmp(LIns *cond)
+    void Assembler::asm_cmpi(LIns *cond)
    {
        LIns* lhs = cond->oprnd1();
        LIns* rhs = cond->oprnd2();
@@ -1734,7 +1739,7 @@ namespace nanojit
 
        freeResourcesOf(ins);
 
-        asm_cmp(ins);
+        asm_cmpi(ins);
    }
 
    // Two example cases for "ins = add lhs, rhs". '*' lines are those
@@ -2051,11 +2056,10 @@ namespace nanojit
                   (ins->isop(LIR_cmovd) && iftrue->isD() && iffalse->isD()));
 
        if (!_config.i386_sse2 && ins->isop(LIR_cmovd)) {
+            // See the SSE2 case below for an explanation of the subtleties here.
            debug_only( Register rr = ) prepareResultReg(ins, x87Regs);
            NanoAssert(FST0 == rr);
-            NanoAssert(!iftrue->isInReg() || iftrue->getReg() == FST0);
-
-            NanoAssert(!iffalse->isInReg());
+            NanoAssert(!iftrue->isInReg() && !iffalse->isInReg());
 
            NIns* target = _nIns;
@@ -2065,52 +2069,73 @@ namespace nanojit
                int df = findMemFor(iffalse);
                FLDQ(df, FP);
            }
+
+            FSTP(FST0);     // pop the stack
+            asm_branch_helper(false, condval, target);
 
-            FINCSTP();
-            // Its not sufficient to merely decrement the FP stack pointer, we have to
-            // also free FST0, otherwise the load above fails.
-            FFREE(FST0);
-            asm_branch(false, condval, target);
-
+            NanoAssert(ins->getReg() == rr);
            freeResourcesOf(ins);
            if (!iftrue->isInReg())
                findSpecificRegForUnallocated(iftrue, FST0);
 
+            asm_cmp(condval);
+
            return;
        }
 
        RegisterMask allow = ins->isD() ? XmmRegs : GpRegs;
-
        Register rr = prepareResultReg(ins, allow);
-
        Register rf = findRegFor(iffalse, allow & ~rmask(rr));
 
        if (ins->isop(LIR_cmovd)) {
+            // The obvious way to handle this is as follows:
+            //
+            //     mov rr, rt       # only needed if rt is live afterwards
+            //     do comparison
+            //     jt end
+            //     mov rr, rf
+            //   end:
+            //
+            // The problem with this is that doing the comparison can cause
+            // registers to be evicted, possibly including 'rr', which holds
+            // 'ins'. And that screws things up. So instead we do this:
+            //
+            //     do comparison
+            //     mov rr, rt       # only needed if rt is live afterwards
+            //     jt end
+            //     mov rr, rf
+            //   end:
+            //
+            // Putting the 'mov' between the comparison and the jump is ok
+            // because move instructions don't modify the condition codes.
+            //
            NIns* target = _nIns;
            asm_nongp_copy(rr, rf);
-            asm_branch(false, condval, target);
+            asm_branch_helper(false, condval, target);
 
            // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
            Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
 
            if (rr != rt)
                asm_nongp_copy(rr, rt);
+
+            NanoAssert(ins->getReg() == rr);
            freeResourcesOf(ins);
            if (!iftrue->isInReg()) {
                NanoAssert(rt == rr);
                findSpecificRegForUnallocated(iftrue, rr);
            }
+
+            asm_cmp(condval);
            return;
        }
 
        // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
        Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
 
        NanoAssert(ins->isop(LIR_cmovi));
 
        // WARNING: We cannot generate any code that affects the condition
-        // codes between the MRcc generation here and the asm_cmp() call
-        // below. See asm_cmp() for more details.
+        // codes between the MRcc generation here and the asm_cmpi() call
+        // below. See asm_cmpi() for more details.
        switch (condval->opcode()) {
            // Note that these are all opposites...
            case LIR_eqi:    MRNE(rr, rf);   break;
@@ -2128,6 +2153,7 @@ namespace nanojit
        if (rr != rt)
            MR(rr, rt);
 
+        NanoAssert(ins->getReg() == rr);
        freeResourcesOf(ins);
        if (!iftrue->isInReg()) {
            NanoAssert(rt == rr);
@@ -2614,7 +2640,7 @@ namespace nanojit
        }
    }
 
-    NIns* Assembler::asm_branchd(bool branchOnFalse, LIns *cond, NIns *targ)
+    NIns* Assembler::asm_branchd_helper(bool branchOnFalse, LIns* cond, NIns *targ)
    {
        NIns* at = 0;
        LOpcode opcode = cond->opcode();
@@ -2673,14 +2699,13 @@ namespace nanojit
 
        if (!at)
            at = _nIns;
-        asm_cmpd(cond);
 
        return at;
    }
 
    // WARNING: This function cannot generate any code that will affect the
    // condition codes prior to the generation of the
-    // ucomisd/fcompp/fcmop/fcom. See asm_cmp() for more details.
+    // ucomisd/fcompp/fcmop/fcom. See asm_cmpi() for more details.
    void Assembler::asm_cmpd(LIns *cond)
    {
        LOpcode condop = cond->opcode();
@@ -2699,14 +2724,13 @@ namespace nanojit
            LIns* t = lhs; lhs = rhs; rhs = t;
        }
 
-
        // LIR_eqd, if lhs == rhs:
        //   ucomisd       ZPC   outcome (SETNP/JNP succeeds if P==0)
        //   -------       ---   -------
        //   UNORDERED     111   SETNP/JNP fails
        //   EQUAL         100   SETNP/JNP succeeds
        //
-        // LIR_eqd, if lsh != rhs;
+        // LIR_eqd, if lhs != rhs;
        //   ucomisd       ZPC   outcome (SETP/JP succeeds if P==0,
        //                               SETE/JE succeeds if Z==0)
        //   -------       ---   -------
@@ -2810,13 +2834,10 @@ namespace nanojit
        } else {
            TEST_AH(mask);
            FNSTSW_AX();        // requires rEAX to be free
-            if (rhs->isImmD())
-            {
+            if (rhs->isImmD()) {
                const uint64_t* p = findImmDFromPool(rhs->immDasQ());
                FCOMdm(pop, (const double*)p);
-            }
-            else
-            {
+            } else {
                int d = findMemFor(rhs);
                FCOM(pop, d, FP);
            }
diff --git a/js/src/nanojit/Nativei386.h b/js/src/nanojit/Nativei386.h
index f3ca5006f207..a88dbf1fdd95 100644
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@@ -199,9 +199,12 @@ namespace nanojit
        void asm_farg(LIns*, int32_t& stkd);\
        void asm_arg(ArgType ty, LIns* p, Register r, int32_t& stkd);\
        void asm_pusharg(LIns*);\
-        void asm_cmpd(LIns *cond);\
-        NIns* asm_branchd(bool, LIns*, NIns*);\
        void asm_cmp(LIns *cond); \
+        void asm_cmpi(LIns *cond); \
+        void asm_cmpd(LIns *cond);\
+        NIns* asm_branch_helper(bool, LIns* cond, NIns*);\
+        NIns* asm_branchi_helper(bool, LIns* cond, NIns*);\
+        NIns* asm_branchd_helper(bool, LIns* cond, NIns*);\
        void asm_div_mod(LIns *cond); \
        void asm_load(int d, Register r); \
        void asm_immd(Register r, uint64_t q, double d, bool canClobberCCs); \
@@ -429,7 +432,6 @@ namespace nanojit
        void FCHS(); \
        void FLD1(); \
        void FLDZ(); \
-        void FFREE(Register r); \
        void FST32(bool p, int32_t d, Register b); \
        void FSTQ(bool p, int32_t d, Register b); \
        void FSTPQ(int32_t d, Register b); \
@@ -451,7 +453,6 @@ namespace nanojit
        void FSUBRdm(const double* dm); \
        void FMULdm( const double* dm); \
        void FDIVRdm(const double* dm); \
-        void FINCSTP(); \
        void FSTP(Register r) { \
            count_fpu(); \
            FPU(0xddd8, r); \