From 92f5085fc726d4dd1e9d044f9742eb6a32c703ca Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 1 Jul 2013 21:24:08 -0700 Subject: [PATCH] Bug 876064 - Implement constant pools for double constants on x64. r=sstangl --- js/src/ion/shared/MacroAssembler-x86-shared.h | 25 ++++----- js/src/ion/x64/MacroAssembler-x64.cpp | 52 +++++++++++++++++++ js/src/ion/x64/MacroAssembler-x64.h | 29 +++++++---- js/src/ion/x86/MacroAssembler-x86.cpp | 21 +++++--- js/src/ion/x86/MacroAssembler-x86.h | 4 +- 5 files changed, 95 insertions(+), 36 deletions(-) diff --git a/js/src/ion/shared/MacroAssembler-x86-shared.h b/js/src/ion/shared/MacroAssembler-x86-shared.h index 36acefc6746d..3e9301bd39af 100644 --- a/js/src/ion/shared/MacroAssembler-x86-shared.h +++ b/js/src/ion/shared/MacroAssembler-x86-shared.h @@ -386,27 +386,22 @@ class MacroAssemblerX86Shared : public Assembler bind(&done); } - bool maybeInlineDouble(uint64_t u, const FloatRegister &dest) { - // This implements parts of "13.4 Generating constants" of - // "2. Optimizing subroutines in assembly language" by Agner Fog, - // generalized to handle any case that can use a pcmpeqw and - // up to two shifts. + bool maybeInlineDouble(double d, const FloatRegister &dest) { + uint64_t u = mozilla::BitwiseCast(d); + // Loading zero with xor is specially optimized in hardware. if (u == 0) { xorpd(dest, dest); return true; } - int tz = js_bitscan_ctz64(u); - int lz = js_bitscan_clz64(u); - if (u == (~uint64_t(0) << (lz + tz) >> lz)) { - pcmpeqw(dest, dest); - if (tz != 0) - psllq(Imm32(lz + tz), dest); - if (lz != 0) - psrlq(Imm32(lz), dest); - return true; - } + // It is also possible to load several common constants using pcmpeqw + // to get all ones and then psllq and psrlq to get zeros at the ends, + // as described in "13.4 Generating constants" of + // "2. Optimizing subroutines in assembly language" by Agner Fog, and as + // previously implemented here. However, with x86 and x64 both using + // constant pool loads for double constants, this is probably only + // worthwhile in cases where a load is likely to be delayed. return false; } diff --git a/js/src/ion/x64/MacroAssembler-x64.cpp b/js/src/ion/x64/MacroAssembler-x64.cpp index 764309201546..07a8d1f5b6e0 100644 --- a/js/src/ion/x64/MacroAssembler-x64.cpp +++ b/js/src/ion/x64/MacroAssembler-x64.cpp @@ -8,12 +8,64 @@ #include "ion/BaselineFrame.h" #include "ion/MoveEmitter.h" #include "ion/IonFrames.h" +#include "mozilla/Casting.h" #include "jsscriptinlines.h" using namespace js; using namespace js::ion; +void +MacroAssemblerX64::loadConstantDouble(double d, const FloatRegister &dest) +{ + if (maybeInlineDouble(d, dest)) + return; + + if (!doubleMap_.initialized()) { + enoughMemory_ &= doubleMap_.init(); + if (!enoughMemory_) + return; + } + size_t doubleIndex; + if (DoubleMap::AddPtr p = doubleMap_.lookupForAdd(d)) { + doubleIndex = p->value; + } else { + doubleIndex = doubles_.length(); + enoughMemory_ &= doubles_.append(Double(d)); + enoughMemory_ &= doubleMap_.add(p, d, doubleIndex); + if (!enoughMemory_) + return; + } + Double &dbl = doubles_[doubleIndex]; + JS_ASSERT(!dbl.uses.bound()); + + // The constants will be stored in a pool appended to the text (see + // finish()), so they will always be a fixed distance from the + // instructions which reference them. This allows the instructions to use + // PC-relative addressing. Use "jump" label support code, because we need + // the same PC-relative address patching that jumps use. + JmpSrc j = masm.movsd_ripr(dest.code()); + JmpSrc prev = JmpSrc(dbl.uses.use(j.offset())); + masm.setNextJump(j, prev); +} + +void +MacroAssemblerX64::finish() +{ + JS_STATIC_ASSERT(CodeAlignment >= sizeof(double)); + + if (!doubles_.empty()) + masm.align(sizeof(double)); + + for (size_t i = 0; i < doubles_.length(); i++) { + Double &dbl = doubles_[i]; + bind(&dbl.uses); + masm.doubleConstant(dbl.value); + } + + MacroAssemblerX86Shared::finish(); +} + void MacroAssemblerX64::setupABICall(uint32_t args) { diff --git a/js/src/ion/x64/MacroAssembler-x64.h b/js/src/ion/x64/MacroAssembler-x64.h index 585d9fd78ecc..94e619704922 100644 --- a/js/src/ion/x64/MacroAssembler-x64.h +++ b/js/src/ion/x64/MacroAssembler-x64.h @@ -45,6 +45,19 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared bool dynamicAlignment_; bool enoughMemory_; + // These use SystemAllocPolicy since asm.js releases memory after each + // function is compiled, and these need to live until after all functions + // are compiled. + struct Double { + double value; + NonAssertingLabel uses; + Double(double value) : value(value) {} + }; + Vector doubles_; + + typedef HashMap, SystemAllocPolicy> DoubleMap; + DoubleMap doubleMap_; + void setupABICall(uint32_t arg); protected: @@ -71,6 +84,10 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared { } + // The buffer is about to be linked, make sure any constant pools or excess + // bookkeeping has been flushed to the instruction stream. + void finish(); + bool oom() const { return MacroAssemblerX86Shared::oom() || !enoughMemory_; } @@ -938,17 +955,7 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared cvtsi2sd(operand.valueReg(), dest); } - void loadConstantDouble(double d, const FloatRegister &dest) { - union DoublePun { - uint64_t u; - double d; - } pun; - pun.d = d; - if (!maybeInlineDouble(pun.u, dest)) { - mov(ImmWord(pun.u), ScratchReg); - movqsd(ScratchReg, dest); - } - } + void loadConstantDouble(double d, const FloatRegister &dest); void loadStaticDouble(const double *dp, const FloatRegister &dest) { loadConstantDouble(*dp, dest); } diff --git a/js/src/ion/x86/MacroAssembler-x86.cpp b/js/src/ion/x86/MacroAssembler-x86.cpp index 3390da5f0a6a..b9c4ed904a9e 100644 --- a/js/src/ion/x86/MacroAssembler-x86.cpp +++ b/js/src/ion/x86/MacroAssembler-x86.cpp @@ -8,6 +8,7 @@ #include "ion/BaselineFrame.h" #include "ion/MoveEmitter.h" #include "ion/IonFrames.h" +#include "mozilla/Casting.h" #include "jsscriptinlines.h" @@ -17,12 +18,7 @@ using namespace js::ion; void MacroAssemblerX86::loadConstantDouble(double d, const FloatRegister &dest) { - union DoublePun { - uint64_t u; - double d; - } dpun; - dpun.d = d; - if (maybeInlineDouble(dpun.u, dest)) + if (maybeInlineDouble(d, dest)) return; if (!doubleMap_.initialized()) { @@ -42,10 +38,21 @@ MacroAssemblerX86::loadConstantDouble(double d, const FloatRegister &dest) return; } Double &dbl = doubles_[doubleIndex]; - masm.movsd_mr(reinterpret_cast(dbl.uses.prev()), dest.code()); + JS_ASSERT(!dbl.uses.bound()); + + masm.movsd_mr(reinterpret_cast(dbl.uses.prev()), dest.code()); dbl.uses.setPrev(masm.size()); } +void +MacroAssemblerX86::loadStaticDouble(const double *dp, const FloatRegister &dest) { + if (maybeInlineDouble(*dp, dest)) + return; + + // x86 can just load from any old immediate address. + movsd(dp, dest); +} + void MacroAssemblerX86::finish() { diff --git a/js/src/ion/x86/MacroAssembler-x86.h b/js/src/ion/x86/MacroAssembler-x86.h index 2f6756bf7c39..65f90ad1dd20 100644 --- a/js/src/ion/x86/MacroAssembler-x86.h +++ b/js/src/ion/x86/MacroAssembler-x86.h @@ -793,9 +793,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared } void loadConstantDouble(double d, const FloatRegister &dest); - void loadStaticDouble(const double *dp, const FloatRegister &dest) { - movsd(dp, dest); - } + void loadStaticDouble(const double *dp, const FloatRegister &dest); void branchTruncateDouble(const FloatRegister &src, const Register &dest, Label *fail) { const uint32_t IndefiniteIntegerValue = 0x80000000;