From d63d8245bb8ab0794363e10eb0a8d2fe8f59c0a8 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Tue, 20 Apr 2021 06:45:49 +0000 Subject: [PATCH] Bug 1678239 - Shrink WasmCheckedTailEntryOffset. r=jseward It turns out that WasmCheckedTailEntryOffset -- which defines an entry point for a jump from a trampoline that needs to perform a signature check but has already allocated a frame header, and which may require some nop-fill in the function prologue so as to place the entry point at a constant offset -- only needs to be large enough to accomodate every possible callable prologue (architecture-dependent). The value can therefore be made smaller than it currently is on x86, x64, arm, and arm64, leading to more compact prologues on those platforms, conserving code size. In addition, the code in GenerateFunctionPrologue and associated functions that uses this constant can be tidied up a little bit and commented better. Differential Revision: https://phabricator.services.mozilla.com/D112173 --- js/src/jit/arm/Assembler-arm.h | 6 +-- js/src/jit/arm64/Assembler-arm64.h | 6 +-- js/src/jit/mips32/Assembler-mips32.h | 4 +- js/src/jit/mips64/Assembler-mips64.h | 4 +- js/src/jit/none/Architecture-none.h | 5 +-- js/src/jit/x64/Assembler-x64.h | 6 +-- js/src/jit/x86/Assembler-x86.h | 8 ++-- js/src/wasm/WasmFrameIter.cpp | 61 +++++++++++++++++----------- 8 files changed, 58 insertions(+), 42 deletions(-) diff --git a/js/src/jit/arm/Assembler-arm.h b/js/src/jit/arm/Assembler-arm.h index f8f2f4152123..f5652d5a7cbf 100644 --- a/js/src/jit/arm/Assembler-arm.h +++ b/js/src/jit/arm/Assembler-arm.h @@ -305,10 +305,10 @@ static_assert(JitStackAlignment % SimdMemoryAlignment == 0, static const uint32_t WasmStackAlignment = SimdMemoryAlignment; static const uint32_t WasmTrapInstructionLength = 4; -// The offsets are dynamically asserted during -// code generation in the prologue/epilogue. +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; -static constexpr uint32_t WasmCheckedTailEntryOffset = 16u; +static constexpr uint32_t WasmCheckedTailEntryOffset = 12u; static const Scale ScalePointer = TimesFour; diff --git a/js/src/jit/arm64/Assembler-arm64.h b/js/src/jit/arm64/Assembler-arm64.h index 0e3df119bf67..37767d757006 100644 --- a/js/src/jit/arm64/Assembler-arm64.h +++ b/js/src/jit/arm64/Assembler-arm64.h @@ -433,10 +433,10 @@ static_assert(CodeAlignment % SimdMemoryAlignment == 0, static const uint32_t WasmStackAlignment = SimdMemoryAlignment; static const uint32_t WasmTrapInstructionLength = 4; -// The offsets are dynamically asserted during -// code generation in the prologue/epilogue. +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; -static constexpr uint32_t WasmCheckedTailEntryOffset = 32u; +static constexpr uint32_t WasmCheckedTailEntryOffset = 16u; class Assembler : public vixl::Assembler { public: diff --git a/js/src/jit/mips32/Assembler-mips32.h b/js/src/jit/mips32/Assembler-mips32.h index fd4211b474d8..bd7e75d64fed 100644 --- a/js/src/jit/mips32/Assembler-mips32.h +++ b/js/src/jit/mips32/Assembler-mips32.h @@ -161,8 +161,8 @@ static constexpr uint32_t SimdMemoryAlignment = 8; static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment; static const uint32_t WasmTrapInstructionLength = 4; -// The offsets are dynamically asserted during -// code generation in the prologue/epilogue. +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; static constexpr uint32_t WasmCheckedTailEntryOffset = 16u; diff --git a/js/src/jit/mips64/Assembler-mips64.h b/js/src/jit/mips64/Assembler-mips64.h index 5390cf671e9e..90d875cfaa66 100644 --- a/js/src/jit/mips64/Assembler-mips64.h +++ b/js/src/jit/mips64/Assembler-mips64.h @@ -188,8 +188,8 @@ static constexpr uint32_t SimdMemoryAlignment = 16; static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment; static const uint32_t WasmTrapInstructionLength = 4; -// The offsets are dynamically asserted during -// code generation in the prologue/epilogue. +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; static constexpr uint32_t WasmCheckedTailEntryOffset = 16u; diff --git a/js/src/jit/none/Architecture-none.h b/js/src/jit/none/Architecture-none.h index 25dcb0eac6b1..8523ded6ec93 100644 --- a/js/src/jit/none/Architecture-none.h +++ b/js/src/jit/none/Architecture-none.h @@ -21,10 +21,9 @@ static const uint32_t SimdMemoryAlignment = static const uint32_t WasmStackAlignment = 8; static const uint32_t WasmTrapInstructionLength = 0; -// The offsets are dynamically asserted during -// code generation in the prologue/epilogue. +// See comments in wasm::GenerateFunctionPrologue. static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; -static constexpr uint32_t WasmCheckedTailEntryOffset = 0u; +static constexpr uint32_t WasmCheckedTailEntryOffset = 1u; class Registers { public: diff --git a/js/src/jit/x64/Assembler-x64.h b/js/src/jit/x64/Assembler-x64.h index 55a36bdb4d96..aa902f99d0ec 100644 --- a/js/src/jit/x64/Assembler-x64.h +++ b/js/src/jit/x64/Assembler-x64.h @@ -270,10 +270,10 @@ static_assert(JitStackAlignment % SimdMemoryAlignment == 0, static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment; static constexpr uint32_t WasmTrapInstructionLength = 2; -// The offsets are dynamically asserted during -// code generation in the prologue/epilogue. +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; -static constexpr uint32_t WasmCheckedTailEntryOffset = 16u; +static constexpr uint32_t WasmCheckedTailEntryOffset = 4u; static constexpr Scale ScalePointer = TimesEight; diff --git a/js/src/jit/x86/Assembler-x86.h b/js/src/jit/x86/Assembler-x86.h index a117e9df3ef4..ddf5c9b5e218 100644 --- a/js/src/jit/x86/Assembler-x86.h +++ b/js/src/jit/x86/Assembler-x86.h @@ -182,10 +182,12 @@ static_assert(JitStackAlignment % SimdMemoryAlignment == 0, static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment; static constexpr uint32_t WasmTrapInstructionLength = 2; -// The offsets are dynamically asserted during -// code generation in the prologue/epilogue. +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. (We could make +// the tail offset 3, but I have opted for 4 as that results in a better-aligned +// branch target.) static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; -static constexpr uint32_t WasmCheckedTailEntryOffset = 16u; +static constexpr uint32_t WasmCheckedTailEntryOffset = 4u; struct ImmTag : public Imm32 { explicit ImmTag(JSValueTag mask) : Imm32(int32_t(mask)) {} diff --git a/js/src/wasm/WasmFrameIter.cpp b/js/src/wasm/WasmFrameIter.cpp index 1dadff545790..30a0684d424e 100644 --- a/js/src/wasm/WasmFrameIter.cpp +++ b/js/src/wasm/WasmFrameIter.cpp @@ -412,6 +412,12 @@ static void GenerateCallablePrologue(MacroAssembler& masm, uint32_t* entry) { // constants and assert that they match the actual codegen below. On ARM, // this requires AutoForbidPoolsAndNops to prevent a constant pool from being // randomly inserted between two instructions. + + // The size of the prologue is constrained to be no larger than the difference + // between WasmCheckedTailEntryOffset and WasmCheckedCallEntryOffset; to + // conserve code space / avoid excessive padding, this difference is made as + // tight as possible. + #if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) { *entry = masm.currentOffset(); @@ -455,7 +461,7 @@ static void GenerateCallablePrologue(MacroAssembler& masm, uint32_t* entry) { { # if defined(JS_CODEGEN_ARM) AutoForbidPoolsAndNops afp(&masm, - /* number of instructions in scope = */ 6); + /* number of instructions in scope = */ 3); *entry = masm.currentOffset(); @@ -549,35 +555,29 @@ static void GenerateCallableEpilogue(MacroAssembler& masm, unsigned framePushed, MOZ_ASSERT_IF(!masm.oom(), PoppedFP == *ret - poppedFP); } -static void EnsureOffset(MacroAssembler& masm, uint32_t base, - uint32_t targetOffset) { - MOZ_ASSERT(targetOffset % CodeAlignment == 0); - MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - base <= targetOffset); - - while (masm.currentOffset() - base < targetOffset) { - masm.nopAlign(CodeAlignment); - if (masm.currentOffset() - base < targetOffset) { - masm.nop(); - } - } - - MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - base == targetOffset); -} - void wasm::GenerateFunctionPrologue(MacroAssembler& masm, const TypeIdDesc& funcTypeId, const Maybe& tier1FuncIndex, FuncOffsets* offsets) { - // These constants reflect statically-determined offsets - // between a function's checked call entry and a tail's entry. + // These constants reflect statically-determined offsets between a function's + // checked call entry and the checked tail's entry, see diagram below. The + // Entry is a call target, so must have CodeAlignment, but the TailEntry is + // only a jump target from a stub. + // + // The CheckedCallEntryOffset is normally zero. + // + // CheckedTailEntryOffset > CheckedCallEntryOffset, and if CPSIZE is the size + // of the callable prologue then TailEntryOffset - CallEntryOffset >= CPSIZE. + // It is a goal to keep that difference as small as possible to reduce the + // amount of padding inserted in the prologue. static_assert(WasmCheckedCallEntryOffset % CodeAlignment == 0, "code aligned"); - static_assert(WasmCheckedTailEntryOffset % CodeAlignment == 0, - "code aligned"); + static_assert(WasmCheckedTailEntryOffset > WasmCheckedCallEntryOffset); // Flush pending pools so they do not get dumped between the 'begin' and // 'uncheckedCallEntry' offsets since the difference must be less than // UINT8_MAX to be stored in CodeRange::funcbeginToUncheckedCallEntry_. + // (Pending pools can be large.) masm.flushBuffer(); masm.haltingAlign(CodeAlignment); @@ -591,8 +591,10 @@ void wasm::GenerateFunctionPrologue(MacroAssembler& masm, // ----------------------------------------------- // checked call entry - used for call_indirect when we have to check the // signature. - // checked tail entry - used by trampolines which already had pushed Frame - // on the callee’s behalf. + // + // checked tail entry - used by indirect call trampolines which already + // had pushed Frame on the callee’s behalf. + // // unchecked call entry - used for regular direct same-instance calls. Label functionBody; @@ -605,7 +607,20 @@ void wasm::GenerateFunctionPrologue(MacroAssembler& masm, uint32_t dummy; GenerateCallablePrologue(masm, &dummy); - EnsureOffset(masm, offsets->begin, WasmCheckedTailEntryOffset); + // Check that we did not overshoot the space budget for the prologue. + MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - offsets->begin <= + WasmCheckedTailEntryOffset); + + // Pad to WasmCheckedTailEntryOffset. Don't use nopAlign because the target + // offset is not necessarily a power of two. The expected number of NOPs here + // is very small. + while (masm.currentOffset() - offsets->begin < WasmCheckedTailEntryOffset) { + masm.nop(); + } + + // Signature check starts at WasmCheckedTailEntryOffset. + MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - offsets->begin == + WasmCheckedTailEntryOffset); switch (funcTypeId.kind()) { case TypeIdDescKind::Global: { Register scratch = WasmTableCallScratchReg0;