Bug 1678239 - Shrink WasmCheckedTailEntryOffset. r=jseward

It turns out that WasmCheckedTailEntryOffset -- which defines an entry
point for a jump from a trampoline that needs to perform a signature
check but has already allocated a frame header, and which may require
some nop-fill in the function prologue so as to place the entry point
at a constant offset -- only needs to be large enough to accommodate
every possible callable prologue (architecture-dependent).  The value
can therefore be made smaller than it currently is on x86, x64, arm,
and arm64, leading to more compact prologues on those platforms,
conserving code size.

In addition, the code in GenerateFunctionPrologue and associated
functions that uses this constant can be tidied up a little bit and
commented better.

Differential Revision: https://phabricator.services.mozilla.com/D112173
This commit is contained in:
Lars T Hansen 2021-04-20 06:45:49 +00:00
Parent 7ce5bcb8bc
Commit d63d8245bb
8 changed files: 58 additions and 42 deletions

Просмотреть файл

@ -305,10 +305,10 @@ static_assert(JitStackAlignment % SimdMemoryAlignment == 0,
static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
static const uint32_t WasmTrapInstructionLength = 4;
// The offsets are dynamically asserted during
// code generation in the prologue/epilogue.
// See comments in wasm::GenerateFunctionPrologue. The difference between these
// is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 16u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 12u;
static const Scale ScalePointer = TimesFour;

Просмотреть файл

@ -433,10 +433,10 @@ static_assert(CodeAlignment % SimdMemoryAlignment == 0,
static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
static const uint32_t WasmTrapInstructionLength = 4;
// The offsets are dynamically asserted during
// code generation in the prologue/epilogue.
// See comments in wasm::GenerateFunctionPrologue. The difference between these
// is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 32u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 16u;
class Assembler : public vixl::Assembler {
public:

Просмотреть файл

@ -161,8 +161,8 @@ static constexpr uint32_t SimdMemoryAlignment = 8;
static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment;
static const uint32_t WasmTrapInstructionLength = 4;
// The offsets are dynamically asserted during
// code generation in the prologue/epilogue.
// See comments in wasm::GenerateFunctionPrologue. The difference between these
// is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 16u;

Просмотреть файл

@ -188,8 +188,8 @@ static constexpr uint32_t SimdMemoryAlignment = 16;
static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment;
static const uint32_t WasmTrapInstructionLength = 4;
// The offsets are dynamically asserted during
// code generation in the prologue/epilogue.
// See comments in wasm::GenerateFunctionPrologue. The difference between these
// is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 16u;

Просмотреть файл

@ -21,10 +21,9 @@ static const uint32_t SimdMemoryAlignment =
static const uint32_t WasmStackAlignment = 8;
static const uint32_t WasmTrapInstructionLength = 0;
// The offsets are dynamically asserted during
// code generation in the prologue/epilogue.
// See comments in wasm::GenerateFunctionPrologue.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 1u;
class Registers {
public:

Просмотреть файл

@ -270,10 +270,10 @@ static_assert(JitStackAlignment % SimdMemoryAlignment == 0,
static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment;
static constexpr uint32_t WasmTrapInstructionLength = 2;
// The offsets are dynamically asserted during
// code generation in the prologue/epilogue.
// See comments in wasm::GenerateFunctionPrologue. The difference between these
// is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 16u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 4u;
static constexpr Scale ScalePointer = TimesEight;

Просмотреть файл

@ -182,10 +182,12 @@ static_assert(JitStackAlignment % SimdMemoryAlignment == 0,
static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment;
static constexpr uint32_t WasmTrapInstructionLength = 2;
// The offsets are dynamically asserted during
// code generation in the prologue/epilogue.
// See comments in wasm::GenerateFunctionPrologue. The difference between these
// is the size of the largest callable prologue on the platform. (We could make
// the tail offset 3, but I have opted for 4 as that results in a better-aligned
// branch target.)
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 16u;
static constexpr uint32_t WasmCheckedTailEntryOffset = 4u;
struct ImmTag : public Imm32 {
explicit ImmTag(JSValueTag mask) : Imm32(int32_t(mask)) {}

Просмотреть файл

@ -412,6 +412,12 @@ static void GenerateCallablePrologue(MacroAssembler& masm, uint32_t* entry) {
// constants and assert that they match the actual codegen below. On ARM,
// this requires AutoForbidPoolsAndNops to prevent a constant pool from being
// randomly inserted between two instructions.
// The size of the prologue is constrained to be no larger than the difference
// between WasmCheckedTailEntryOffset and WasmCheckedCallEntryOffset; to
// conserve code space / avoid excessive padding, this difference is made as
// tight as possible.
#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
{
*entry = masm.currentOffset();
@ -455,7 +461,7 @@ static void GenerateCallablePrologue(MacroAssembler& masm, uint32_t* entry) {
{
# if defined(JS_CODEGEN_ARM)
AutoForbidPoolsAndNops afp(&masm,
/* number of instructions in scope = */ 6);
/* number of instructions in scope = */ 3);
*entry = masm.currentOffset();
@ -549,35 +555,29 @@ static void GenerateCallableEpilogue(MacroAssembler& masm, unsigned framePushed,
MOZ_ASSERT_IF(!masm.oom(), PoppedFP == *ret - poppedFP);
}
// Emit NOP fill until the current assembler offset, measured relative to
// `base`, is exactly `targetOffset`. The caller uses this to pin the checked
// tail entry point at the statically-known WasmCheckedTailEntryOffset in the
// function prologue.
//
// `targetOffset` must be a multiple of CodeAlignment. On assembler OOM the
// offset bookkeeping is unreliable, hence the MOZ_ASSERT_IF(!masm.oom(), ...)
// guards around the offset checks.
static void EnsureOffset(MacroAssembler& masm, uint32_t base,
uint32_t targetOffset) {
MOZ_ASSERT(targetOffset % CodeAlignment == 0);
// We must not already have emitted past the target offset.
MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - base <= targetOffset);
while (masm.currentOffset() - base < targetOffset) {
// Advance to the next CodeAlignment boundary with NOP fill...
masm.nopAlign(CodeAlignment);
if (masm.currentOffset() - base < targetOffset) {
// ...and if still short of the target (i.e. we were already aligned),
// emit a single NOP and loop.
masm.nop();
}
}
// We must have landed exactly on the target offset.
MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - base == targetOffset);
}
void wasm::GenerateFunctionPrologue(MacroAssembler& masm,
const TypeIdDesc& funcTypeId,
const Maybe<uint32_t>& tier1FuncIndex,
FuncOffsets* offsets) {
// These constants reflect statically-determined offsets
// between a function's checked call entry and a tail's entry.
// These constants reflect statically-determined offsets between a function's
// checked call entry and the checked tail's entry, see diagram below. The
// Entry is a call target, so must have CodeAlignment, but the TailEntry is
// only a jump target from a stub.
//
// The CheckedCallEntryOffset is normally zero.
//
// CheckedTailEntryOffset > CheckedCallEntryOffset, and if CPSIZE is the size
// of the callable prologue then TailEntryOffset - CallEntryOffset >= CPSIZE.
// It is a goal to keep that difference as small as possible to reduce the
// amount of padding inserted in the prologue.
static_assert(WasmCheckedCallEntryOffset % CodeAlignment == 0,
"code aligned");
static_assert(WasmCheckedTailEntryOffset % CodeAlignment == 0,
"code aligned");
static_assert(WasmCheckedTailEntryOffset > WasmCheckedCallEntryOffset);
// Flush pending pools so they do not get dumped between the 'begin' and
// 'uncheckedCallEntry' offsets since the difference must be less than
// UINT8_MAX to be stored in CodeRange::funcbeginToUncheckedCallEntry_.
// (Pending pools can be large.)
masm.flushBuffer();
masm.haltingAlign(CodeAlignment);
@ -591,8 +591,10 @@ void wasm::GenerateFunctionPrologue(MacroAssembler& masm,
// -----------------------------------------------
// checked call entry - used for call_indirect when we have to check the
// signature.
// checked tail entry - used by trampolines which already had pushed Frame
// on the callees behalf.
//
// checked tail entry - used by indirect call trampolines which already
// had pushed Frame on the callees behalf.
//
// unchecked call entry - used for regular direct same-instance calls.
Label functionBody;
@ -605,7 +607,20 @@ void wasm::GenerateFunctionPrologue(MacroAssembler& masm,
uint32_t dummy;
GenerateCallablePrologue(masm, &dummy);
EnsureOffset(masm, offsets->begin, WasmCheckedTailEntryOffset);
// Check that we did not overshoot the space budget for the prologue.
MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - offsets->begin <=
WasmCheckedTailEntryOffset);
// Pad to WasmCheckedTailEntryOffset. Don't use nopAlign because the target
// offset is not necessarily a power of two. The expected number of NOPs here
// is very small.
while (masm.currentOffset() - offsets->begin < WasmCheckedTailEntryOffset) {
masm.nop();
}
// Signature check starts at WasmCheckedTailEntryOffset.
MOZ_ASSERT_IF(!masm.oom(), masm.currentOffset() - offsets->begin ==
WasmCheckedTailEntryOffset);
switch (funcTypeId.kind()) {
case TypeIdDescKind::Global: {
Register scratch = WasmTableCallScratchReg0;