зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1380033 - Tiering policy with space proxy. r=luke
--HG-- extra : rebase_source : df238cefe35aae6d90e1ad29c8768389a7bdf9a6 extra : amend_source : b7c5abf772eeab5a8f46491c0dc7748b1ad521f2
This commit is contained in:
Родитель
ba64ef4717
Коммит
71a8cbdccf
|
@ -619,6 +619,13 @@ js::jit::ReleaseProcessExecutableMemory()
|
|||
execMemory.release();
|
||||
}
|
||||
|
||||
size_t
|
||||
js::jit::LikelyAvailableExecutableMemory()
|
||||
{
|
||||
// Round down available memory to the closest MB.
|
||||
return MaxCodeBytesPerProcess - AlignBytes(execMemory.bytesAllocated(), 0x100000U);
|
||||
}
|
||||
|
||||
bool
|
||||
js::jit::CanLikelyAllocateMoreExecutableMemory()
|
||||
{
|
||||
|
|
|
@ -50,6 +50,11 @@ extern void DeallocateExecutableMemory(void* addr, size_t bytes);
|
|||
// function.
|
||||
extern bool CanLikelyAllocateMoreExecutableMemory();
|
||||
|
||||
// Returns a rough guess of how much executable memory remains available,
|
||||
// rounded down to MB limit. Note this can fluctuate as other threads within
|
||||
// the process allocate executable memory.
|
||||
extern size_t LikelyAvailableExecutableMemory();
|
||||
|
||||
} // namespace jit
|
||||
} // namespace js
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include "jsprf.h"
|
||||
|
||||
#include "jit/ProcessExecutableMemory.h"
|
||||
#include "wasm/WasmBaselineCompile.h"
|
||||
#include "wasm/WasmBinaryIterator.h"
|
||||
#include "wasm/WasmGenerator.h"
|
||||
|
@ -103,10 +104,277 @@ CompileArgs::initFromContext(JSContext* cx, ScriptedCaller&& scriptedCaller)
|
|||
return assumptions.initBuildIdFromContext(cx);
|
||||
}
|
||||
|
||||
static bool
|
||||
BackgroundWorkPossible()
|
||||
// Classify the current system as one of a set of recognizable classes. This
|
||||
// really needs to get our tier-1 systems right.
|
||||
//
|
||||
// TODO: We don't yet have a good measure of how fast a system is. We
|
||||
// distinguish between mobile and desktop because these are very different kinds
|
||||
// of systems, but we could further distinguish between low / medium / high end
|
||||
// within those major classes. If we do so, then constants below would be
|
||||
// provided for each (class, architecture, system-tier) combination, not just
|
||||
// (class, architecture) as now.
|
||||
//
|
||||
// CPU clock speed is not by itself a good predictor of system performance, as
|
||||
// there are high-performance systems with slow clocks (recent Intel) and
|
||||
// low-performance systems with fast clocks (older AMD). We can also use
|
||||
// physical memory, core configuration, OS details, CPU class and family, and
|
||||
// CPU manufacturer to disambiguate.
|
||||
|
||||
enum class SystemClass
|
||||
{
|
||||
return CanUseExtraThreads() && HelperThreadState().cpuCount > 1;
|
||||
DesktopX86,
|
||||
DesktopX64,
|
||||
DesktopUnknown32,
|
||||
DesktopUnknown64,
|
||||
MobileX86,
|
||||
MobileArm32,
|
||||
MobileArm64,
|
||||
MobileUnknown32,
|
||||
MobileUnknown64
|
||||
};
|
||||
|
||||
// Determine the SystemClass of the build target purely from compile-time
// configuration macros.  A build counts as "mobile" when it targets Android
// or either ARM code generator; every other build counts as "desktop".
// Within each family the code generator (or, failing that, the pointer
// width) selects the concrete class.
static SystemClass
Classify()
{
#if defined(ANDROID) || defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
    // Mobile family.
# if defined(JS_CODEGEN_X86)
    return SystemClass::MobileX86;
# elif defined(JS_CODEGEN_ARM)
    return SystemClass::MobileArm32;
# elif defined(JS_CODEGEN_ARM64)
    return SystemClass::MobileArm64;
# elif defined(JS_64BIT)
    return SystemClass::MobileUnknown64;
# else
    return SystemClass::MobileUnknown32;
# endif
#else
    // Desktop family.
# if defined(JS_CODEGEN_X64)
    return SystemClass::DesktopX64;
# elif defined(JS_CODEGEN_X86)
    return SystemClass::DesktopX86;
# elif defined(JS_64BIT)
    return SystemClass::DesktopUnknown64;
# else
    return SystemClass::DesktopUnknown32;
# endif
#endif
}
|
||||
|
||||
#ifndef JS_64BIT
|
||||
|
||||
// Code sizes in machine code bytes per bytecode byte, again empirical except
|
||||
// where marked as "Guess".
|
||||
|
||||
static const double x64Tox86Inflation = 1.25;
|
||||
|
||||
static const double x64IonBytesPerBytecode = 2.45;
|
||||
static const double x86IonBytesPerBytecode = x64IonBytesPerBytecode * x64Tox86Inflation;
|
||||
static const double arm32IonBytesPerBytecode = 3.3;
|
||||
static const double arm64IonBytesPerBytecode = 3.0; // Guess
|
||||
|
||||
static const double x64BaselineBytesPerBytecode = x64IonBytesPerBytecode * 1.43;
|
||||
static const double x86BaselineBytesPerBytecode = x64BaselineBytesPerBytecode * x64Tox86Inflation;
|
||||
static const double arm32BaselineBytesPerBytecode = arm32IonBytesPerBytecode * 1.39;
|
||||
static const double arm64BaselineBytesPerBytecode = arm64IonBytesPerBytecode * 1.39; // Guess
|
||||
|
||||
static double
|
||||
IonBytesPerBytecode(SystemClass cls)
|
||||
{
|
||||
switch (cls) {
|
||||
case SystemClass::DesktopX86:
|
||||
case SystemClass::MobileX86:
|
||||
case SystemClass::DesktopUnknown32:
|
||||
return x86IonBytesPerBytecode;
|
||||
case SystemClass::DesktopX64:
|
||||
case SystemClass::DesktopUnknown64:
|
||||
return x64IonBytesPerBytecode;
|
||||
case SystemClass::MobileArm32:
|
||||
case SystemClass::MobileUnknown32:
|
||||
return arm32IonBytesPerBytecode;
|
||||
case SystemClass::MobileArm64:
|
||||
case SystemClass::MobileUnknown64:
|
||||
return arm64IonBytesPerBytecode;
|
||||
default:
|
||||
MOZ_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
static double
|
||||
BaselineBytesPerBytecode(SystemClass cls)
|
||||
{
|
||||
switch (cls) {
|
||||
case SystemClass::DesktopX86:
|
||||
case SystemClass::MobileX86:
|
||||
case SystemClass::DesktopUnknown32:
|
||||
return x86BaselineBytesPerBytecode;
|
||||
case SystemClass::DesktopX64:
|
||||
case SystemClass::DesktopUnknown64:
|
||||
return x64BaselineBytesPerBytecode;
|
||||
case SystemClass::MobileArm32:
|
||||
case SystemClass::MobileUnknown32:
|
||||
return arm32BaselineBytesPerBytecode;
|
||||
case SystemClass::MobileArm64:
|
||||
case SystemClass::MobileUnknown64:
|
||||
return arm64BaselineBytesPerBytecode;
|
||||
default:
|
||||
MOZ_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
#endif // !JS_64BIT
|
||||
|
||||
// If parallel Ion compilation is going to take longer than this, we should tier.
|
||||
|
||||
static const double tierCutoffMs = 250;
|
||||
|
||||
// Compilation rate values are empirical except when noted, the reference
|
||||
// systems are:
|
||||
//
|
||||
// Late-2013 MacBook Pro (2.6GHz quad hyperthreaded Haswell)
|
||||
// Late-2015 Nexus 5X (1.4GHz quad Cortex-A53 + 1.8GHz dual Cortex-A57)
|
||||
|
||||
static const double x64BytecodesPerMs = 2100;
|
||||
static const double x86BytecodesPerMs = 1500;
|
||||
static const double arm32BytecodesPerMs = 450;
|
||||
static const double arm64BytecodesPerMs = 650; // Guess
|
||||
|
||||
// Tiering cutoff values: if code section sizes are below these values (when
|
||||
// divided by the effective number of cores) we do not tier, because we guess
|
||||
// that parallel Ion compilation will be fast enough.
|
||||
|
||||
static const double x64DesktopTierCutoff = x64BytecodesPerMs * tierCutoffMs;
|
||||
static const double x86DesktopTierCutoff = x86BytecodesPerMs * tierCutoffMs;
|
||||
static const double x86MobileTierCutoff = x86DesktopTierCutoff / 2; // Guess
|
||||
static const double arm32MobileTierCutoff = arm32BytecodesPerMs * tierCutoffMs;
|
||||
static const double arm64MobileTierCutoff = arm64BytecodesPerMs * tierCutoffMs;
|
||||
|
||||
static double
|
||||
CodesizeCutoff(SystemClass cls, uint32_t codeSize)
|
||||
{
|
||||
switch (cls) {
|
||||
case SystemClass::DesktopX86:
|
||||
case SystemClass::DesktopUnknown32:
|
||||
return x86DesktopTierCutoff;
|
||||
case SystemClass::DesktopX64:
|
||||
case SystemClass::DesktopUnknown64:
|
||||
return x64DesktopTierCutoff;
|
||||
case SystemClass::MobileX86:
|
||||
return x86MobileTierCutoff;
|
||||
case SystemClass::MobileArm32:
|
||||
case SystemClass::MobileUnknown32:
|
||||
return arm32MobileTierCutoff;
|
||||
case SystemClass::MobileArm64:
|
||||
case SystemClass::MobileUnknown64:
|
||||
return arm64MobileTierCutoff;
|
||||
default:
|
||||
MOZ_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
// As the number of cores grows the effectiveness of each core dwindles (on the
|
||||
// systems we care about for SpiderMonkey).
|
||||
//
|
||||
// The data are empirical, computed from the observed compilation time of the
|
||||
// Tanks demo code on a variable number of cores.
|
||||
//
|
||||
// The heuristic may fail on NUMA systems where the core count is high but the
|
||||
// performance increase is nil or negative once the program moves beyond one
|
||||
// socket. However, few browser users have such systems.
|
||||
|
||||
static double
|
||||
EffectiveCores(SystemClass cls, uint32_t cores)
|
||||
{
|
||||
if (cores <= 3)
|
||||
return pow(cores, 0.9);
|
||||
return pow(cores, 0.75);
|
||||
}
|
||||
|
||||
#ifndef JS_64BIT
|
||||
// Don't tier if tiering will fill code memory to more than this
|
||||
// fraction.
|
||||
|
||||
static const double spaceCutoffPct = 0.9;
|
||||
#endif
|
||||
|
||||
// Figure out whether we should use tiered compilation or not.
|
||||
static bool
|
||||
GetTieringEnabled(uint32_t codeSize)
|
||||
{
|
||||
if (!CanUseExtraThreads())
|
||||
return false;
|
||||
|
||||
uint32_t cpuCount = HelperThreadState().cpuCount;
|
||||
MOZ_ASSERT(cpuCount > 0);
|
||||
|
||||
// It's mostly sensible not to background compile when there's only one
|
||||
// hardware thread as we want foreground computation to have access to that.
|
||||
// However, if wasm background compilation helper threads can be given lower
|
||||
// priority then background compilation on single-core systems still makes
|
||||
// some kind of sense. That said, this is a non-issue: as of September 2017
|
||||
// 1-core was down to 3.5% of our population and falling.
|
||||
|
||||
if (cpuCount == 1)
|
||||
return false;
|
||||
|
||||
MOZ_ASSERT(HelperThreadState().threadCount >= cpuCount);
|
||||
|
||||
// Compute the max number of threads available to do actual background
|
||||
// compilation work.
|
||||
|
||||
uint32_t workers = HelperThreadState().maxWasmCompilationThreads();
|
||||
|
||||
// The number of cores we will use is bounded both by the CPU count and the
|
||||
// worker count.
|
||||
|
||||
uint32_t cores = Min(cpuCount, workers);
|
||||
|
||||
SystemClass cls = Classify();
|
||||
|
||||
// Ion compilation on available cores must take long enough to be worth the
|
||||
// bother.
|
||||
|
||||
double cutoffSize = CodesizeCutoff(cls, codeSize);
|
||||
double effectiveCores = EffectiveCores(cls, cores);
|
||||
|
||||
if ((codeSize / effectiveCores) < cutoffSize)
|
||||
return false;
|
||||
|
||||
// Do not implement a size cutoff for 64-bit systems since the code size
|
||||
// budget for 64 bit is so large that it will hardly ever be an issue.
|
||||
// (Also the cutoff percentage might be different on 64-bit.)
|
||||
|
||||
#ifndef JS_64BIT
|
||||
// If the amount of executable code for baseline compilation jeopardizes the
|
||||
// availability of executable memory for ion code then do not tier, for now.
|
||||
//
|
||||
// TODO: For now we consider this module in isolation. We should really
|
||||
// worry about what else is going on in this process and might be filling up
|
||||
// the code memory. It's like we need some kind of code memory reservation
|
||||
// system or JIT compilation for large modules.
|
||||
|
||||
double ionRatio = IonBytesPerBytecode(cls);
|
||||
double baselineRatio = BaselineBytesPerBytecode(cls);
|
||||
double needMemory = codeSize * (ionRatio + baselineRatio);
|
||||
double availMemory = LikelyAvailableExecutableMemory();
|
||||
double cutoff = spaceCutoffPct * MaxCodeBytesPerProcess;
|
||||
|
||||
// If the sum of baseline and ion code makes us exceeds some set percentage
|
||||
// of the executable memory then disable tiering.
|
||||
|
||||
if ((MaxCodeBytesPerProcess - availMemory) + needMemory > cutoff)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
SharedModule
|
||||
|
@ -118,25 +386,30 @@ wasm::CompileInitialTier(const ShareableBytes& bytecode, const CompileArgs& args
|
|||
bool debugEnabled = BaselineCanCompile() && args.debugEnabled;
|
||||
bool ionEnabled = args.ionEnabled || !baselineEnabled;
|
||||
|
||||
CompileMode mode;
|
||||
Tier tier;
|
||||
DebugEnabled debug;
|
||||
if (BackgroundWorkPossible() && baselineEnabled && ionEnabled && !debugEnabled) {
|
||||
mode = CompileMode::Tier1;
|
||||
tier = Tier::Baseline;
|
||||
debug = DebugEnabled::False;
|
||||
} else {
|
||||
mode = CompileMode::Once;
|
||||
tier = debugEnabled || !ionEnabled ? Tier::Baseline : Tier::Ion;
|
||||
debug = debugEnabled ? DebugEnabled::True : DebugEnabled::False;
|
||||
}
|
||||
DebugEnabled debug = debugEnabled ? DebugEnabled::True : DebugEnabled::False;
|
||||
|
||||
ModuleEnvironment env(mode, tier, debug);
|
||||
ModuleEnvironment env(ModuleEnvironment::UnknownMode, ModuleEnvironment::UnknownTier, debug);
|
||||
|
||||
Decoder d(bytecode.bytes, error);
|
||||
if (!DecodeModuleEnvironment(d, &env))
|
||||
return nullptr;
|
||||
|
||||
uint32_t codeSize;
|
||||
if (!d.peekSectionSize(SectionId::Code, &env, "code", &codeSize))
|
||||
codeSize = 0;
|
||||
|
||||
CompileMode mode;
|
||||
Tier tier;
|
||||
if (baselineEnabled && ionEnabled && !debugEnabled && GetTieringEnabled(codeSize)) {
|
||||
mode = CompileMode::Tier1;
|
||||
tier = Tier::Baseline;
|
||||
} else {
|
||||
mode = CompileMode::Once;
|
||||
tier = debugEnabled || !ionEnabled ? Tier::Baseline : Tier::Ion;
|
||||
}
|
||||
|
||||
env.setModeAndTier(mode, tier);
|
||||
|
||||
ModuleGenerator mg(args, &env, nullptr, error);
|
||||
if (!mg.init())
|
||||
return nullptr;
|
||||
|
|
|
@ -144,10 +144,10 @@ class CompileTask
|
|||
return units_;
|
||||
}
|
||||
Tier tier() const {
|
||||
return env_.tier;
|
||||
return env_.tier();
|
||||
}
|
||||
CompileMode mode() const {
|
||||
return env_.mode;
|
||||
return env_.mode();
|
||||
}
|
||||
bool debugEnabled() const {
|
||||
return env_.debug == DebugEnabled::True;
|
||||
|
@ -243,8 +243,8 @@ class MOZ_STACK_CLASS ModuleGenerator
|
|||
MOZ_MUST_USE bool initWasm();
|
||||
|
||||
bool isAsmJS() const { return env_->isAsmJS(); }
|
||||
Tier tier() const { return env_->tier; }
|
||||
CompileMode mode() const { return env_->mode; }
|
||||
Tier tier() const { return env_->tier(); }
|
||||
CompileMode mode() const { return env_->mode(); }
|
||||
bool debugEnabled() const { return env_->debugEnabled(); }
|
||||
|
||||
public:
|
||||
|
|
|
@ -60,7 +60,7 @@ Decoder::fail(size_t errorOffset, const char* msg)
|
|||
|
||||
bool
|
||||
Decoder::startSection(SectionId id, ModuleEnvironment* env, uint32_t* sectionStart,
|
||||
uint32_t* sectionSize, const char* sectionName)
|
||||
uint32_t* sectionSize, const char* sectionName, bool peeking)
|
||||
{
|
||||
// Record state at beginning of section to allow rewinding to this point
|
||||
// if, after skipping through several custom sections, we don't find the
|
||||
|
@ -85,8 +85,11 @@ Decoder::startSection(SectionId id, ModuleEnvironment* env, uint32_t* sectionSta
|
|||
// Rewind to the beginning of the current section since this is what
|
||||
// skipCustomSection() assumes.
|
||||
cur_ = currentSectionStart;
|
||||
if (!skipCustomSection(env))
|
||||
if (!skipCustomSection(env)) {
|
||||
if (peeking)
|
||||
goto rewind;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Having successfully skipped a custom section, consider the next
|
||||
// section.
|
||||
|
@ -97,22 +100,39 @@ Decoder::startSection(SectionId id, ModuleEnvironment* env, uint32_t* sectionSta
|
|||
|
||||
// Found it, now start the section.
|
||||
|
||||
if (!readVarU32(sectionSize) || bytesRemain() < *sectionSize)
|
||||
if (!readVarU32(sectionSize) || bytesRemain() < *sectionSize) {
|
||||
if (peeking)
|
||||
goto rewind;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
*sectionStart = cur_ - beg_;
|
||||
if (peeking)
|
||||
goto rewind_peeking;
|
||||
return true;
|
||||
|
||||
rewind:
|
||||
peeking = false;
|
||||
rewind_peeking:
|
||||
cur_ = initialCur;
|
||||
env->customSections.shrinkTo(initialCustomSectionsLength);
|
||||
*sectionStart = NotStarted;
|
||||
if (!peeking)
|
||||
*sectionStart = NotStarted;
|
||||
return true;
|
||||
|
||||
fail:
|
||||
return failf("failed to start %s section", sectionName);
|
||||
}
|
||||
|
||||
bool
|
||||
Decoder::peekSectionSize(SectionId id, ModuleEnvironment* env, const char* sectionName, uint32_t* sectionSize)
|
||||
{
|
||||
uint32_t sectionStart;
|
||||
if (!startSection(id, env, §ionStart, sectionSize, sectionName, /*peeking=*/true))
|
||||
return false;
|
||||
return sectionStart != NotStarted;
|
||||
}
|
||||
|
||||
bool
|
||||
Decoder::finishSection(uint32_t sectionStart, uint32_t sectionSize, const char* sectionName)
|
||||
{
|
||||
|
|
|
@ -36,11 +36,14 @@ namespace wasm {
|
|||
struct ModuleEnvironment
|
||||
{
|
||||
// Constant parameters for the entire compilation:
|
||||
const CompileMode mode;
|
||||
const Tier tier;
|
||||
const DebugEnabled debug;
|
||||
const ModuleKind kind;
|
||||
|
||||
// Constant parameters determined no later than at the start of the code
|
||||
// section:
|
||||
CompileMode mode_;
|
||||
Tier tier_;
|
||||
|
||||
// Module fields filled out incrementally during decoding:
|
||||
MemoryUsage memoryUsage;
|
||||
Atomic<uint32_t> minMemoryLength;
|
||||
|
@ -59,18 +62,35 @@ struct ModuleEnvironment
|
|||
NameInBytecodeVector funcNames;
|
||||
CustomSectionVector customSections;
|
||||
|
||||
static const CompileMode UnknownMode = (CompileMode)-1;
|
||||
static const Tier UnknownTier = (Tier)-1;
|
||||
|
||||
explicit ModuleEnvironment(CompileMode mode = CompileMode::Once,
|
||||
Tier tier = Tier::Ion,
|
||||
DebugEnabled debug = DebugEnabled::False,
|
||||
ModuleKind kind = ModuleKind::Wasm)
|
||||
: mode(mode),
|
||||
tier(tier),
|
||||
debug(debug),
|
||||
: debug(debug),
|
||||
kind(kind),
|
||||
mode_(mode),
|
||||
tier_(tier),
|
||||
memoryUsage(MemoryUsage::None),
|
||||
minMemoryLength(0)
|
||||
{}
|
||||
|
||||
// Compile mode for this module.  Asserts that the mode is no longer the
// UnknownMode placeholder, i.e. that it was supplied to the constructor or
// set through setModeAndTier() before being read.
CompileMode mode() const {
|
||||
MOZ_ASSERT(mode_ != UnknownMode);
|
||||
return mode_;
|
||||
}
|
||||
// Compilation tier for this module.  Asserts that the tier is no longer the
// UnknownTier placeholder, i.e. that it was supplied to the constructor or
// set through setModeAndTier() before being read.
Tier tier() const {
|
||||
MOZ_ASSERT(tier_ != UnknownTier);
|
||||
return tier_;
|
||||
}
|
||||
// Record the compile mode and tier once they have been decided.  Asserts that
// both are still the Unknown placeholders, so this is only valid on an
// environment constructed with UnknownMode/UnknownTier and only once (unless
// the placeholders themselves are passed in again).
void setModeAndTier(CompileMode mode, Tier tier) {
|
||||
MOZ_ASSERT(mode_ == UnknownMode);
|
||||
MOZ_ASSERT(tier_ == UnknownTier);
|
||||
mode_ = mode;
|
||||
tier_ = tier;
|
||||
}
|
||||
size_t numTables() const {
|
||||
return tables.length();
|
||||
}
|
||||
|
@ -546,10 +566,15 @@ class Decoder
|
|||
ModuleEnvironment* env,
|
||||
uint32_t* sectionStart,
|
||||
uint32_t* sectionSize,
|
||||
const char* sectionName);
|
||||
const char* sectionName,
|
||||
bool peeking = false);
|
||||
MOZ_MUST_USE bool finishSection(uint32_t sectionStart,
|
||||
uint32_t sectionSize,
|
||||
const char* sectionName);
|
||||
MOZ_MUST_USE bool peekSectionSize(SectionId id,
|
||||
ModuleEnvironment* env,
|
||||
const char* sectionName,
|
||||
uint32_t* sectionSize);
|
||||
|
||||
// Custom sections do not cause validation errors unless the error is in
|
||||
// the section header itself.
|
||||
|
|
Загрузка…
Ссылка в новой задаче