gecko-dev/tools/profiler/public/ProfilerThreadRegistrationD...

538 строки
22 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// This header contains classes that hold data related to thread profiling:
// Data members are stored `protected` in `ThreadRegistrationData`.
// Non-virtual sub-classes of ProfilerThreadRegistrationData provide layers of
// public accessors to subsets of the data. Each level builds on the previous
// one and adds further access to more data, but always with the appropriate
// guards where necessary.
// These classes have protected constructors, so only some trusted classes
// `ThreadRegistration` and `ThreadRegistry` will be able to construct them, and
// then give limited access depending on who asks (the owning thread or another
// one), and how much data they actually need.
//
// The hierarchy is, from base to most derived:
// - ThreadRegistrationData
// - ThreadRegistrationUnlockedConstReader
// - ThreadRegistrationUnlockedConstReaderAndAtomicRW
// - ThreadRegistrationUnlockedRWForLockedProfiler
// - ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
// - ThreadRegistrationLockedRWFromAnyThread
// - ThreadRegistrationLockedRWOnThread
// - ThreadRegistration::EmbeddedData (actual data member in ThreadRegistration)
//
// Tech detail: These classes need to be a single hierarchy so that
// `ThreadRegistration` can contain the most-derived class, and from there can
// publish references to base classes without relying on Undefined Behavior.
// (It's not allowed to have some object and give a reference to a sub-class,
// unless that object was *really* constructed as that sub-class at least, even
// if that sub-class only adds member functions!)
// And where appropriate, these references will come along with the required
// lock.
#ifndef ProfilerThreadRegistrationData_h
#define ProfilerThreadRegistrationData_h
#include "js/ProfilingFrameIterator.h"
#include "js/ProfilingStack.h"
#include "mozilla/Atomics.h"
#include "mozilla/BaseProfilerDetail.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/ProfilerThreadPlatformData.h"
#include "mozilla/ProfilerThreadRegistrationInfo.h"
#include "nsCOMPtr.h"
#include "nsIThread.h"
class ProfiledThreadData;
class PSAutoLock;
struct JSContext;
// Enum listing which profiling features are active for a single thread.
enum class ThreadProfilingFeatures : uint32_t {
// The thread is not being profiled at all (either the profiler is not
// running, or this thread is not examined during profiling.)
NotProfiled = 0u,
// Single features, binary exclusive. May be `Combine()`d.
CPUUtilization = 1u << 0,
Sampling = 1u << 1,
Markers = 1u << 2,
// All possible features. Usually used as a mask to see if any feature is
// active at a given time.
Any = CPUUtilization | Sampling | Markers
};
// Binary OR of one of more ThreadProfilingFeatures, to mix all arguments.
template <typename... Ts>
[[nodiscard]] constexpr ThreadProfilingFeatures Combine(
ThreadProfilingFeatures a1, Ts... as) {
static_assert((true && ... &&
std::is_same_v<std::remove_cv_t<std::remove_reference_t<Ts>>,
ThreadProfilingFeatures>));
return static_cast<ThreadProfilingFeatures>(
(static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(a1) | ... |
static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(as)));
}
// Binary AND of one of more ThreadProfilingFeatures, to find features common to
// all arguments.
template <typename... Ts>
[[nodiscard]] constexpr ThreadProfilingFeatures Intersect(
ThreadProfilingFeatures a1, Ts... as) {
static_assert((true && ... &&
std::is_same_v<std::remove_cv_t<std::remove_reference_t<Ts>>,
ThreadProfilingFeatures>));
return static_cast<ThreadProfilingFeatures>(
(static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(a1) & ... &
static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(as)));
}
// Are there features in common between the two given sets?
// Mostly useful to test if any of a set of features is present in another set.
template <typename... Ts>
[[nodiscard]] constexpr bool DoFeaturesIntersect(ThreadProfilingFeatures a1,
ThreadProfilingFeatures a2) {
return Intersect(a1, a2) != ThreadProfilingFeatures::NotProfiled;
}
namespace mozilla::profiler {
// All data members related to thread profiling are stored here.
// See derived classes below, which give limited unlocked/locked read/write
// access in different situations, and will be available through
// ThreadRegistration and ThreadRegistry.
class ThreadRegistrationData {
public:
// No public accessors here. See derived classes for accessors, and
// Get.../With... functions for who can use these accessors.
size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
// Not including data that is not fully owned here.
return 0;
}
size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
}
static constexpr size_t MAX_JS_FRAMES = 1024;
using JsFrame = JS::ProfilingFrameIterator::Frame;
using JsFrameBuffer = JsFrame[MAX_JS_FRAMES];
// `protected` to allow derived classes to read all data members.
protected:
ThreadRegistrationData(const char* aName, const void* aStackTop);
#ifdef DEBUG
// Destructor only used to check invariants.
~ThreadRegistrationData() {
MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
!!mProfiledThreadData);
MOZ_ASSERT(!mProfiledThreadData,
"mProfiledThreadData pointer should have been reset before "
"~ThreadRegistrationData");
}
#endif // DEBUG
// Permanent thread information.
// Set at construction, read from anywhere, moved-from at destruction.
ThreadRegistrationInfo mInfo;
// Contains profiler labels and JS frames.
// Deep-written on thread only, deep-read from thread and suspended thread.
ProfilingStack mProfilingStack;
// In practice, only read from thread and suspended thread.
PlatformData mPlatformData;
// Only read from thread and suspended thread.
const void* const mStackTop;
// Written from thread, read from thread and suspended thread.
nsCOMPtr<nsIThread> mThread;
// If this is a JS thread, this is its JSContext, which is required for any
// JS sampling.
// Written from thread, read from thread and suspended thread.
JSContext* mJSContext = nullptr;
// If mJSContext is not null AND the thread is being profiled, this points at
// the start of a JsFrameBuffer to be used for on-thread synchronous sampling.
JsFrame* mJsFrameBuffer = nullptr;
// The profiler needs to start and stop JS sampling of JS threads at various
// times. However, the JS engine can only do the required actions on the
// JS thread itself ("on-thread"), not from another thread ("off-thread").
// Therefore, we have the following two-step process.
//
// - The profiler requests (on-thread or off-thread) that the JS sampling be
// started/stopped, by changing mJSSampling to the appropriate REQUESTED
// state.
//
// - The relevant JS thread polls (on-thread) for changes to mJSSampling.
// When it sees a REQUESTED state, it performs the appropriate actions to
// actually start/stop JS sampling, and changes mJSSampling out of the
// REQUESTED state.
//
// The state machine is as follows.
//
// INACTIVE --> ACTIVE_REQUESTED
// ^ ^ |
// | _/ |
// | _/ |
// | / |
// | v v
// INACTIVE_REQUESTED <-- ACTIVE
//
// The polling is done in the following two ways.
//
// - Via the interrupt callback mechanism; the JS thread must call
// profiler_js_interrupt_callback() from its own interrupt callback.
// This is how sampling must be started/stopped for threads where the
// request was made off-thread.
//
// - When {Start,Stop}JSSampling() is called on-thread, we can immediately
// follow it with a PollJSSampling() call to avoid the delay between the
// two steps. Likewise, setJSContext() calls PollJSSampling().
//
// One non-obvious thing about all this: these JS sampling requests are made
// on all threads, even non-JS threads. mContext needs to also be set (via
// setJSContext(), which can only happen for JS threads) for any JS sampling
// to actually happen.
//
enum {
INACTIVE = 0,
ACTIVE_REQUESTED = 1,
ACTIVE = 2,
INACTIVE_REQUESTED = 3,
} mJSSampling = INACTIVE;
uint32_t mJSFlags = 0;
// Flags to conveniently track various JS instrumentations.
enum class JSInstrumentationFlags {
StackSampling = 0x1,
Allocations = 0x2,
};
[[nodiscard]] bool JSAllocationsEnabled() const {
return mJSFlags & uint32_t(JSInstrumentationFlags::Allocations);
}
// The following members may be modified from another thread.
// They need to be atomic, because LockData() does not prevent reads from
// the owning thread.
// mSleep tracks whether the thread is sleeping, and if so, whether it has
// been previously observed. This is used for an optimization: in some
// cases, when a thread is asleep, we duplicate the previous sample, which
// is cheaper than taking a new sample.
//
// mSleep is atomic because it is accessed from multiple threads.
//
// - It is written only by this thread, via setSleeping() and setAwake().
//
// - It is read by SamplerThread::Run().
//
// There are two cases where racing between threads can cause an issue.
//
// - If CanDuplicateLastSampleDueToSleep() returns false but that result is
// invalidated before being acted upon, we will take a full sample
// unnecessarily. This is additional work but won't cause any correctness
// issues. (In actual fact, this case is impossible. In order to go from
// CanDuplicateLastSampleDueToSleep() returning false to it returning true
// requires an intermediate call to it in order for mSleep to go from
// SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.)
//
// - If CanDuplicateLastSampleDueToSleep() returns true but that result is
// invalidated before being acted upon -- i.e. the thread wakes up before
// DuplicateLastSample() is called -- we will duplicate the previous
// sample. This is inaccurate, but only slightly... we will effectively
// treat the thread as having slept a tiny bit longer than it really did.
//
// This latter inaccuracy could be avoided by moving the
// CanDuplicateLastSampleDueToSleep() check within the thread-freezing code,
// e.g. the section where Tick() is called. But that would reduce the
// effectiveness of the optimization because more code would have to be run
// before we can tell that duplication is allowed.
//
static const int AWAKE = 0;
static const int SLEEPING_NOT_OBSERVED = 1;
static const int SLEEPING_OBSERVED = 2;
// Read&written from thread and suspended thread.
Atomic<int> mSleep{AWAKE};
Atomic<uint64_t> mThreadCpuTimeInNsAtLastSleep{0};
#ifdef NIGHTLY_BUILD
// The first wake is the thread creation.
Atomic<uint64_t, MemoryOrdering::Relaxed> mWakeCount{1};
mutable baseprofiler::detail::BaseProfilerMutex mRecordWakeCountMutex;
mutable uint64_t mAlreadyRecordedWakeCount = 0;
mutable uint64_t mAlreadyRecordedCpuTimeInMs = 0;
#endif
// Is this thread currently being profiled, and with which features?
// Written from profiler, read from any thread.
// Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together.)
Atomic<ThreadProfilingFeatures, MemoryOrdering::Relaxed> mProfilingFeatures{
ThreadProfilingFeatures::NotProfiled};
// If the profiler is active and this thread is selected for profiling, this
// points at the relevant ProfiledThreadData.
// Fully controlled by the profiler.
// Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together).
ProfiledThreadData* mProfiledThreadData = nullptr;
};
// Accessing const data from any thread.
class ThreadRegistrationUnlockedConstReader : public ThreadRegistrationData {
public:
[[nodiscard]] const ThreadRegistrationInfo& Info() const { return mInfo; }
[[nodiscard]] const PlatformData& PlatformDataCRef() const {
return mPlatformData;
}
[[nodiscard]] const void* StackTop() const { return mStackTop; }
protected:
ThreadRegistrationUnlockedConstReader(const char* aName,
const void* aStackTop)
: ThreadRegistrationData(aName, aStackTop) {}
};
// Accessing atomic data from any thread.
class ThreadRegistrationUnlockedConstReaderAndAtomicRW
: public ThreadRegistrationUnlockedConstReader {
public:
[[nodiscard]] const ProfilingStack& ProfilingStackCRef() const {
return mProfilingStack;
}
[[nodiscard]] ProfilingStack& ProfilingStackRef() { return mProfilingStack; }
// Similar to `profiler_is_active()`, this atomic flag may become out-of-date.
// It should only be used as an indication to know whether this thread is
// probably being profiled (with some specific features), to avoid doing
// expensive operations otherwise. Edge cases:
// - This thread could get `NotProfiled`, but the profiler has just started,
// so some very early data may be missing. No real impact on profiling.
// - This thread could see profiled features, but the profiled has just
// stopped, so some some work will be done and then discarded when finally
// attempting to write to the buffer. No impact on profiling.
// - This thread could see profiled features, but the profiler will quickly
// stop and restart, so this thread will write information relevant to the
// previous profiling session. Very rare, and little impact on profiling.
[[nodiscard]] ThreadProfilingFeatures ProfilingFeatures() const {
return mProfilingFeatures;
}
// Call this whenever the current thread sleeps. Calling it twice in a row
// without an intervening setAwake() call is an error.
void SetSleeping() {
MOZ_ASSERT(mSleep == AWAKE);
mSleep = SLEEPING_NOT_OBSERVED;
}
// Call this whenever the current thread wakes. Calling it twice in a row
// without an intervening setSleeping() call is an error.
void SetAwake() {
MOZ_ASSERT(mSleep != AWAKE);
mSleep = AWAKE;
#ifdef NIGHTLY_BUILD
++mWakeCount;
#endif
}
// Returns the CPU time used by the thread since the previous call to this
// method or since the thread was started if this is the first call.
uint64_t GetNewCpuTimeInNs() {
uint64_t newCpuTimeNs;
if (!GetCpuTimeSinceThreadStartInNs(&newCpuTimeNs, PlatformDataCRef())) {
newCpuTimeNs = 0;
}
uint64_t before = mThreadCpuTimeInNsAtLastSleep;
uint64_t result =
MOZ_LIKELY(newCpuTimeNs > before) ? newCpuTimeNs - before : 0;
mThreadCpuTimeInNsAtLastSleep = newCpuTimeNs;
return result;
}
#ifdef NIGHTLY_BUILD
void RecordWakeCount() const;
#endif
// This is called on every profiler restart. Put things that should happen
// at that time here.
void ReinitializeOnResume() {
// This is needed to cause an initial sample to be taken from sleeping
// threads that had been observed prior to the profiler stopping and
// restarting. Otherwise sleeping threads would not have any samples to
// copy forward while sleeping.
(void)mSleep.compareExchange(SLEEPING_OBSERVED, SLEEPING_NOT_OBSERVED);
}
// This returns true for the second and subsequent calls in each sleep
// cycle, so that the sampler can skip its full sampling and reuse the first
// asleep sample instead.
[[nodiscard]] bool CanDuplicateLastSampleDueToSleep() {
if (mSleep == AWAKE) {
return false;
}
if (mSleep.compareExchange(SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED)) {
return false;
}
return true;
}
[[nodiscard]] bool IsSleeping() const { return mSleep != AWAKE; }
protected:
ThreadRegistrationUnlockedConstReaderAndAtomicRW(const char* aName,
const void* aStackTop)
: ThreadRegistrationUnlockedConstReader(aName, aStackTop) {}
};
// Like above, with special PSAutoLock-guarded accessors.
class ThreadRegistrationUnlockedRWForLockedProfiler
: public ThreadRegistrationUnlockedConstReaderAndAtomicRW {
public:
// IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
// Only add functions that take a `const PSAutoLock&` proof-of-lock.
// (Because there is no other lock.)
[[nodiscard]] const ProfiledThreadData* GetProfiledThreadData(
const PSAutoLock&) const {
return mProfiledThreadData;
}
[[nodiscard]] ProfiledThreadData* GetProfiledThreadData(const PSAutoLock&) {
return mProfiledThreadData;
}
protected:
ThreadRegistrationUnlockedRWForLockedProfiler(const char* aName,
const void* aStackTop)
: ThreadRegistrationUnlockedConstReaderAndAtomicRW(aName, aStackTop) {}
};
// Reading data, unlocked from the thread, or locked otherwise.
// This data MUST only be written from the thread with lock (i.e., in
// LockedRWOnThread through RWOnThreadWithLock.)
class ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
: public ThreadRegistrationUnlockedRWForLockedProfiler {
public:
// IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
// Non-atomic members read here MUST be written from LockedRWOnThread (to
// guarantee that they are only modified on this thread.)
[[nodiscard]] JSContext* GetJSContext() const { return mJSContext; }
protected:
ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(const char* aName,
const void* aStackTop)
: ThreadRegistrationUnlockedRWForLockedProfiler(aName, aStackTop) {}
};
// Accessing locked data from the thread, or from any thread through the locked
// profiler:
// Like above, and profiler can also read&write mutex-protected members.
class ThreadRegistrationLockedRWFromAnyThread
: public ThreadRegistrationUnlockedReaderAndAtomicRWOnThread {
public:
void SetProfilingFeaturesAndData(ThreadProfilingFeatures aProfilingFeatures,
ProfiledThreadData* aProfiledThreadData,
const PSAutoLock&);
void ClearProfilingFeaturesAndData(const PSAutoLock&);
// Not null when JSContext is not null AND this thread is being profiled.
// Points at the start of JsFrameBuffer.
[[nodiscard]] JsFrame* GetJsFrameBuffer() const { return mJsFrameBuffer; }
[[nodiscard]] const nsCOMPtr<nsIEventTarget> GetEventTarget() const {
return mThread;
}
void ResetMainThread(nsIThread* aThread) { mThread = aThread; }
// aDelay is the time the event that is currently running on the thread was
// queued before starting to run (if a PrioritizedEventQueue
// (i.e. MainThread), this will be 0 for any event at a lower priority
// than Input).
// aRunning is the time the event has been running. If no event is running
// these will both be TimeDuration() (i.e. 0). Both are out params, and are
// always set. Their initial value is discarded.
void GetRunningEventDelay(const TimeStamp& aNow, TimeDuration& aDelay,
TimeDuration& aRunning) {
if (mThread) { // can be null right at the start of a process
TimeStamp start;
mThread->GetRunningEventDelay(&aDelay, &start);
if (!start.IsNull()) {
// Note: the timestamp used here will be from when we started to
// suspend and sample the thread; which is also the timestamp
// associated with the sample.
aRunning = aNow - start;
return;
}
}
aDelay = TimeDuration();
aRunning = TimeDuration();
}
// Request that this thread start JS sampling. JS sampling won't actually
// start until a subsequent PollJSSampling() call occurs *and* mContext has
// been set.
void StartJSSampling(uint32_t aJSFlags) {
// This function runs on-thread or off-thread.
MOZ_RELEASE_ASSERT(mJSSampling == INACTIVE ||
mJSSampling == INACTIVE_REQUESTED);
mJSSampling = ACTIVE_REQUESTED;
mJSFlags = aJSFlags;
}
// Request that this thread stop JS sampling. JS sampling won't actually
// stop until a subsequent PollJSSampling() call occurs.
void StopJSSampling() {
// This function runs on-thread or off-thread.
MOZ_RELEASE_ASSERT(mJSSampling == ACTIVE ||
mJSSampling == ACTIVE_REQUESTED);
mJSSampling = INACTIVE_REQUESTED;
}
protected:
ThreadRegistrationLockedRWFromAnyThread(const char* aName,
const void* aStackTop)
: ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(aName, aStackTop) {}
};
// Accessing data, locked, from the thread.
// If any non-atomic data is readable from UnlockedReaderAndAtomicRWOnThread,
// it must be written from here, and not in base classes: Since this data is
// only written on the thread, it can be read from the same thread without
// lock; but writing must be locked so that other threads can safely read it,
// typically from LockedRWFromAnyThread.
class ThreadRegistrationLockedRWOnThread
: public ThreadRegistrationLockedRWFromAnyThread {
public:
void SetJSContext(JSContext* aJSContext);
void ClearJSContext();
// Poll to see if JS sampling should be started/stopped.
void PollJSSampling();
public:
ThreadRegistrationLockedRWOnThread(const char* aName, const void* aStackTop)
: ThreadRegistrationLockedRWFromAnyThread(aName, aStackTop) {}
};
} // namespace mozilla::profiler
#endif // ProfilerThreadRegistrationData_h