Backed out changeset bd63a8fecf00 (bug 1380081)

This commit is contained in:
Sebastian Hengst 2017-08-11 09:28:47 +02:00
Родитель ebcb175d14
Коммит 655e0abe18
10 изменённых файлов: 803 добавлений и 12 удалений

Просмотреть файл

@ -148,6 +148,7 @@ public:
#if defined(MOZ_GECKO_PROFILER)
static void DoStackCapture(const nsACString& aKey);
#endif
static void RecordThreadHangStats(Telemetry::ThreadHangStats&& aStats);
size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf);
struct Stat {
uint32_t hitCount;
@ -202,6 +203,10 @@ private:
KeyedStackCapturer mStackCapturer;
#endif
// mThreadHangStats stores recorded, inactive thread hang stats
Vector<Telemetry::ThreadHangStats> mThreadHangStats;
Mutex mThreadHangStatsMutex;
CombinedStacks mLateWritesStacks; // This is collected out of the main thread.
bool mCachedTelemetryData;
uint32_t mLastShutdownTime;
@ -480,6 +485,7 @@ TelemetryImpl::TelemetryImpl()
, mHangReportsMutex("Telemetry::mHangReportsMutex")
, mCanRecordBase(false)
, mCanRecordExtended(false)
, mThreadHangStatsMutex("Telemetry::mThreadHangStatsMutex")
, mCachedTelemetryData(false)
, mLastShutdownTime(0)
, mFailedLockCount(0)
@ -496,6 +502,7 @@ TelemetryImpl::~TelemetryImpl() {
// We will fix this in bug 1367344.
MutexAutoLock hashLock(mHashMutex);
MutexAutoLock hangReportsLock(mHangReportsMutex);
MutexAutoLock threadHangsLock(mThreadHangStatsMutex);
}
void
@ -1058,6 +1065,43 @@ ReadStack(const char *aFileName, Telemetry::ProcessedStack &aStack)
aStack = stack;
}
/**
 * Getter backing the nsITelemetry |threadHangStats| attribute.
 *
 * Builds a JS array holding one reflected object per thread: first the stats
 * of every thread currently tracked by BackgroundHangMonitor (skipped when
 * the monitor is disabled), then the stats saved in mThreadHangStats.
 *
 * @param cx  JS context used to allocate the reflection.
 * @param ret receives the resulting array on success.
 * @return NS_OK on success, NS_ERROR_FAILURE if any JS allocation or
 *         property definition fails.
 */
NS_IMETHODIMP
TelemetryImpl::GetThreadHangStats(JSContext* cx, JS::MutableHandle<JS::Value> ret)
{
  JS::RootedObject retObj(cx, JS_NewArrayObject(cx, 0));
  if (!retObj) {
    return NS_ERROR_FAILURE;
  }
  size_t threadIndex = 0;
  if (!BackgroundHangMonitor::IsDisabled()) {
    /* First add active threads; |iter| holds the monitor lock for as long as
       it is alive, so no thread can be destroyed while we walk the list. */
    // NOTE(review): |iter| is scoped to this block, so its lock is released
    // before the saved stats are read below. A thread destroyed in between
    // could be reported twice (once as active, once as saved). Extending the
    // lock's lifetime needs a lock-ordering review against
    // mThreadHangStatsMutex, so it is not changed here.
    BackgroundHangMonitor::ThreadHangStatsIterator iter;
    for (Telemetry::ThreadHangStats* histogram = iter.GetNext();
         histogram; histogram = iter.GetNext()) {
      JS::RootedObject obj(cx, CreateJSThreadHangStats(cx, *histogram));
      // A null |obj| means the reflection failed; bail out instead of
      // storing it in the array.
      if (!obj ||
          !JS_DefineElement(cx, retObj, threadIndex++, obj, JSPROP_ENUMERATE)) {
        return NS_ERROR_FAILURE;
      }
    }
  }
  // Add saved threads next
  MutexAutoLock autoLock(mThreadHangStatsMutex);
  for (auto & stat : mThreadHangStats) {
    JS::RootedObject obj(cx,
                         CreateJSThreadHangStats(cx, stat));
    if (!obj ||
        !JS_DefineElement(cx, retObj, threadIndex++, obj, JSPROP_ENUMERATE)) {
      return NS_ERROR_FAILURE;
    }
  }
  ret.setObject(*retObj);
  return NS_OK;
}
void
TelemetryImpl::ReadLateWritesStacks(nsIFile* aProfileDir)
{
@ -1586,6 +1630,18 @@ TelemetryImpl::CaptureStack(const nsACString& aKey) {
return NS_OK;
}
/**
 * Moves |aStats| into the Telemetry singleton's list of saved thread hang
 * stats. Does nothing when there is no Telemetry instance or when extended
 * recording is not allowed.
 *
 * @param aStats stats to store; moved from on success.
 */
void
TelemetryImpl::RecordThreadHangStats(Telemetry::ThreadHangStats&& aStats)
{
  if (!sTelemetry) {
    return;
  }
  if (!TelemetryHistogram::CanRecordExtended()) {
    return;
  }

  MutexAutoLock autoLock(sTelemetry->mThreadHangStatsMutex);
  // A failed append (OOM) is deliberately ignored.
  const bool appended = sTelemetry->mThreadHangStats.append(Move(aStats));
  mozilla::Unused << appended;
}
bool
TelemetryImpl::CanRecordBase()
{
@ -1803,6 +1859,10 @@ TelemetryImpl::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
MutexAutoLock lock(mHangReportsMutex);
n += mHangReports.SizeOfExcludingThis(aMallocSizeOf);
}
{ // Scope for mThreadHangStatsMutex lock
MutexAutoLock lock(mThreadHangStatsMutex);
n += mThreadHangStats.sizeOfExcludingThis(aMallocSizeOf);
}
// It's a bit gross that we measure this other stuff that lives outside of
// TelemetryImpl... oh well.
@ -2029,6 +2089,12 @@ void CaptureStack(const nsACString& aKey)
}
#endif
// Free-function entry point: forwards |aStats| unchanged to
// TelemetryImpl::RecordThreadHangStats.
void RecordThreadHangStats(ThreadHangStats&& aStats)
{
TelemetryImpl::RecordThreadHangStats(Move(aStats));
}
void
WriteFailedProfileLock(nsIFile* aProfileDir)
{

Просмотреть файл

@ -341,6 +341,20 @@ void RecordChromeHang(uint32_t aDuration,
void CaptureStack(const nsCString& aKey);
#endif
class ThreadHangStats;
/**
* Move a ThreadHangStats to Telemetry storage. Normally Telemetry queries
* for active ThreadHangStats through BackgroundHangMonitor, but once a
* thread exits, the thread's copy of ThreadHangStats needs to be moved to
* inside Telemetry using this function.
*
* @param aStats ThreadHangStats to save; the data inside aStats
* will be moved and aStats should be treated as
* invalid after this function returns
*/
void RecordThreadHangStats(ThreadHangStats&& aStats);
/**
* Record a failed attempt at locking the user's profile.
*

Просмотреть файл

@ -977,6 +977,16 @@ var Impl = {
return snapshot;
},
getThreadHangStats: function getThreadHangStats(stats) {
stats.forEach((thread) => {
thread.activity = this.packHistogram(thread.activity);
thread.hangs.forEach((hang) => {
hang.histogram = this.packHistogram(hang.histogram);
});
});
return stats;
},
/**
* Descriptive metadata
*
@ -1230,6 +1240,7 @@ var Impl = {
// Add extended set measurements common to chrome & content processes
if (Telemetry.canRecordExtended) {
payloadObj.chromeHangs = protect(() => Telemetry.chromeHangs);
payloadObj.threadHangStats = protect(() => this.getThreadHangStats(Telemetry.threadHangStats));
payloadObj.log = protect(() => TelemetryLog.entries());
payloadObj.webrtc = protect(() => Telemetry.webrtcStats);
}

Просмотреть файл

@ -9,9 +9,242 @@
#include "HangReports.h"
#include "jsapi.h"
namespace {
using namespace mozilla;
using namespace mozilla::HangMonitor;
using namespace mozilla::Telemetry;
/**
 * Reflects a TimeHistogram as a JS object laid out like a Chromium-style
 * histogram: "min", "max", "histogram_type", "sum", "ranges" and "counts".
 *
 * @param cx   JS context used for allocation.
 * @param time histogram to reflect.
 * @return the new object, or nullptr if any JS operation fails.
 */
static JSObject*
CreateJSTimeHistogram(JSContext* cx, const Telemetry::TimeHistogram& time)
{
  JS::RootedObject result(cx, JS_NewPlainObject(cx));
  if (!result) {
    return nullptr;
  }

  // Scalar header fields.
  if (!JS_DefineProperty(cx, result, "min", time.GetBucketMin(0),
                         JSPROP_ENUMERATE)) {
    return nullptr;
  }
  if (!JS_DefineProperty(cx, result, "max",
                         time.GetBucketMax(ArrayLength(time) - 1),
                         JSPROP_ENUMERATE)) {
    return nullptr;
  }
  if (!JS_DefineProperty(cx, result, "histogram_type",
                         nsITelemetry::HISTOGRAM_EXPONENTIAL,
                         JSPROP_ENUMERATE)) {
    return nullptr;
  }
  // TODO: calculate "sum"
  if (!JS_DefineProperty(cx, result, "sum", 0, JSPROP_ENUMERATE)) {
    return nullptr;
  }

  // One extra slot in each array for the leading "under" bucket.
  JS::RootedObject rangeArray(
    cx, JS_NewArrayObject(cx, ArrayLength(time) + 1));
  JS::RootedObject countArray(
    cx, JS_NewArrayObject(cx, ArrayLength(time) + 1));
  if (!rangeArray || !countArray) {
    return nullptr;
  }

  /* Slot 0 is the "under" bucket, standing for all values below the
     histogram's range; its count is always 0. */
  if (!JS_DefineElement(cx, rangeArray, 0, time.GetBucketMin(0), JSPROP_ENUMERATE)) {
    return nullptr;
  }
  if (!JS_DefineElement(cx, countArray, 0, 0, JSPROP_ENUMERATE)) {
    return nullptr;
  }

  // Real buckets are shifted by one to make room for the "under" bucket.
  for (size_t bucket = 0; bucket < ArrayLength(time); bucket++) {
    if (!JS_DefineElement(cx, rangeArray, bucket + 1, time.GetBucketMax(bucket),
                          JSPROP_ENUMERATE)) {
      return nullptr;
    }
    if (!JS_DefineElement(cx, countArray, bucket + 1, time[bucket], JSPROP_ENUMERATE)) {
      return nullptr;
    }
  }

  if (!JS_DefineProperty(cx, result, "ranges", rangeArray, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  if (!JS_DefineProperty(cx, result, "counts", countArray, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  return result;
}
/**
 * Reflects a HangStack as a JS array of strings, one element per frame label
 * in stack order.
 *
 * @param cx    JS context used for allocation.
 * @param stack stack to reflect.
 * @return the new array, or nullptr if any JS allocation or element
 *         definition fails.
 */
static JSObject*
CreateJSHangStack(JSContext* cx, const Telemetry::HangStack& stack)
{
  JS::RootedObject ret(cx, JS_NewArrayObject(cx, stack.length()));
  if (!ret) {
    return nullptr;
  }
  for (size_t i = 0; i < stack.length(); i++) {
    JS::RootedString string(cx, JS_NewStringCopyZ(cx, stack[i]));
    // JS_NewStringCopyZ returns null on failure; a null string must not be
    // stored as an element.
    if (!string ||
        !JS_DefineElement(cx, ret, i, string, JSPROP_ENUMERATE)) {
      return nullptr;
    }
  }
  return ret;
}
/**
 * Reflects a vector of hang annotations as a JS array of plain objects, one
 * object per distinct annotation set, each mapping annotation keys to string
 * values. Sets whose ComputeAnnotationsKey() key was already reported are
 * skipped.
 *
 * @param cx             JS context used for allocation.
 * @param annotations    annotation sets to reflect.
 * @param returnedObject receives the array; set to nullptr if the array
 *                       cannot be created or if a value string or property
 *                       cannot be created. Other per-entry failures skip
 *                       that entry (best effort).
 */
static void
CreateJSHangAnnotations(JSContext* cx, const HangAnnotationsVector& annotations,
                        JS::MutableHandleObject returnedObject)
{
  JS::RootedObject annotationsArray(cx, JS_NewArrayObject(cx, 0));
  if (!annotationsArray) {
    returnedObject.set(nullptr);
    return;
  }
  // We keep track of the annotations we reported in this hash set, so we can
  // discard duplicated ones.
  nsTHashtable<nsStringHashKey> reportedAnnotations;
  size_t annotationIndex = 0;
  for (const auto & curAnnotations : annotations) {
    JS::RootedObject jsAnnotation(cx, JS_NewPlainObject(cx));
    if (!jsAnnotation) {
      continue;
    }
    // Build a key to index the current annotations in our hash set.
    nsAutoString annotationsKey;
    nsresult rv = ComputeAnnotationsKey(curAnnotations, annotationsKey);
    if (NS_FAILED(rv)) {
      continue;
    }
    // Check if the annotations are in the set. If that's the case, don't double report.
    if (reportedAnnotations.GetEntry(annotationsKey)) {
      continue;
    }
    // If not, report them.
    reportedAnnotations.PutEntry(annotationsKey);
    UniquePtr<HangAnnotations::Enumerator> annotationsEnum =
      curAnnotations->GetEnumerator();
    if (!annotationsEnum) {
      continue;
    }
    nsAutoString key;
    nsAutoString value;
    while (annotationsEnum->Next(key, value)) {
      // The string allocation can fail; a null string must never be stored
      // into a JS::Value, so treat it like a failed property definition.
      JS::RootedString jsString(
        cx, JS_NewUCStringCopyN(cx, value.get(), value.Length()));
      if (!jsString) {
        returnedObject.set(nullptr);
        return;
      }
      JS::RootedValue jsValue(cx);
      jsValue.setString(jsString);
      if (!JS_DefineUCProperty(cx, jsAnnotation, key.get(), key.Length(),
                               jsValue, JSPROP_ENUMERATE)) {
        returnedObject.set(nullptr);
        return;
      }
    }
    if (!JS_SetElement(cx, annotationsArray, annotationIndex, jsAnnotation)) {
      continue;
    }
    ++annotationIndex;
  }
  // Return the array using a |MutableHandleObject| to avoid triggering a false
  // positive rooting issue in the hazard analysis build.
  returnedObject.set(annotationsArray);
}
/**
 * Reflects a HangHistogram as a JS object with a "stack" array, a "histogram"
 * time histogram and, only when the hang has annotations, an "annotations"
 * array.
 *
 * @param cx   JS context used for allocation.
 * @param hang hang to reflect.
 * @return the new object, or nullptr if any part fails to be created or
 *         defined.
 */
static JSObject*
CreateJSHangHistogram(JSContext* cx, const Telemetry::HangHistogram& hang)
{
  JS::RootedObject result(cx, JS_NewPlainObject(cx));
  if (!result) {
    return nullptr;
  }

  // Build all three parts first, then validate them together.
  JS::RootedObject stackObj(cx, CreateJSHangStack(cx, hang.GetStack()));
  JS::RootedObject histogramObj(cx, CreateJSTimeHistogram(cx, hang));
  const auto& rawAnnotations = hang.GetAnnotations();
  JS::RootedObject annotationsObj(cx);
  CreateJSHangAnnotations(cx, rawAnnotations, &annotationsObj);

  if (!stackObj || !histogramObj || !annotationsObj) {
    return nullptr;
  }
  if (!JS_DefineProperty(cx, result, "stack", stackObj, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  if (!JS_DefineProperty(cx, result, "histogram", histogramObj, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  // Only define annotations when nonempty
  if (!rawAnnotations.empty() &&
      !JS_DefineProperty(cx, result, "annotations", annotationsObj, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  return result;
}
} // namespace
namespace mozilla {
namespace Telemetry {
/**
 * Reflects a ThreadHangStats as a JS object with the properties "name",
 * "activity" (time histogram), "hangs" (array of hang objects, each with an
 * optional "nativeStack" index) and "nativeStacks" (the combined stacks).
 *
 * @param cx     JS context used for allocation.
 * @param thread stats to reflect.
 * @return the new object, or nullptr if any JS operation fails.
 */
JSObject*
CreateJSThreadHangStats(JSContext* cx, const Telemetry::ThreadHangStats& thread)
{
  JS::RootedObject ret(cx, JS_NewPlainObject(cx));
  if (!ret) {
    return nullptr;
  }
  JS::RootedString name(cx, JS_NewStringCopyZ(cx, thread.GetName()));
  if (!name ||
      !JS_DefineProperty(cx, ret, "name", name, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  JS::RootedObject activity(cx, CreateJSTimeHistogram(cx, thread.mActivity));
  if (!activity ||
      !JS_DefineProperty(cx, ret, "activity", activity, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  // Process the hangs into a hangs object.
  JS::RootedObject hangs(cx, JS_NewArrayObject(cx, 0));
  if (!hangs) {
    return nullptr;
  }
  for (size_t i = 0; i < thread.mHangs.length(); i++) {
    JS::RootedObject obj(cx, CreateJSHangHistogram(cx, thread.mHangs[i]));
    // Check the freshly created hang object. (This used to test |ret|, which
    // is already known to be non-null, so a failed reflection went on to be
    // used as a null object below.)
    if (!obj) {
      return nullptr;
    }
    // NOTE(review): "runnableName" is defined on the thread object |ret|, so
    // each hang overwrites the previous value and only the last hang's
    // runnable name survives. It looks like this was meant to be defined on
    // |obj|; left unchanged because moving it alters the payload shape.
    JS::RootedString runnableName(cx, JS_NewStringCopyZ(cx, thread.mHangs[i].GetRunnableName()));
    if (!runnableName ||
        !JS_DefineProperty(cx, ret, "runnableName", runnableName, JSPROP_ENUMERATE)) {
      return nullptr;
    }
    // Check if we have a cached native stack index, and if we do record it.
    uint32_t index = thread.mHangs[i].GetNativeStackIndex();
    if (index != Telemetry::HangHistogram::NO_NATIVE_STACK_INDEX) {
      if (!JS_DefineProperty(cx, obj, "nativeStack", index, JSPROP_ENUMERATE)) {
        return nullptr;
      }
    }
    if (!JS_DefineElement(cx, hangs, i, obj, JSPROP_ENUMERATE)) {
      return nullptr;
    }
  }
  if (!JS_DefineProperty(cx, ret, "hangs", hangs, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  // We should already have a CombinedStacks object on the ThreadHangStats, so
  // add that one.
  JS::RootedObject fullReportObj(cx, CreateJSStackObject(cx, thread.mCombinedStacks));
  if (!fullReportObj) {
    return nullptr;
  }
  if (!JS_DefineProperty(cx, ret, "nativeStacks", fullReportObj, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  return ret;
}
/**
 * Records one duration: converts |aTime| to milliseconds and increments the
 * bucket whose index is the floor of log2 of that value.
 */
void
TimeHistogram::Add(PRIntervalTime aTime)
{
  const uint32_t elapsedMs = PR_IntervalToMilliseconds(aTime);
  const size_t bucket = mozilla::FloorLog2(elapsedMs);
  (*this)[bucket]++;
}
const char*
HangStack::InfallibleAppendViaBuffer(const char* aText, size_t aLength)
{
@ -55,5 +288,35 @@ HangStack::AppendViaBuffer(const char* aText, size_t aLength)
return InfallibleAppendViaBuffer(aText, aLength);
}
/**
 * Computes a hash over all labels of |aStack|, in order.
 *
 * @param aStack stack whose labels are hashed.
 * @return the combined hash; 0 for an empty stack.
 */
uint32_t
HangHistogram::GetHash(const HangStack& aStack)
{
  uint32_t result = 0;
  for (const char* frame : aStack) {
    if (!aStack.IsInBuffer(frame)) {
      /* The string is statically allocated, so hashing the pointer
         itself is enough. */
      result = AddToHash(result, frame);
    } else {
      /* The string lives in the stack's own buffer, so its content
         has to be hashed. */
      result = AddToHash(result, HashString(frame));
    }
  }
  return result;
}
/**
 * Two hang histograms are equal when their stacks are equal. The cached hash
 * and the stack length are compared first as cheap rejections.
 */
bool
HangHistogram::operator==(const HangHistogram& aOther) const
{
  return mHash == aOther.mHash &&
         mStack.length() == aOther.mStack.length() &&
         mStack == aOther.mStack;
}
} // namespace Telemetry
} // namespace mozilla

Просмотреть файл

@ -28,6 +28,31 @@ namespace Telemetry {
// ping size.
static const uint32_t kMaximumNativeHangStacks = 300;
static const size_t kTimeHistogramBuckets = 8 * sizeof(PRIntervalTime);
/* TimeHistogram is an efficient histogram that puts time durations into
exponential (base 2) buckets; times are accepted in PRIntervalTime and
stored in milliseconds. */
/* Array of kTimeHistogramBuckets counters. Bucket i counts durations whose
   millisecond value has floor(log2) == i; see Add(). All counters start
   at zero. */
class TimeHistogram : public mozilla::Array<uint32_t, kTimeHistogramBuckets>
{
public:
  TimeHistogram()
  {
    mozilla::PodArrayZero(*this);
  }
  // Get minimum (inclusive) range of bucket in milliseconds
  uint32_t GetBucketMin(size_t aBucket) const {
    MOZ_ASSERT(aBucket < ArrayLength(*this));
    return (1u << aBucket) & ~1u; // Bucket 0 starts at 0, not 1
  }
  // Get maximum (inclusive) range of bucket in milliseconds
  uint32_t GetBucketMax(size_t aBucket) const {
    MOZ_ASSERT(aBucket < ArrayLength(*this));
    // Shift in 64 bits: for the last bucket the shift count equals the width
    // of uint32_t, which is undefined behaviour for a 32-bit operand. The
    // 64-bit form yields UINT32_MAX for that bucket and the same value as
    // before for every other bucket.
    return static_cast<uint32_t>((uint64_t(1) << (aBucket + 1u)) - 1u);
  }
  // Record one duration; defined out of line.
  void Add(PRIntervalTime aTime);
};
/* A native stack is a simple list of pointers, so rather than building a
wrapper type, we typedef the type here. */
typedef std::vector<uintptr_t> NativeHangStack;
@ -135,6 +160,129 @@ public:
const char* AppendViaBuffer(const char* aText, size_t aLength);
};
/* A hang histogram consists of a stack associated with the
hang, along with a time histogram of the hang times. */
// A TimeHistogram of hang durations for one particular hang stack, together
// with the runnable name, the annotations collected for that stack and an
// optional index of its native stack.
class HangHistogram : public TimeHistogram
{
public:
// Value used for mNativeStackIndex to represent the absence of a cached
// native stack.
static const uint32_t NO_NATIVE_STACK_INDEX = UINT32_MAX;
private:
// Hashes the labels of aStack; used to initialize mHash.
static uint32_t GetHash(const HangStack& aStack);
// The stack of the hang. Declared before mHash because mHash is initialized
// from it and members are initialized in declaration order.
HangStack mStack;
// Cached index of the native stack in the mCombinedStacks list in the owning
// ThreadHangStats object. A default value of NO_NATIVE_STACK_INDEX means that
// the ThreadHangStats object which owns this HangHistogram doesn't have a
// cached CombinedStacks with this HangHistogram in it.
uint32_t mNativeStackIndex;
// Use a hash to speed comparisons
const uint32_t mHash;
// Annotations attributed to this stack
HangMonitor::HangAnnotationsVector mAnnotations;
// The name of the runnable on the current thread.
nsCString mRunnableName;
public:
// Takes ownership of aStack, stores aRunnableName and starts with no cached
// native stack index and no annotations.
explicit HangHistogram(HangStack&& aStack, const nsACString& aRunnableName)
: mStack(mozilla::Move(aStack))
, mNativeStackIndex(NO_NATIVE_STACK_INDEX)
, mHash(GetHash(mStack))
, mRunnableName(aRunnableName)
{
}
// Move constructor. Note that mRunnableName is initialized from
// aOther.mRunnableName without going through Move.
HangHistogram(HangHistogram&& aOther)
: TimeHistogram(mozilla::Move(aOther))
, mStack(mozilla::Move(aOther.mStack))
, mNativeStackIndex(mozilla::Move(aOther.mNativeStackIndex))
, mHash(mozilla::Move(aOther.mHash))
, mAnnotations(mozilla::Move(aOther.mAnnotations))
, mRunnableName(aOther.mRunnableName)
{
}
// Equality is defined out of line.
bool operator==(const HangHistogram& aOther) const;
bool operator!=(const HangHistogram& aOther) const
{
return !operator==(aOther);
}
const HangStack& GetStack() const {
return mStack;
}
// Returns NO_NATIVE_STACK_INDEX until SetNativeStackIndex() has been called.
uint32_t GetNativeStackIndex() const {
return mNativeStackIndex;
}
// aIndex must not be the NO_NATIVE_STACK_INDEX sentinel.
void SetNativeStackIndex(uint32_t aIndex) {
MOZ_ASSERT(aIndex != NO_NATIVE_STACK_INDEX);
mNativeStackIndex = aIndex;
}
// The returned pointer refers to storage owned by mRunnableName.
const char* GetRunnableName() const {
return mRunnableName.get();
}
const HangMonitor::HangAnnotationsVector& GetAnnotations() const {
return mAnnotations;
}
// Records one hang of duration aTime and, if aAnnotations is non-null,
// appends it to mAnnotations. Crashes (MOZ_CRASH) if that append fails.
void Add(PRIntervalTime aTime, HangMonitor::HangAnnotationsPtr aAnnotations) {
TimeHistogram::Add(aTime);
if (aAnnotations) {
if (!mAnnotations.append(Move(aAnnotations))) {
MOZ_CRASH();
}
}
}
};
/* Thread hang stats consist of
- thread name
- time histogram of all task run times
- hang histograms of individual hangs
- annotations for each hang
- combined native stacks for all hangs
*/
// Hang statistics for a single monitored thread. Movable via the move
// constructor below.
class ThreadHangStats
{
private:
// Name of the thread, as passed to the constructor.
nsCString mName;
public:
// Time histogram fed through mActivity.Add() with task run durations.
TimeHistogram mActivity;
// One entry per distinct hang.
mozilla::Vector<HangHistogram, 4> mHangs;
// Number of native stacks counted for this thread so far.
uint32_t mNativeStackCnt;
// Native stacks of the hangs; constructed with kMaximumNativeHangStacks.
CombinedStacks mCombinedStacks;
// Creates empty stats for the thread called aName.
explicit ThreadHangStats(const char* aName)
: mName(aName)
, mNativeStackCnt(0)
, mCombinedStacks(Telemetry::kMaximumNativeHangStacks)
{
}
// Move constructor; additionally resets aOther.mNativeStackCnt to 0.
ThreadHangStats(ThreadHangStats&& aOther)
: mName(mozilla::Move(aOther.mName))
, mActivity(mozilla::Move(aOther.mActivity))
, mHangs(mozilla::Move(aOther.mHangs))
, mNativeStackCnt(aOther.mNativeStackCnt)
, mCombinedStacks(mozilla::Move(aOther.mCombinedStacks))
{
aOther.mNativeStackCnt = 0;
}
// The returned pointer refers to storage owned by mName.
const char* GetName() const {
return mName.get();
}
};
/**
* Reflects thread hang stats object as a JS object.
*
* @param JSContext* cx javascript context.
* @param const Telemetry::ThreadHangStats& thread thread hang statistics.
*
* @return JSObject* Javascript reflection of the statistics.
*/
JSObject*
CreateJSThreadHangStats(JSContext* cx, const Telemetry::ThreadHangStats& thread);
} // namespace Telemetry
} // namespace mozilla

Просмотреть файл

@ -63,6 +63,7 @@ Structure:
histograms: {...},
keyedHistograms: {...},
chromeHangs: {...},
threadHangStats: [...],
capturedStacks: {...},
log: [...],
webrtc: {...},
@ -247,6 +248,66 @@ This section contains the keyed histograms available for the current platform.
As of Firefox 48, this section does not contain empty keyed histograms anymore.
threadHangStats
---------------
Contains the statistics about the hangs in main and background threads. Note that hangs in this section capture the `C++ pseudostack <https://developer.mozilla.org/en-US/docs/Mozilla/Performance/Profiling_with_the_Built-in_Profiler#Native_stack_vs._Pseudo_stack>`_ and an incomplete JS stack, which is not 100% precise. For particularly egregious hangs, and on nightly, an unsymbolicated native stack is also captured. The amount of time that is considered "egregious" is different from thread to thread, and is set when the BackgroundHangMonitor is constructed for that thread. In general, hangs of 5 - 10 seconds are considered egregious. Shorter hangs (1 - 2s) are considered egregious for some other threads (the compositor thread, and the hang monitor that is only enabled during tab switch).
To avoid submitting overly large payloads, some limits are applied:
* Identical, adjacent "(chrome script)" or "(content script)" stack entries are collapsed together. If a stack is reduced, the "(reduced stack)" frame marker is added as the oldest frame.
* The depth of the reported pseudostacks is limited to 11 entries. This value represents the 99.9th percentile of the thread hangs stack depths reported by Telemetry.
* The native stacks are limited to a depth of 25 stack frames.
Structure:
.. code-block:: js
"threadHangStats" : [
{
"name" : "Gecko",
"activity" : {...}, // a time histogram of all task run times
"nativeStacks": { // captured for all hangs on nightly, or egregious hangs on beta
"memoryMap": [
["wgdi32.pdb", "08A541B5942242BDB4AEABD8C87E4CFF2"],
["igd10iumd32.pdb", "D36DEBF2E78149B5BE1856B772F1C3991"],
// ... other entries in the format ["module name", "breakpad identifier"] ...
],
"stacks": [
[
[
0, // the module index or -1 for invalid module indices
190649 // the offset of this program counter in its module or an absolute pc
],
[1, 2540075],
// ... other frames ...
],
// ... other stacks ...
]
},
"hangs" : [
{
"stack" : [
"Startup::XRE_Main",
"Timer::Fire",
"(content script)",
"IPDL::PPluginScriptableObject::SendGetChildProperty",
... up to 11 frames ...
],
"nativeStack": 0, // index into nativeStacks.stacks array
"histogram" : {...}, // the time histogram of the hang times
"annotations" : [
{
"pluginName" : "Shockwave Flash",
"pluginVersion" : "18.0.0.209"
},
... other annotations ...
]
},
],
},
... other threads ...
]
capturedStacks
--------------
Contains information about stacks captured on demand via Telemetry API. For more

Просмотреть файл

@ -202,6 +202,23 @@ interface nsITelemetry : nsISupports
[implicit_jscontext]
nsISupports getLoadedModules();
/*
* An array of thread hang stats,
* [<thread>, <thread>, ...]
* <thread> represents a single thread,
* {"name": "<name>",
* "activity": <time>,
* "hangs": [<hang>, <hang>, ...]}
* <time> represents a histogram of time intervals in milliseconds,
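* with the same format as histogramSnapshots
* <hang> represents a particular hang,
* {"stack": <stack>, "nativeStack": <index>, "histogram": <time>}
* where "nativeStack" is only present when a native stack was recorded
* for the hang, and <index> is a numeric index identifying that stack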
* <stack> represents the hang's stack,
* ["<frame_0>", "<frame_1>", ...]
*/
[implicit_jscontext]
readonly attribute jsval threadHangStats;
/*
* An object with two fields: memoryMap and stacks.
* * memoryMap is a list of loaded libraries.

Просмотреть файл

@ -1980,7 +1980,7 @@ add_task(async function test_schedulerNothingDue() {
add_task(async function test_pingExtendedStats() {
const EXTENDED_PAYLOAD_FIELDS = [
"chromeHangs", "log", "slowSQL", "fileIOReports", "lateWrites",
"chromeHangs", "threadHangStats", "log", "slowSQL", "fileIOReports", "lateWrites",
"addonDetails", "webrtc"
];

Просмотреть файл

@ -49,6 +49,8 @@ bool StackScriptEntriesCollapser(const char* aStackEntry, const char *aAnotherSt
namespace mozilla {
class ProcessHangRunnable;
/**
* BackgroundHangManager is the global object that
* manages all instances of BackgroundHangThread.
@ -189,16 +191,17 @@ public:
Telemetry::HangStack mHangStack;
// Native stack of current hang
Telemetry::NativeHangStack mNativeHangStack;
// Statistics for telemetry
Telemetry::ThreadHangStats mStats;
// Annotations for the current hang
UniquePtr<HangMonitor::HangAnnotations> mAnnotations;
// Annotators registered for this thread
HangMonitor::Observer::Annotators mAnnotators;
// List of runnables which can hold a reference to us which need to be
// canceled before we can go away.
LinkedList<RefPtr<ProcessHangRunnable>> mProcessHangRunnables;
// The name of the runnable which is hanging the current process
nsCString mRunnableName;
// The name of the thread which is being monitored
nsCString mThreadName;
// The number of native stacks which have been collected so far.
uint32_t mNativeStackCnt;
BackgroundHangThread(const char* aName,
uint32_t aTimeoutMs,
@ -370,10 +373,10 @@ BackgroundHangManager::RunMonitorThread()
if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
// A hang started
#ifdef NIGHTLY_BUILD
if (currentThread->mNativeStackCnt < Telemetry::kMaximumNativeHangStacks) {
if (currentThread->mStats.mNativeStackCnt < Telemetry::kMaximumNativeHangStacks) {
// NOTE: In nightly builds of firefox we want to collect native stacks
// for all hangs, not just permahangs.
currentThread->mNativeStackCnt += 1;
currentThread->mStats.mNativeStackCnt += 1;
currentThread->mStackHelper.GetPseudoAndNativeStack(
currentThread->mHangStack,
currentThread->mNativeHangStack,
@ -443,8 +446,7 @@ BackgroundHangThread::BackgroundHangThread(const char* aName,
, mHanging(false)
, mWaiting(true)
, mThreadType(aThreadType)
, mThreadName(aName)
, mNativeStackCnt(0)
, mStats(aName)
{
if (sTlsKeyInitialized && IsShared()) {
sTlsKey.set(this);
@ -457,6 +459,94 @@ BackgroundHangThread::BackgroundHangThread(const char* aName,
autoLock.Notify();
}
// This runnable is used to pre-process a hang, performing any expensive
// operations on it, before submitting it into the BackgroundHangThread object
// for Telemetry.
//
// If this object is canceled, it will submit its payload to the
// BackgroundHangThread without performing the processing.
// Cancelable runnable, also a linked-list element, that carries one
// HangHistogram and its native stack until they are appended to the owning
// thread's mStats.mHangs, either by Run() or by Cancel().
class ProcessHangRunnable final
: public CancelableRunnable
, public LinkedListElement<RefPtr<ProcessHangRunnable>>
{
public:
// aThread must be non-null; aHistogram and aNativeStack are moved into this
// object.
ProcessHangRunnable(BackgroundHangManager* aManager,
BackgroundHangThread* aThread,
Telemetry::HangHistogram&& aHistogram,
Telemetry::NativeHangStack&& aNativeStack)
: CancelableRunnable("ProcessHangRunnable")
, mManager(aManager)
, mNativeStack(mozilla::Move(aNativeStack))
, mThread(aThread)
, mHistogram(mozilla::Move(aHistogram))
{
MOZ_ASSERT(mThread);
}
// Processes the native stack without holding any lock, then, under
// mManager->mLock, records it in the thread's combined stacks (if there is
// room) and appends the histogram to the thread's hangs. Does nothing further
// if mThread has already been cleared.
NS_IMETHOD
Run() override
{
// Start processing this histogram's native hang stack before we try to lock
// anything, as we can do this without any locks held. This is the expensive
// part of the operation.
Telemetry::ProcessedStack processed;
if (!mNativeStack.empty()) {
processed = Telemetry::GetStackAndModules(mNativeStack);
}
// Lock the manager's lock, so that we can take a look at our mThread
{
MonitorAutoLock autoLock(mManager->mLock);
if (NS_WARN_IF(!mThread)) {
return NS_OK;
}
// If we have a stack, check if we can add it to combined stacks. This is
// a relatively cheap operation, and must occur with the lock held.
if (!mNativeStack.empty() &&
mThread->mStats.mCombinedStacks.GetStackCount() < Telemetry::kMaximumNativeHangStacks) {
mHistogram.SetNativeStackIndex(mThread->mStats.mCombinedStacks.AddStack(processed));
}
// Submit, remove ourselves from the list, and clear out mThread so we
// don't run again.
MOZ_ALWAYS_TRUE(mThread->mStats.mHangs.append(Move(mHistogram)));
remove();
mThread = nullptr;
}
return NS_OK;
}
// Submits hang, and removes from list.
// The caller must already hold mManager->mLock (asserted below). The native
// stack is not processed on this path.
nsresult
Cancel() override
{
mManager->mLock.AssertCurrentThreadOwns();
if (NS_WARN_IF(!mThread)) {
return NS_OK;
}
// Submit, remove ourselves from the list, and clear out mThread so we
// don't run again.
MOZ_ALWAYS_TRUE(mThread->mStats.mHangs.append(Move(mHistogram)));
// Unlike Run(), the runnable may already have been taken out of the list.
if (isInList()) {
remove();
}
mThread = nullptr;
return NS_OK;
}
private:
// These variables are constant after initialization, and do not need
// synchronization.
RefPtr<BackgroundHangManager> mManager;
const Telemetry::NativeHangStack mNativeStack;
// These variables are guarded by mManager->mLock.
BackgroundHangThread* MOZ_NON_OWNING_REF mThread; // Will Cancel us before it dies
Telemetry::HangHistogram mHistogram;
};
BackgroundHangThread::~BackgroundHangThread()
{
// Lock here because LinkedList is not thread-safe
@ -470,6 +560,16 @@ BackgroundHangThread::~BackgroundHangThread()
if (sTlsKeyInitialized && IsShared()) {
sTlsKey.set(nullptr);
}
// Cancel any remaining process hang runnables, as they hold a weak reference
// into our mStats variable, which we're about to move.
while (RefPtr<ProcessHangRunnable> runnable = mProcessHangRunnables.popFirst()) {
runnable->Cancel();
}
// Record the ThreadHangStats for this thread before we go away. All stats
// should be in this method now, as we canceled any pending runnables.
Telemetry::RecordThreadHangStats(Move(mStats));
}
void
@ -502,12 +602,20 @@ BackgroundHangThread::ReportHang(PRIntervalTime aHangTime)
mHangStack.erase(mHangStack.begin() + 1, mHangStack.begin() + elementsToRemove);
}
// XXX: HangDetails will be expanded to contain all of the relevant
// information and handle reporting a custom ping to telemetry.
Telemetry::HangHistogram newHistogram(Move(mHangStack), mRunnableName);
for (Telemetry::HangHistogram* oldHistogram = mStats.mHangs.begin();
oldHistogram != mStats.mHangs.end(); oldHistogram++) {
if (newHistogram == *oldHistogram) {
// New histogram matches old one
oldHistogram->Add(aHangTime, Move(mAnnotations));
return;
}
}
newHistogram.Add(aHangTime, Move(mAnnotations));
// Notify any observers of the "bhr-thread-hang" topic that a thread has hung.
nsCString name;
name.Assign(mThreadName);
name.AssignASCII(mStats.GetName());
nsCOMPtr<nsIRunnable> runnable = NS_NewRunnableFunction("NotifyBHRHangObservers", [=] {
nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
if (os) {
@ -516,6 +624,39 @@ BackgroundHangThread::ReportHang(PRIntervalTime aHangTime)
os->NotifyObservers(hangDetails, "bhr-thread-hang", nullptr);
}
});
if (SystemGroup::Initialized()) {
// XXX(HACK): This is really sketchy. We need to keep a reference to the
// runnable in case the dispatch fails. If it fails, the already_AddRefed
// runnable which we passed in has been leaked, and we need to free it
// ourselves. The only time when this should fail is if we're shutting down.
//
// Most components just avoid dispatching runnables during shutdown, but BHR
// is not shut down until way too late, so we cannot do that. Instead, we
// just detect that the dispatch failed and manually unleak the leaked
// nsIRunnable in that situation.
nsresult rv = SystemGroup::Dispatch(TaskCategory::Other,
do_AddRef(runnable.get()));
if (NS_FAILED(rv)) {
// NOTE: We go through `get()` here in order to avoid the
// MOZ_NO_ADDREF_RELEASE_ON_RETURN static analysis.
nsrefcnt refcnt = runnable.get()->Release();
MOZ_RELEASE_ASSERT(refcnt == 1, "runnable should have had 1 reference leaked");
}
}
// Process the hang off-main thread. We record a reference to the runnable in
// mProcessHangRunnables so we can abort this preprocessing and just submit
// the message if the processing takes too long and our thread is going away.
RefPtr<ProcessHangRunnable> processHang =
new ProcessHangRunnable(mManager, this, Move(newHistogram), Move(mNativeHangStack));
mProcessHangRunnables.insertFront(processHang);
// Try to dispatch the runnable to the StreamTransportService threadpool. If
// we fail, cancel our runnable.
if (!mManager->mSTS || NS_FAILED(mManager->mSTS->Dispatch(processHang.forget()))) {
RefPtr<ProcessHangRunnable> runnable = mProcessHangRunnables.popFirst();
runnable->Cancel();
}
}
void
@ -545,6 +686,7 @@ BackgroundHangThread::Update()
mManager->Wakeup();
} else {
PRIntervalTime duration = intervalNow - mInterval;
mStats.mActivity.Add(duration);
if (MOZ_UNLIKELY(duration >= mTimeout)) {
/* Wake up the manager thread to tell it that a hang ended */
mManager->Wakeup();
@ -755,6 +897,33 @@ BackgroundHangMonitor::UnregisterAnnotator(HangMonitor::Annotator& aAnnotator)
#endif
}
/* Because we are iterating through the BackgroundHangThread linked list,
we need to take a lock. Using MonitorAutoLock as a base class makes
sure all of that is taken care of for us. */
// Takes the manager's mLock through the MonitorAutoLock base and points
// mThread at the first entry of mHangThreads (nullptr when there is no
// manager instance).
// NOTE(review): the base-class initializer dereferences
// BackgroundHangManager::sInstance unconditionally, while the mThread
// initializer null-checks it. Constructing this iterator with a null
// sInstance would therefore crash; presumably callers must first check that
// the monitor is not disabled -- confirm.
BackgroundHangMonitor::ThreadHangStatsIterator::ThreadHangStatsIterator()
: MonitorAutoLock(BackgroundHangManager::sInstance->mLock)
, mThread(BackgroundHangManager::sInstance ?
BackgroundHangManager::sInstance->mHangThreads.getFirst() :
nullptr)
{
#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
// Either a manager instance exists or monitoring has been disabled.
MOZ_ASSERT(BackgroundHangManager::sInstance ||
BackgroundHangManager::sDisabled,
"Inconsistent state");
#endif
}
/**
 * Returns the stats of the thread the iterator currently points at and
 * advances to the following thread; returns nullptr once the list is
 * exhausted.
 */
Telemetry::ThreadHangStats*
BackgroundHangMonitor::ThreadHangStatsIterator::GetNext()
{
  BackgroundHangThread* const current = mThread;
  if (current == nullptr) {
    return nullptr;
  }
  // Step forward before handing out the current thread's stats.
  mThread = current->getNext();
  return &current->mStats;
}
NS_IMETHODIMP
HangDetails::GetDuration(uint32_t* aDuration)
{

Просмотреть файл

@ -17,6 +17,10 @@
namespace mozilla {
namespace Telemetry {
class ThreadHangStats;
} // namespace Telemetry
class BackgroundHangThread;
class BackgroundHangManager;
@ -123,6 +127,44 @@ public:
THREAD_PRIVATE
};
/**
* ThreadHangStatsIterator is used to iterate through the ThreadHangStats
* associated with each active monitored thread. Because of an internal
* lock while this object is alive, a thread must use only one instance
* of this class at a time and must iterate through the list as fast as
* possible. The following example shows using the iterator:
*
* {
* // Scope the iter variable so it's destroyed as soon as we're done
* BackgroundHangMonitor::ThreadHangStatsIterator iter;
* for (ThreadHangStats* histogram = iter.GetNext();
* histogram; histogram = iter.GetNext()) {
* // Process histogram
* }
* }
*/
class ThreadHangStatsIterator : public MonitorAutoLock
{
private:
  // Thread whose stats the next GetNext() call returns; nullptr at the end
  // of the list.
  BackgroundHangThread* mThread;

  // Not copyable. Explicitly deleted, rather than declared and left
  // undefined, so that any accidental copy is rejected at compile time.
  ThreadHangStatsIterator(const ThreadHangStatsIterator&) = delete;
  ThreadHangStatsIterator& operator=(const ThreadHangStatsIterator&) = delete;
public:
  /**
   * Create an ThreadHangStatsIterator instance and take the internal lock.
   * Internal lock is released on destruction.
   */
  ThreadHangStatsIterator();
  /**
   * Get the next item in the list; the first call returns the first item.
   * Returns nullptr at the end of the list.
   */
  Telemetry::ThreadHangStats* GetNext();
};
/**
* Enable hang monitoring.
* Must return before using BackgroundHangMonitor.