Bug 1225851: Capturing keyed call stacks on demand inside Telemetry. r=chutten,r=gfritzsche

MozReview-Commit-ID: XSs5MeQ1Bs
This commit is contained in:
Iaroslav (yarik) Sheptykin 2016-11-17 20:52:53 +01:00
Родитель 7a1645f45d
Коммит 6cb6fc4177
10 изменённых файлов: 585 добавлений и 1 удалений

Просмотреть файл

@ -75,6 +75,8 @@
#include "mozilla/HangMonitor.h"
#if defined(MOZ_ENABLE_PROFILER_SPS)
#include "shared-libraries.h"
#include "mozilla/StackWalk.h"
#include "nsPrintfCString.h"
#endif
namespace {
@ -100,6 +102,9 @@ public:
size_t AddStack(const Telemetry::ProcessedStack& aStack);
size_t GetStackCount() const;
size_t SizeOfExcludingThis() const;
/** Clears the contents of vectors and resets the index. */
void Clear();
private:
std::vector<Telemetry::ProcessedStack::Module> mModules;
// A circular buffer to hold the stacks.
@ -208,6 +213,13 @@ ComputeAnnotationsKey(const HangAnnotationsPtr& aAnnotations, nsAString& aKeyOut
return NS_OK;
}
void
CombinedStacks::Clear() {
mNextIndex = 0;
mStacks.clear();
mModules.clear();
}
class HangReports {
public:
/**
@ -385,6 +397,191 @@ HangReports::GetAnnotationInfo() const {
return mAnnotationInfo;
}
#if defined(MOZ_ENABLE_PROFILER_SPS)
const uint8_t kMaxKeyLength = 50;
/**
* Checks if a single character of the key string is valid.
*
* @param aChar a character to validate.
* @return True, if the char is valid, False - otherwise.
*/
bool
IsKeyCharValid(const char aChar)
{
return (aChar >= 'A' && aChar <= 'Z')
|| (aChar >= 'a' && aChar <= 'z')
|| (aChar >= '0' && aChar <= '9')
|| aChar == '-';
}
/**
* Checks if a given string is a valid telemetry key.
*
* @param aKey is the key string.
* @return True, if the key is valid, False - otherwise.
*/
bool
IsKeyValid(const nsACString& aKey)
{
// Check key length.
if (aKey.Length() > kMaxKeyLength) {
return false;
}
// Check key characters.
const char* cur = aKey.BeginReading();
const char* end = aKey.EndReading();
for (; cur < end; ++cur) {
if (!IsKeyCharValid(*cur)) {
return false;
}
}
return true;
}
/**
* Allows taking a snapshot of a call stack on demand. Captured stacks are
* indexed by a string key in a hash table. The stack is only captured Once
* for each key. Consequent captures with the same key result in incrementing
* capture counter without re-capturing the stack.
*/
class KeyedStackCapturer {
public:
KeyedStackCapturer();
void Capture(const nsACString& aKey);
NS_IMETHODIMP ReflectCapturedStacks(JSContext *cx, JS::MutableHandle<JS::Value> ret);
/**
* Resets captured stacks and the information related to them.
*/
void Clear();
private:
/**
* Describes how often a stack was captured.
*/
struct StackFrequencyInfo {
// A number of times the stack was captured.
uint32_t mCount;
// Index of the stack inside stacks array.
uint32_t mIndex;
StackFrequencyInfo(uint32_t aCount, uint32_t aIndex)
: mCount(aCount)
, mIndex(aIndex)
{}
};
typedef nsClassHashtable<nsCStringHashKey, StackFrequencyInfo> FrequencyInfoMapType;
FrequencyInfoMapType mStackInfos;
CombinedStacks mStacks;
Mutex mStackCapturerMutex;
};
KeyedStackCapturer::KeyedStackCapturer()
: mStackCapturerMutex("Telemetry::StackCapturerMutex")
{}
void KeyedStackCapturer::Capture(const nsACString& aKey) {
// Check if the key is ok.
if (!IsKeyValid(aKey)) {
NS_WARNING(nsPrintfCString(
"Invalid key is used to capture stack in telemetry: '%s'",
PromiseFlatCString(aKey).get()
).get());
return;
}
// Trying to find and update the stack information.
StackFrequencyInfo* info = mStackInfos.Get(aKey);
if (info) {
// We already recorded this stack before, only increase the count.
info->mCount++;
return;
}
// Check if we have room for new captures.
if (mStackInfos.Count() >= kMaxChromeStacksKept) {
// Addressed by Bug 1316793.
return;
}
// We haven't captured a stack for this key before, do it now.
// Note that this does a stackwalk and is an expensive operation.
std::vector<uintptr_t> rawStack;
auto callback = [](uint32_t, void* aPC, void*, void* aClosure) {
std::vector<uintptr_t>* stack =
static_cast<std::vector<uintptr_t>*>(aClosure);
stack->push_back(reinterpret_cast<uintptr_t>(aPC));
};
MozStackWalk(callback, /* skipFrames */ 0,
/* maxFrames */ 0, reinterpret_cast<void*>(&rawStack), 0, nullptr);
Telemetry::ProcessedStack stack = Telemetry::GetStackAndModules(rawStack);
// Store the new stack info.
MutexAutoLock captureStackMutex(mStackCapturerMutex);
size_t stackIndex = mStacks.AddStack(stack);
mStackInfos.Put(aKey, new StackFrequencyInfo(1, stackIndex));
}
NS_IMETHODIMP
KeyedStackCapturer::ReflectCapturedStacks(JSContext *cx, JS::MutableHandle<JS::Value> ret)
{
MutexAutoLock capturedStackMutex(mStackCapturerMutex);
// this adds the memoryMap and stacks properties.
JS::RootedObject fullReportObj(cx, CreateJSStackObject(cx, mStacks));
if (!fullReportObj) {
return NS_ERROR_FAILURE;
}
JS::RootedObject keysArray(cx, JS_NewArrayObject(cx, 0));
if (!keysArray) {
return NS_ERROR_FAILURE;
}
bool ok = JS_DefineProperty(cx, fullReportObj, "captures",
keysArray, JSPROP_ENUMERATE);
if (!ok) {
return NS_ERROR_FAILURE;
}
size_t keyIndex = 0;
for (auto iter = mStackInfos.ConstIter(); !iter.Done(); iter.Next(), ++keyIndex) {
const StackFrequencyInfo* info = iter.Data();
JS::RootedObject infoArray(cx, JS_NewArrayObject(cx, 0));
if (!keysArray) {
return NS_ERROR_FAILURE;
}
JS::RootedString str(cx, JS_NewStringCopyZ(cx,
PromiseFlatCString(iter.Key()).get()));
if (!str ||
!JS_DefineElement(cx, infoArray, 0, str, JSPROP_ENUMERATE) ||
!JS_DefineElement(cx, infoArray, 1, info->mIndex, JSPROP_ENUMERATE) ||
!JS_DefineElement(cx, infoArray, 2, info->mCount, JSPROP_ENUMERATE) ||
!JS_DefineElement(cx, keysArray, keyIndex, infoArray, JSPROP_ENUMERATE)) {
return NS_ERROR_FAILURE;
}
}
ret.setObject(*fullReportObj);
return NS_OK;
}
void
KeyedStackCapturer::Clear()
{
MutexAutoLock captureStackMutex(mStackCapturerMutex);
mStackInfos.Clear();
mStacks.Clear();
}
#endif
/**
* IOInterposeObserver recording statistics of main-thread I/O during execution,
* aimed at consumption by TelemetryImpl
@ -688,6 +885,7 @@ public:
int32_t aSystemUptime,
int32_t aFirefoxUptime,
HangAnnotationsPtr aAnnotations);
static void DoStackCapture(const nsACString& aKey);
#endif
static void RecordThreadHangStats(Telemetry::ThreadHangStats& aStats);
size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf);
@ -734,6 +932,12 @@ private:
Mutex mHashMutex;
HangReports mHangReports;
Mutex mHangReportsMutex;
#if defined(MOZ_ENABLE_PROFILER_SPS)
// Stores data about stacks captured on demand.
KeyedStackCapturer mStackCapturer;
#endif
// mThreadHangStats stores recorded, inactive thread hang stats
Vector<Telemetry::ThreadHangStats> mThreadHangStats;
Mutex mThreadHangStatsMutex;
@ -1323,6 +1527,20 @@ TelemetryImpl::GetChromeHangs(JSContext *cx, JS::MutableHandle<JS::Value> ret)
return NS_OK;
}
NS_IMETHODIMP
TelemetryImpl::SnapshotCapturedStacks(bool clear, JSContext *cx, JS::MutableHandle<JS::Value> ret)
{
#if defined(MOZ_ENABLE_PROFILER_SPS)
nsresult rv = mStackCapturer.ReflectCapturedStacks(cx, ret);
if (clear) {
mStackCapturer.Clear();
}
return rv;
#else
return NS_OK;
#endif
}
static JSObject *
CreateJSStackObject(JSContext *cx, const CombinedStacks &stacks) {
JS::Rooted<JSObject*> ret(cx, JS_NewPlainObject(cx));
@ -2247,8 +2465,23 @@ TelemetryImpl::RecordChromeHang(uint32_t aDuration,
aSystemUptime, aFirefoxUptime,
Move(annotations));
}
void
TelemetryImpl::DoStackCapture(const nsACString& aKey) {
if (Telemetry::CanRecordExtended() && XRE_IsParentProcess()) {
sTelemetry->mStackCapturer.Capture(aKey);
}
}
#endif
nsresult
TelemetryImpl::CaptureStack(const nsACString& aKey) {
#if defined(MOZ_ENABLE_PROFILER_SPS)
TelemetryImpl::DoStackCapture(aKey);
#endif
return NS_OK;
}
void
TelemetryImpl::RecordThreadHangStats(Telemetry::ThreadHangStats& aStats)
{
@ -2921,6 +3154,11 @@ void RecordChromeHang(uint32_t duration,
aSystemUptime, aFirefoxUptime,
Move(aAnnotations));
}
void CaptureStack(const nsACString& aKey)
{
TelemetryImpl::DoStackCapture(aKey);
}
#endif
void RecordThreadHangStats(ThreadHangStats& aStats)

Просмотреть файл

@ -328,6 +328,17 @@ void RecordChromeHang(uint32_t aDuration,
int32_t aFirefoxUptime,
mozilla::UniquePtr<mozilla::HangMonitor::HangAnnotations>
aAnnotations);
/**
* Record the current thread's call stack on demand. Note that, the stack is
* only captured once. Subsequent calls result in incrementing the capture
* counter.
*
* @param aKey - A user defined key associated with the captured stack.
*
* NOTE: Unwinding call stacks is an expensive operation performance-wise.
*/
void CaptureStack(const nsCString& aKey);
#endif
class ThreadHangStats;

Просмотреть файл

@ -1346,6 +1346,13 @@ var Impl = {
payloadObj.processes.parent.gc = protect(() => GCTelemetry.entries("main", clearSubsession));
payloadObj.processes.content.gc = protect(() => GCTelemetry.entries("content", clearSubsession));
}
// Adding captured stacks to the payload only if any exist and clearing
// captures for this sub-session.
let stacks = protect(() => Telemetry.snapshotCapturedStacks(true));
if (stacks && ("captures" in stacks) && (stacks.captures.length > 0)) {
payloadObj.processes.parent.capturedStacks = stacks;
}
}
if (this._childTelemetry.length) {

Просмотреть файл

@ -62,6 +62,7 @@ Structure:
keyedHistograms: {...},
chromeHangs: {...},
threadHangStats: [...],
capturedStacks: {...},
log: [...],
webrtc: {...},
gc: {...},
@ -263,6 +264,39 @@ Structure:
... other threads ...
]
capturedStacks
--------------
Contains information about stacks captured on demand via Telemetry API. This is similar to `chromeHangs`, but only stacks captured on the main thread of the parent process are reported. It reports precise C++ stacks are reported and is only available on Windows, either in Firefox Nightly or in builds using "--enable-profiling" switch.
Limits for captured stacks are the same as for chromeHangs (see below). Furthermore:
* the key length is limited to 50 characters,
* keys are restricted to alpha-numeric characters and `-`.
Structure:
.. code-block:: js
"capturedStacks" : {
"memoryMap": [
["wgdi32.pdb", "08A541B5942242BDB4AEABD8C87E4CFF2"],
["igd10iumd32.pdb", "D36DEBF2E78149B5BE1856B772F1C3991"],
// ... other entries in the format ["module name", "breakpad identifier"] ...
],
"stacks": [
[
[
0, // the module index or -1 for invalid module indices
190649 // the offset of this program counter in its module or an absolute pc
],
[1, 2540075],
// ... other frames ...
],
// ... other stacks ...
],
"captures": [["string-key", stack-index, count], ... ]
}
chromeHangs
-----------
Contains the statistics about the hangs happening exclusively on the main thread of the parent process. Precise C++ stacks are reported. This is only available on Nightly Release on Windows, when building using "--enable-profiling" switch.

Просмотреть файл

@ -139,6 +139,47 @@ interface nsITelemetry : nsISupports
[implicit_jscontext]
readonly attribute jsval chromeHangs;
/*
* Record the current thread's call stack on demand. Note that, the stack is
* only captured at the first call. All subsequent calls result in incrementing
* the capture counter without doing actual stack unwinding.
*
* @param aKey - A user defined key associated with the captured stack.
*
* NOTE: Unwinding call stacks is an expensive operation performance-wise.
*/
void captureStack(in ACString name);
/*
* Returns a snapshot of captured stacks. The data has the following structure:
*
* {
* "memoryMap": [
* ["wgdi32.pdb", "08A541B5942242BDB4AEABD8C87E4CFF2"],
* ["igd10iumd32.pdb", "D36DEBF2E78149B5BE1856B772F1C3991"],
* ... other entries in the format ["module name", "breakpad identifier"] ...
* ],
* "stacks": [
* [
* [
* 0, // the module index or -1 for invalid module indices
* 190649 // the offset of this program counter in its module or an absolute pc
* ],
* [1, 2540075],
* ... other frames ...
* ],
* ... other stacks ...
* ],
* "captures": [["string-key", stack-index, count], ... ]
* }
*
* @param clear Whether to clear out the subsession histograms after taking a snapshot.
*
* @return A snapshot of captured stacks.
*/
[implicit_jscontext]
jsval snapshotCapturedStacks([optional] in boolean clear);
/*
* An array of thread hang stats,
* [<thread>, <thread>, ...]

Просмотреть файл

@ -0,0 +1,185 @@
/* Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/ */
Cu.import("resource://gre/modules/TelemetryController.jsm", this);
Cu.import("resource://gre/modules/AppConstants.jsm", this);
/**
* Ensures that the sctucture of the javascript object used for capturing stacks
* is as intended. The structure is expected to be as in this example:
*
* {
* "memoryMap": [
* [String, String],
* ...
* ],
* "stacks": [
* [
* [Integer, Integer], // Frame
* ...
* ],
* ...
* ],
* "captures": [
* [String, Integer, Integer],
* ...
* ]
* }
*
* @param {Object} obj abject to be inpected vor validity.
*
* @return {Boolean} True if the structure is valid. False - otherwise.
*/
function checkObjectStructure(obj) {
// Ensuring an object is given.
if (!obj || typeof obj !== "object") {
return false;
}
// Ensuring all properties exist inside the object and are arrays.
for (let property of ["memoryMap", "stacks", "captures"]) {
if (!(property in obj) || !Array.isArray(obj[property]))
return false;
}
return true;
}
/**
* A helper for triggering a stack capture and returning the new state of stacks.
*
* @param {String} key The key for capturing stack.
* @param {Boolean} clear True to reset captured stacks, False - otherwise.
*
* @return {Object} captured stacks.
*/
function captureStacks(key, clear = true) {
Telemetry.captureStack(key);
let stacks = Telemetry.snapshotCapturedStacks(clear);
Assert.ok(checkObjectStructure(stacks));
return stacks;
}
const TEST_STACK_KEYS = ["TEST-KEY1", "TEST-KEY2"];
/**
* Ensures that captured stacks appear in pings, if any were captured.
*/
add_task({
skip_if: () => !AppConstants.MOZ_ENABLE_PROFILER_SPS
}, function* test_capturedStacksAppearInPings() {
yield TelemetryController.testSetup();
captureStacks("DOES-NOT-MATTER", false);
let ping = TelemetryController.getCurrentPingData();
Assert.ok("capturedStacks" in ping.payload.processes.parent);
let capturedStacks = ping.payload.processes.parent.capturedStacks;
Assert.ok(checkObjectStructure(capturedStacks));
});
/**
* Ensures that capturing a stack for a new key increases the number
* of captured stacks and adds a new entry to captures.
*/
add_task({
skip_if: () => !AppConstants.MOZ_ENABLE_PROFILER_SPS
}, function* test_CaptureStacksIncreasesNumberOfCapturedStacks() {
// Construct a unique key for this test.
let key = TEST_STACK_KEYS[0] + "-UNIQUE-KEY-1";
// Ensure that no captures for the key exist.
let original = Telemetry.snapshotCapturedStacks();
Assert.equal(undefined, original.captures.find(capture => capture[0] === key));
// Capture stack and find updated capture stats for TEST_STACK_KEYS[0].
let updated = captureStacks(key);
// Ensure that a new element has been appended to both stacks and captures.
Assert.equal(original.stacks.length + 1, updated.stacks.length);
Assert.equal(original.captures.length + 1, updated.captures.length);
// Ensure that the capture info for the key exists and structured well.
Assert.deepEqual(
[key, original.stacks.length, 1],
updated.captures.find(capture => capture[0] === key)
);
});
/**
* Ensures that stacks are grouped by the key. If a stack is captured
* more than once for the key, the length of stacks does not increase.
*/
add_task({
skip_if: () => !AppConstants.MOZ_ENABLE_PROFILER_SPS
}, function* test_CaptureStacksGroupsDuplicateStacks() {
// Make sure that there are initial captures for TEST_STACK_KEYS[0].
let stacks = captureStacks(TEST_STACK_KEYS[0], false);
let original = {
captures: stacks.captures.find(capture => capture[0] === TEST_STACK_KEYS[0]),
stacks: stacks.stacks
};
// Capture stack and find updated capture stats for TEST_STACK_KEYS[0].
stacks = captureStacks(TEST_STACK_KEYS[0]);
let updated = {
captures: stacks.captures.find(capture => capture[0] === TEST_STACK_KEYS[0]),
stacks: stacks.stacks
};
// The length of captured stacks should remain same.
Assert.equal(original.stacks.length, updated.stacks.length);
// We expect the info on captures to look as original. Only
// stack counter should be increased by one.
let expectedCaptures = original.captures;
expectedCaptures[2]++;
Assert.deepEqual(expectedCaptures, updated.captures);
});
/**
* Ensure that capturing the stack for a key does not affect info
* for other keys.
*/
add_task({
skip_if: () => !AppConstants.MOZ_ENABLE_PROFILER_SPS
}, function* test_CaptureStacksSeparatesInformationByKeys() {
// Make sure that there are initial captures for TEST_STACK_KEYS[0].
let stacks = captureStacks(TEST_STACK_KEYS[0], false);
let original = {
captures: stacks.captures.find(capture => capture[0] === TEST_STACK_KEYS[0]),
stacks: stacks.stacks
};
// Capture stack for a new key.
let uniqueKey = TEST_STACK_KEYS[1] + "-UNIQUE-KEY-2";
updated = captureStacks(uniqueKey);
// The length of captured stacks should increase to reflect the new capture.
Assert.equal(original.stacks.length + 1, updated.stacks.length);
// The information for TEST_STACK_KEYS[0] should remain same.
Assert.deepEqual(
original.captures,
updated.captures.find(capture => capture[0] === TEST_STACK_KEYS[0])
);
});
/**
* Ensure that Telemetry does not allow weird keys.
*/
add_task({
skip_if: () => !AppConstants.MOZ_ENABLE_PROFILER_SPS
}, function* test_CaptureStacksDoesNotAllowBadKey() {
for (let badKey of [null, "KEY-!@\"#$%^&*()_"]) {
let stacks = captureStacks(badKey);
let captureData = stacks.captures.find(capture => capture[0] === badKey)
Assert.ok(!captureData, `"${badKey}" should not be allowed as a key`);
}
});
function run_test() {
do_get_profile(true);
Services.prefs.setBoolPref(PREF_TELEMETRY_ENABLED, true);
run_next_test();
}

Просмотреть файл

@ -60,4 +60,5 @@ tags = addons
[test_TelemetryScalars.js]
[test_TelemetryTimestamps.js]
skip-if = toolkit == 'android'
[test_TelemetryCaptureStack.js]
[test_TelemetryEvents.js]

Просмотреть файл

@ -1108,6 +1108,28 @@ var ChromeHangs = {
}
};
var CapturedStacks = {
symbolRequest: null,
render: function CapturedStacks_render(payload) {
// Retrieve captured stacks from telemetry payload.
let capturedStacks = "processes" in payload && "parent" in payload.processes
? payload.processes.parent.capturedStacks
: false;
let hasData = capturedStacks && capturedStacks.stacks &&
capturedStacks.stacks.length > 0;
setHasData("captured-stacks-section", hasData);
if (!hasData) {
return;
}
let stacks = capturedStacks.stacks;
let memoryMap = capturedStacks.memoryMap;
StackRenderer.renderStacks("captured-stacks", stacks, memoryMap, () => {});
},
};
var ThreadHangStats = {
/**
@ -1797,6 +1819,29 @@ function setupListeners() {
ChromeHangs.render(gPingData);
}, false);
document.getElementById("captured-stacks-fetch-symbols").addEventListener("click",
function() {
if (!gPingData) {
return;
}
let capturedStacks = gPingData.payload.processes.parent.capturedStacks;
let req = new SymbolicationRequest("captured-stacks",
CapturedStacks.render,
capturedStacks.memoryMap,
capturedStacks.stacks,
null);
req.fetchSymbols();
}, false);
document.getElementById("captured-stacks-hide-symbols").addEventListener("click",
function() {
if (!gPingData) {
return;
}
CapturedStacks.render(gPingData);
}, false);
document.getElementById("late-writes-fetch-symbols").addEventListener("click",
function() {
if (!gPingData) {
@ -2063,6 +2108,9 @@ function displayPingData(ping, updatePayloadList = false) {
// Show thread hang stats
ThreadHangStats.render(payload);
// Show captured stacks.
CapturedStacks.render(payload);
// Show simple measurements
let simpleMeasurements = sortStartupMilestones(payload.simpleMeasurements);
let hasData = Object.keys(simpleMeasurements).length > 0;

Просмотреть файл

@ -274,6 +274,21 @@
</div>
</section>
<section id="captured-stacks-section" class="data-section">
<input type="checkbox" class="statebox"/>
<h1 class="section-name">&aboutTelemetry.capturedStacksSection;</h1>
<span class="toggle-caption">&aboutTelemetry.toggle;</span>
<span class="empty-caption">&aboutTelemetry.emptySection;</span>
<div id="captured-stacks" class="data">
<a id="captured-stacks-fetch-symbols" href="#">&aboutTelemetry.fetchSymbols;</a>
<a id="captured-stacks-hide-symbols" class="hidden" href="#">&aboutTelemetry.hideSymbols;</a>
<br/>
<br/>
<div id="captured-stacks-data">
</div>
</div>
</section>
<section id="raw-payload-section" class="data-section">
<input type="checkbox" class="statebox"/>
<h1 class="section-name">&aboutTelemetry.rawPayload;</h1>

Просмотреть файл

@ -100,6 +100,10 @@ Ping
Thread Hangs
">
<!ENTITY aboutTelemetry.capturedStacksSection "
Captured Stacks
">
<!ENTITY aboutTelemetry.scalarsSection "
Scalars
">
@ -170,4 +174,4 @@ Ping
<!ENTITY aboutTelemetry.rawPayload "
Raw Payload
">
">