зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1610134: Part 1: add timeout pref that turns on late write checking to see if it's possible to crash browser earlier. r=dthayer,chutten
Differential Revision: https://phabricator.services.mozilla.com/D67884
This commit is contained in:
Родитель
3d235e9172
Коммит
044488a3af
|
@ -805,6 +805,7 @@ pref("toolkit.telemetry.debugSlowSql", false);
|
|||
pref("toolkit.telemetry.unified", true);
|
||||
// AsyncShutdown delay before crashing in case of shutdown freeze
|
||||
#if !defined(MOZ_ASAN) && !defined(MOZ_TSAN)
|
||||
pref("toolkit.asyncshutdown.report_writes_after", 20000); // 20 seconds
|
||||
pref("toolkit.asyncshutdown.crash_timeout", 60000); // 1 minute
|
||||
#else
|
||||
// ASan and TSan builds can be considerably slower. Extend the grace period
|
||||
|
|
|
@ -2658,6 +2658,23 @@ telemetry:
|
|||
- 'main'
|
||||
- 'content'
|
||||
|
||||
failed_late_write:
|
||||
bug_numbers:
|
||||
- 1610134
|
||||
description: >
|
||||
The number of times a late write stack failed to be parsed.
|
||||
expires: never
|
||||
kind: uint
|
||||
notification_emails:
|
||||
- emalysz@mozilla.com
|
||||
release_channel_collection: opt-out
|
||||
products:
|
||||
- 'firefox'
|
||||
- 'fennec'
|
||||
- 'geckoview'
|
||||
record_in_processes:
|
||||
- 'main'
|
||||
|
||||
telemetry.discarded:
|
||||
accumulations:
|
||||
bug_numbers:
|
||||
|
|
|
@ -97,6 +97,9 @@
|
|||
#include "TelemetryHistogram.h"
|
||||
#include "TelemetryOrigin.h"
|
||||
#include "TelemetryScalar.h"
|
||||
#ifndef ANDROID
|
||||
# include "nsTerminator.h"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -949,8 +952,8 @@ static bool IsValidBreakpadId(const std::string& breakpadId) {
|
|||
|
||||
// Read a stack from the given file name. In case of any error, aStack is
|
||||
// unchanged.
|
||||
static void ReadStack(PathCharPtr aFileName,
|
||||
Telemetry::ProcessedStack& aStack) {
|
||||
static void ReadLateWriteStack(PathCharPtr aFileName,
|
||||
Telemetry::ProcessedStack& aStack) {
|
||||
IFStream file(aFileName);
|
||||
|
||||
size_t numModules;
|
||||
|
@ -1014,6 +1017,14 @@ static void ReadStack(PathCharPtr aFileName,
|
|||
stack.AddFrame(frame);
|
||||
}
|
||||
|
||||
bool isFromTerminatorWatchdog;
|
||||
file >> isFromTerminatorWatchdog;
|
||||
if (file.fail()) {
|
||||
ScalarAdd(mozilla::Telemetry::ScalarID::TELEMETRY_FAILED_LATE_WRITE, 1);
|
||||
return;
|
||||
}
|
||||
stack.SetIsFromTerminatorWatchdog(isFromTerminatorWatchdog);
|
||||
|
||||
aStack = stack;
|
||||
}
|
||||
|
||||
|
@ -1033,9 +1044,12 @@ void TelemetryImpl::ReadLateWritesStacks(nsIFile* aProfileDir) {
|
|||
}
|
||||
|
||||
Telemetry::ProcessedStack stack;
|
||||
ReadStack(file->NativePath().get(), stack);
|
||||
ReadLateWriteStack(file->NativePath().get(), stack);
|
||||
if (stack.GetStackSize() != 0) {
|
||||
mLateWritesStacks.AddStack(stack);
|
||||
if (stack.GetIsFromTerminatorWatchdog()) {
|
||||
mLateWritesStacks.SetIsFromTerminatorWatchdog(true);
|
||||
}
|
||||
}
|
||||
// Delete the file so that we don't report it again on the next run.
|
||||
file->Remove(false);
|
||||
|
@ -1056,16 +1070,23 @@ TelemetryImpl::GetLateWrites(JSContext* cx, JS::MutableHandle<JS::Value> ret) {
|
|||
// CreateJSStackObject, but we would still need to figure out where to call
|
||||
// JS_RemoveObjectRoot. Would it be ok to never call JS_RemoveObjectRoot
|
||||
// and just set the pointer to nullptr is the telemetry destructor?
|
||||
|
||||
JSObject* report;
|
||||
JS::Rooted<JSObject*> temp(cx, JS_NewPlainObject(cx));
|
||||
if (!mCachedTelemetryData) {
|
||||
CombinedStacks empty;
|
||||
report = CreateJSStackObject(cx, empty);
|
||||
temp = CreateJSStackObject(cx, empty);
|
||||
} else {
|
||||
report = CreateJSStackObject(cx, mLateWritesStacks);
|
||||
temp = CreateJSStackObject(cx, mLateWritesStacks);
|
||||
}
|
||||
|
||||
if (report == nullptr) {
|
||||
// Add this information afer we CreateJSStackObject because
|
||||
// the function is used elsewhere, not just for latewrites.
|
||||
JS::Rooted<JSObject*> report(cx, temp);
|
||||
bool isFromTerminatorWatchdog =
|
||||
mLateWritesStacks.GetIsFromTerminatorWatchdog();
|
||||
bool ok = JS_DefineProperty(cx, report, "isFromTerminatorWatchdog",
|
||||
isFromTerminatorWatchdog, JSPROP_ENUMERATE);
|
||||
|
||||
if (temp == nullptr || !ok) {
|
||||
return NS_ERROR_FAILURE;
|
||||
}
|
||||
|
||||
|
|
|
@ -536,7 +536,7 @@ Structure:
|
|||
|
||||
lateWrites
|
||||
----------
|
||||
This sections reports writes to the file system that happen during shutdown. The reported data contains the stack and the file names of the loaded libraries at the time the writes happened.
|
||||
This sections reports writes to the file system that happen during shutdown. The reported data contains the stack, the file names of the loaded libraries at the time the writes happened, and indicates if the late writes originated from the terminator watchdog.
|
||||
|
||||
The file names of the loaded libraries can contain unicode characters.
|
||||
|
||||
|
@ -560,6 +560,7 @@ Structure:
|
|||
],
|
||||
... other stacks ...
|
||||
],
|
||||
"isFromTerminatorWatchdog": <boolean>
|
||||
},
|
||||
|
||||
addonDetails
|
||||
|
|
|
@ -18,7 +18,9 @@ const size_t kMaxChromeStacksKept = 50;
|
|||
CombinedStacks::CombinedStacks() : CombinedStacks(kMaxChromeStacksKept) {}
|
||||
|
||||
CombinedStacks::CombinedStacks(size_t aMaxStacksCount)
|
||||
: mNextIndex(0), mMaxStacksCount(aMaxStacksCount) {}
|
||||
: mNextIndex(0),
|
||||
mMaxStacksCount(aMaxStacksCount),
|
||||
mIsFromTerminatorWatchdog(false) {}
|
||||
|
||||
size_t CombinedStacks::GetModuleCount() const { return mModules.size(); }
|
||||
|
||||
|
@ -104,6 +106,15 @@ void CombinedStacks::RemoveStack(unsigned aIndex) {
|
|||
}
|
||||
}
|
||||
|
||||
bool CombinedStacks::GetIsFromTerminatorWatchdog() {
|
||||
return mIsFromTerminatorWatchdog;
|
||||
}
|
||||
|
||||
void CombinedStacks::SetIsFromTerminatorWatchdog(
|
||||
bool aIsFromTerminatorWatchdog) {
|
||||
mIsFromTerminatorWatchdog = aIsFromTerminatorWatchdog;
|
||||
}
|
||||
|
||||
void CombinedStacks::Swap(CombinedStacks& aOther) {
|
||||
mModules.swap(aOther.mModules);
|
||||
mStacks.swap(aOther.mStacks);
|
||||
|
|
|
@ -39,6 +39,8 @@ class CombinedStacks {
|
|||
size_t GetStackCount() const;
|
||||
size_t SizeOfExcludingThis() const;
|
||||
void RemoveStack(unsigned aIndex);
|
||||
bool GetIsFromTerminatorWatchdog();
|
||||
void SetIsFromTerminatorWatchdog(bool aIsFromTerminatorWatchdog);
|
||||
|
||||
#if defined(MOZ_GECKO_PROFILER)
|
||||
/** Clears the contents of vectors and resets the index. */
|
||||
|
@ -53,6 +55,8 @@ class CombinedStacks {
|
|||
size_t mNextIndex;
|
||||
// The maximum number of stacks to keep in the CombinedStacks object.
|
||||
size_t mMaxStacksCount;
|
||||
// Indicates if this stack came from a late write in nsTerminator.
|
||||
bool mIsFromTerminatorWatchdog;
|
||||
|
||||
friend struct ::IPC::ParamTraits<CombinedStacks>;
|
||||
};
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace mozilla::Telemetry {
|
|||
|
||||
const size_t kMaxChromeStackDepth = 50;
|
||||
|
||||
ProcessedStack::ProcessedStack() = default;
|
||||
ProcessedStack::ProcessedStack() : mIsFromTerminatorWatchdog(false){};
|
||||
|
||||
size_t ProcessedStack::GetStackSize() const { return mStack.size(); }
|
||||
|
||||
|
@ -56,6 +56,15 @@ void ProcessedStack::AddModule(const Module& aModule) {
|
|||
mModules.push_back(aModule);
|
||||
}
|
||||
|
||||
const bool& ProcessedStack::GetIsFromTerminatorWatchdog() const {
|
||||
return mIsFromTerminatorWatchdog;
|
||||
}
|
||||
|
||||
void ProcessedStack::SetIsFromTerminatorWatchdog(
|
||||
const bool aIsFromTerminatorWatchdog) {
|
||||
mIsFromTerminatorWatchdog = aIsFromTerminatorWatchdog;
|
||||
}
|
||||
|
||||
void ProcessedStack::Clear() {
|
||||
mModules.clear();
|
||||
mStack.clear();
|
||||
|
|
|
@ -48,12 +48,15 @@ class ProcessedStack {
|
|||
void AddFrame(const Frame& aFrame);
|
||||
const Module& GetModule(unsigned aIndex) const;
|
||||
void AddModule(const Module& aFrame);
|
||||
const bool& GetIsFromTerminatorWatchdog() const;
|
||||
void SetIsFromTerminatorWatchdog(const bool aIsFromTerminatorWatchdog);
|
||||
|
||||
void Clear();
|
||||
|
||||
private:
|
||||
std::vector<Module> mModules;
|
||||
std::vector<Frame> mStack;
|
||||
bool mIsFromTerminatorWatchdog;
|
||||
};
|
||||
|
||||
// Get the current list of loaded modules, filter and pair it to the provided
|
||||
|
|
|
@ -38,6 +38,7 @@ const STACK2 = [
|
|||
[2, 10],
|
||||
[3, 15],
|
||||
];
|
||||
const isFromTerminatorWatchdog = false;
|
||||
// XXX The only error checking is for a zero-sized stack.
|
||||
const STACK_BOGUS = [];
|
||||
|
||||
|
@ -82,6 +83,8 @@ function write_late_writes_file(stack, suffix) {
|
|||
contents += element[0] + " " + element[1].toString(16) + "\n";
|
||||
}
|
||||
|
||||
contents += isFromTerminatorWatchdog ? 1 : 0 + "\n";
|
||||
|
||||
write_string_to_file(file, contents);
|
||||
}
|
||||
|
||||
|
@ -97,6 +100,8 @@ function run_test() {
|
|||
Assert.equal(lateWrites.memoryMap.length, 0);
|
||||
Assert.ok("stacks" in lateWrites);
|
||||
Assert.equal(lateWrites.stacks.length, 0);
|
||||
Assert.ok("isFromTerminatorWatchdog" in lateWrites);
|
||||
Assert.equal(lateWrites.isFromTerminatorWatchdog, false);
|
||||
|
||||
do_test_pending();
|
||||
Telemetry.asyncFetchTelemetryData(function() {
|
||||
|
@ -138,6 +143,9 @@ function actual_test() {
|
|||
return unevalCanonicalStack == obj;
|
||||
};
|
||||
}
|
||||
Assert.ok("isFromTerminatorWatchdog" in lateWrites);
|
||||
Assert.equal(lateWrites.isFromTerminatorWatchdog, false);
|
||||
|
||||
Assert.equal(uneval_STACKS.filter(stackChecker(first_stack)).length, 1);
|
||||
Assert.equal(uneval_STACKS.filter(stackChecker(second_stack)).length, 1);
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
#include "mozilla/UniquePtr.h"
|
||||
#include "mozilla/Unused.h"
|
||||
#include "mozilla/Telemetry.h"
|
||||
#include "mozilla/LateWriteChecks.h"
|
||||
|
||||
#include "mozilla/dom/workerinternals/RuntimeService.h"
|
||||
|
||||
|
@ -91,6 +92,7 @@ static ShutdownStep sShutdownSteps[] = {
|
|||
};
|
||||
|
||||
Atomic<bool> sShutdownNotified;
|
||||
Atomic<bool> sHasTerminatorLateWrite;
|
||||
|
||||
// Utility function: create a thread that is non-joinable,
|
||||
// does not prevent the process from terminating, is never
|
||||
|
@ -142,6 +144,12 @@ struct Options {
|
|||
* How many ticks before we should crash the process.
|
||||
*/
|
||||
uint32_t crashAfterTicks;
|
||||
|
||||
/**
|
||||
* A reduced number of ticks to crash earlier
|
||||
* in the event we're hung indefinitely.
|
||||
*/
|
||||
uint32_t reportWritesAfterTicks;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -154,6 +162,7 @@ void RunWatchdog(void* arg) {
|
|||
// about.
|
||||
UniquePtr<Options> options((Options*)arg);
|
||||
uint32_t crashAfterTicks = options->crashAfterTicks;
|
||||
uint32_t reportWritesAfterTicks = options->reportWritesAfterTicks;
|
||||
options = nullptr;
|
||||
|
||||
const uint32_t timeToLive = crashAfterTicks;
|
||||
|
@ -173,8 +182,17 @@ void RunWatchdog(void* arg) {
|
|||
#else
|
||||
usleep(1000000 /* usec */);
|
||||
#endif
|
||||
|
||||
if (gHeartbeat++ < timeToLive) {
|
||||
#if !defined(MOZ_VALGRIND) || !defined(MOZ_CODE_COVERAGE)
|
||||
// We should not report if we are running on Valgrind because
|
||||
// it is known to be much slower.
|
||||
// We only also want to BeginLateWriteCheck once for the first
|
||||
// time we exceed the reduced number of ticks.
|
||||
if (gHeartbeat >= reportWritesAfterTicks && !sHasTerminatorLateWrite) {
|
||||
sHasTerminatorLateWrite = true;
|
||||
BeginLateWriteChecks();
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -382,6 +400,7 @@ void nsTerminator::Start() {
|
|||
#endif // !defined(NS_FREE_PERMANENT_DATA)
|
||||
mInitialized = true;
|
||||
sShutdownNotified = false;
|
||||
sHasTerminatorLateWrite = false;
|
||||
}
|
||||
|
||||
// Prepare, allocate and start the watchdog thread.
|
||||
|
@ -390,6 +409,10 @@ void nsTerminator::StartWatchdog() {
|
|||
int32_t crashAfterMS =
|
||||
Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
|
||||
FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
|
||||
|
||||
int32_t reducedCrashTimeoutMS =
|
||||
Preferences::GetInt("toolkit.asyncshutdown.report_writes_after",
|
||||
FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
|
||||
// Ignore negative values
|
||||
if (crashAfterMS <= 0) {
|
||||
crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
|
||||
|
@ -402,6 +425,7 @@ void nsTerminator::StartWatchdog() {
|
|||
crashAfterMS = INT32_MAX;
|
||||
} else {
|
||||
crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
|
||||
reducedCrashTimeoutMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
|
||||
}
|
||||
|
||||
#ifdef MOZ_VALGRIND
|
||||
|
@ -426,9 +450,11 @@ void nsTerminator::StartWatchdog() {
|
|||
UniquePtr<Options> options(new Options());
|
||||
const PRIntervalTime ticksDuration = PR_MillisecondsToInterval(1000);
|
||||
options->crashAfterTicks = crashAfterMS / ticksDuration;
|
||||
options->reportWritesAfterTicks = reducedCrashTimeoutMS / ticksDuration;
|
||||
// Handle systems where ticksDuration is greater than crashAfterMS.
|
||||
if (options->crashAfterTicks == 0) {
|
||||
options->crashAfterTicks = crashAfterMS / 1000;
|
||||
options->reportWritesAfterTicks = reducedCrashTimeoutMS / 1000;
|
||||
}
|
||||
|
||||
DebugOnly<PRThread*> watchdogThread =
|
||||
|
@ -580,6 +606,8 @@ void nsTerminator::UpdateCrashReport(const char* aTopic) {
|
|||
CrashReporter::Annotation::ShutdownProgress, report);
|
||||
}
|
||||
|
||||
bool nsTerminator::IsCheckingLateWrites() { return sHasTerminatorLateWrite; }
|
||||
|
||||
void XPCOMShutdownNotified() {
|
||||
MOZ_DIAGNOSTIC_ASSERT(sShutdownNotified == false);
|
||||
sShutdownNotified = true;
|
||||
|
|
|
@ -18,6 +18,7 @@ class nsTerminator final : public nsIObserver {
|
|||
NS_DECL_NSIOBSERVER
|
||||
|
||||
nsTerminator();
|
||||
static bool IsCheckingLateWrites();
|
||||
|
||||
private:
|
||||
nsresult SelfInit();
|
||||
|
|
|
@ -14,10 +14,14 @@
|
|||
#include "mozilla/StaticPtr.h"
|
||||
#include "mozilla/Telemetry.h"
|
||||
#include "mozilla/Unused.h"
|
||||
#include "mozilla/Mutex.h"
|
||||
#include "nsAppDirectoryServiceDefs.h"
|
||||
#include "nsDirectoryServiceUtils.h"
|
||||
#include "nsLocalFile.h"
|
||||
#include "nsPrintfCString.h"
|
||||
#ifndef ANDROID
|
||||
# include "nsTerminator.h"
|
||||
#endif
|
||||
#include "mozilla/StackWalk.h"
|
||||
#include "plstr.h"
|
||||
#include "prio.h"
|
||||
|
@ -195,6 +199,11 @@ void LateWriteObserver::Observe(
|
|||
sha1Stream.Printf("%d %x\n", frame.mModIndex, (unsigned)frame.mOffset);
|
||||
}
|
||||
|
||||
#ifndef ANDROID
|
||||
sha1Stream.Printf("%d\n", mozilla::nsTerminator::IsCheckingLateWrites());
|
||||
#else
|
||||
sha1Stream.Printf("%d\n", false);
|
||||
#endif
|
||||
mozilla::SHA1Sum::Hash sha1;
|
||||
sha1Stream.Finish(sha1);
|
||||
|
||||
|
@ -216,6 +225,7 @@ void LateWriteObserver::Observe(
|
|||
/******************************* Setup/Teardown *******************************/
|
||||
|
||||
static mozilla::StaticAutoPtr<LateWriteObserver> sLateWriteObserver;
|
||||
mozilla::Mutex* mMutex = nullptr;
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
|
@ -228,13 +238,20 @@ void InitLateWriteChecks() {
|
|||
sLateWriteObserver = new LateWriteObserver(nativePath.get());
|
||||
}
|
||||
}
|
||||
mMutex = new Mutex("LateWriteCheck::mMutex");
|
||||
}
|
||||
|
||||
void BeginLateWriteChecks() {
|
||||
if (mMutex) {
|
||||
MutexAutoLock lock(*mMutex);
|
||||
}
|
||||
|
||||
if (sLateWriteObserver) {
|
||||
IOInterposer::Register(IOInterposeObserver::OpWriteFSync,
|
||||
sLateWriteObserver);
|
||||
}
|
||||
delete mMutex;
|
||||
mMutex = nullptr;
|
||||
}
|
||||
|
||||
void StopLateWriteChecks() {
|
||||
|
|
Загрузка…
Ссылка в новой задаче