Bug 1610134: Part 1: add timeout pref that turns on late write checking to see if it's possible to crash browser earlier. r=dthayer,chutten

Differential Revision: https://phabricator.services.mozilla.com/D67884
This commit is contained in:
Emma Malysz 2020-04-21 06:09:28 +00:00
Родитель 3d235e9172
Коммит 044488a3af
12 изменённых файлов: 133 добавлений и 12 удалений

Просмотреть файл

@ -805,6 +805,7 @@ pref("toolkit.telemetry.debugSlowSql", false);
pref("toolkit.telemetry.unified", true);
// AsyncShutdown delay before crashing in case of shutdown freeze
#if !defined(MOZ_ASAN) && !defined(MOZ_TSAN)
pref("toolkit.asyncshutdown.report_writes_after", 20000); // 20 seconds
pref("toolkit.asyncshutdown.crash_timeout", 60000); // 1 minute
#else
// ASan and TSan builds can be considerably slower. Extend the grace period

Просмотреть файл

@ -2658,6 +2658,23 @@ telemetry:
- 'main'
- 'content'
failed_late_write:
bug_numbers:
- 1610134
description: >
The number of times a late write stack failed to be parsed.
expires: never
kind: uint
notification_emails:
- emalysz@mozilla.com
release_channel_collection: opt-out
products:
- 'firefox'
- 'fennec'
- 'geckoview'
record_in_processes:
- 'main'
telemetry.discarded:
accumulations:
bug_numbers:

Просмотреть файл

@ -97,6 +97,9 @@
#include "TelemetryHistogram.h"
#include "TelemetryOrigin.h"
#include "TelemetryScalar.h"
#ifndef ANDROID
# include "nsTerminator.h"
#endif
namespace {
@ -949,8 +952,8 @@ static bool IsValidBreakpadId(const std::string& breakpadId) {
// Read a stack from the given file name. In case of any error, aStack is
// unchanged.
static void ReadStack(PathCharPtr aFileName,
Telemetry::ProcessedStack& aStack) {
static void ReadLateWriteStack(PathCharPtr aFileName,
Telemetry::ProcessedStack& aStack) {
IFStream file(aFileName);
size_t numModules;
@ -1014,6 +1017,14 @@ static void ReadStack(PathCharPtr aFileName,
stack.AddFrame(frame);
}
bool isFromTerminatorWatchdog;
file >> isFromTerminatorWatchdog;
if (file.fail()) {
ScalarAdd(mozilla::Telemetry::ScalarID::TELEMETRY_FAILED_LATE_WRITE, 1);
return;
}
stack.SetIsFromTerminatorWatchdog(isFromTerminatorWatchdog);
aStack = stack;
}
@ -1033,9 +1044,12 @@ void TelemetryImpl::ReadLateWritesStacks(nsIFile* aProfileDir) {
}
Telemetry::ProcessedStack stack;
ReadStack(file->NativePath().get(), stack);
ReadLateWriteStack(file->NativePath().get(), stack);
if (stack.GetStackSize() != 0) {
mLateWritesStacks.AddStack(stack);
if (stack.GetIsFromTerminatorWatchdog()) {
mLateWritesStacks.SetIsFromTerminatorWatchdog(true);
}
}
// Delete the file so that we don't report it again on the next run.
file->Remove(false);
@ -1056,16 +1070,23 @@ TelemetryImpl::GetLateWrites(JSContext* cx, JS::MutableHandle<JS::Value> ret) {
// CreateJSStackObject, but we would still need to figure out where to call
// JS_RemoveObjectRoot. Would it be ok to never call JS_RemoveObjectRoot
// and just set the pointer to nullptr is the telemetry destructor?
JSObject* report;
JS::Rooted<JSObject*> temp(cx, JS_NewPlainObject(cx));
if (!mCachedTelemetryData) {
CombinedStacks empty;
report = CreateJSStackObject(cx, empty);
temp = CreateJSStackObject(cx, empty);
} else {
report = CreateJSStackObject(cx, mLateWritesStacks);
temp = CreateJSStackObject(cx, mLateWritesStacks);
}
if (report == nullptr) {
// Add this information afer we CreateJSStackObject because
// the function is used elsewhere, not just for latewrites.
JS::Rooted<JSObject*> report(cx, temp);
bool isFromTerminatorWatchdog =
mLateWritesStacks.GetIsFromTerminatorWatchdog();
bool ok = JS_DefineProperty(cx, report, "isFromTerminatorWatchdog",
isFromTerminatorWatchdog, JSPROP_ENUMERATE);
if (temp == nullptr || !ok) {
return NS_ERROR_FAILURE;
}

Просмотреть файл

@ -536,7 +536,7 @@ Structure:
lateWrites
----------
This sections reports writes to the file system that happen during shutdown. The reported data contains the stack and the file names of the loaded libraries at the time the writes happened.
This sections reports writes to the file system that happen during shutdown. The reported data contains the stack, the file names of the loaded libraries at the time the writes happened, and indicates if the late writes originated from the terminator watchdog.
The file names of the loaded libraries can contain unicode characters.
@ -560,6 +560,7 @@ Structure:
],
... other stacks ...
],
"isFromTerminatorWatchdog": <boolean>
},
addonDetails

Просмотреть файл

@ -18,7 +18,9 @@ const size_t kMaxChromeStacksKept = 50;
CombinedStacks::CombinedStacks() : CombinedStacks(kMaxChromeStacksKept) {}
CombinedStacks::CombinedStacks(size_t aMaxStacksCount)
: mNextIndex(0), mMaxStacksCount(aMaxStacksCount) {}
: mNextIndex(0),
mMaxStacksCount(aMaxStacksCount),
mIsFromTerminatorWatchdog(false) {}
size_t CombinedStacks::GetModuleCount() const { return mModules.size(); }
@ -104,6 +106,15 @@ void CombinedStacks::RemoveStack(unsigned aIndex) {
}
}
bool CombinedStacks::GetIsFromTerminatorWatchdog() {
return mIsFromTerminatorWatchdog;
}
void CombinedStacks::SetIsFromTerminatorWatchdog(
bool aIsFromTerminatorWatchdog) {
mIsFromTerminatorWatchdog = aIsFromTerminatorWatchdog;
}
void CombinedStacks::Swap(CombinedStacks& aOther) {
mModules.swap(aOther.mModules);
mStacks.swap(aOther.mStacks);

Просмотреть файл

@ -39,6 +39,8 @@ class CombinedStacks {
size_t GetStackCount() const;
size_t SizeOfExcludingThis() const;
void RemoveStack(unsigned aIndex);
bool GetIsFromTerminatorWatchdog();
void SetIsFromTerminatorWatchdog(bool aIsFromTerminatorWatchdog);
#if defined(MOZ_GECKO_PROFILER)
/** Clears the contents of vectors and resets the index. */
@ -53,6 +55,8 @@ class CombinedStacks {
size_t mNextIndex;
// The maximum number of stacks to keep in the CombinedStacks object.
size_t mMaxStacksCount;
// Indicates if this stack came from a late write in nsTerminator.
bool mIsFromTerminatorWatchdog;
friend struct ::IPC::ParamTraits<CombinedStacks>;
};

Просмотреть файл

@ -30,7 +30,7 @@ namespace mozilla::Telemetry {
const size_t kMaxChromeStackDepth = 50;
ProcessedStack::ProcessedStack() = default;
ProcessedStack::ProcessedStack() : mIsFromTerminatorWatchdog(false){};
size_t ProcessedStack::GetStackSize() const { return mStack.size(); }
@ -56,6 +56,15 @@ void ProcessedStack::AddModule(const Module& aModule) {
mModules.push_back(aModule);
}
const bool& ProcessedStack::GetIsFromTerminatorWatchdog() const {
return mIsFromTerminatorWatchdog;
}
void ProcessedStack::SetIsFromTerminatorWatchdog(
const bool aIsFromTerminatorWatchdog) {
mIsFromTerminatorWatchdog = aIsFromTerminatorWatchdog;
}
void ProcessedStack::Clear() {
mModules.clear();
mStack.clear();

Просмотреть файл

@ -48,12 +48,15 @@ class ProcessedStack {
void AddFrame(const Frame& aFrame);
const Module& GetModule(unsigned aIndex) const;
void AddModule(const Module& aFrame);
const bool& GetIsFromTerminatorWatchdog() const;
void SetIsFromTerminatorWatchdog(const bool aIsFromTerminatorWatchdog);
void Clear();
private:
std::vector<Module> mModules;
std::vector<Frame> mStack;
bool mIsFromTerminatorWatchdog;
};
// Get the current list of loaded modules, filter and pair it to the provided

Просмотреть файл

@ -38,6 +38,7 @@ const STACK2 = [
[2, 10],
[3, 15],
];
const isFromTerminatorWatchdog = false;
// XXX The only error checking is for a zero-sized stack.
const STACK_BOGUS = [];
@ -82,6 +83,8 @@ function write_late_writes_file(stack, suffix) {
contents += element[0] + " " + element[1].toString(16) + "\n";
}
contents += isFromTerminatorWatchdog ? 1 : 0 + "\n";
write_string_to_file(file, contents);
}
@ -97,6 +100,8 @@ function run_test() {
Assert.equal(lateWrites.memoryMap.length, 0);
Assert.ok("stacks" in lateWrites);
Assert.equal(lateWrites.stacks.length, 0);
Assert.ok("isFromTerminatorWatchdog" in lateWrites);
Assert.equal(lateWrites.isFromTerminatorWatchdog, false);
do_test_pending();
Telemetry.asyncFetchTelemetryData(function() {
@ -138,6 +143,9 @@ function actual_test() {
return unevalCanonicalStack == obj;
};
}
Assert.ok("isFromTerminatorWatchdog" in lateWrites);
Assert.equal(lateWrites.isFromTerminatorWatchdog, false);
Assert.equal(uneval_STACKS.filter(stackChecker(first_stack)).length, 1);
Assert.equal(uneval_STACKS.filter(stackChecker(second_stack)).length, 1);

Просмотреть файл

@ -50,6 +50,7 @@
#include "mozilla/UniquePtr.h"
#include "mozilla/Unused.h"
#include "mozilla/Telemetry.h"
#include "mozilla/LateWriteChecks.h"
#include "mozilla/dom/workerinternals/RuntimeService.h"
@ -91,6 +92,7 @@ static ShutdownStep sShutdownSteps[] = {
};
Atomic<bool> sShutdownNotified;
Atomic<bool> sHasTerminatorLateWrite;
// Utility function: create a thread that is non-joinable,
// does not prevent the process from terminating, is never
@ -142,6 +144,12 @@ struct Options {
* How many ticks before we should crash the process.
*/
uint32_t crashAfterTicks;
/**
* A reduced number of ticks to crash earlier
* in the event we're hung indefinitely.
*/
uint32_t reportWritesAfterTicks;
};
/**
@ -154,6 +162,7 @@ void RunWatchdog(void* arg) {
// about.
UniquePtr<Options> options((Options*)arg);
uint32_t crashAfterTicks = options->crashAfterTicks;
uint32_t reportWritesAfterTicks = options->reportWritesAfterTicks;
options = nullptr;
const uint32_t timeToLive = crashAfterTicks;
@ -173,8 +182,17 @@ void RunWatchdog(void* arg) {
#else
usleep(1000000 /* usec */);
#endif
if (gHeartbeat++ < timeToLive) {
#if !defined(MOZ_VALGRIND) || !defined(MOZ_CODE_COVERAGE)
// We should not report if we are running on Valgrind because
// it is known to be much slower.
// We only also want to BeginLateWriteCheck once for the first
// time we exceed the reduced number of ticks.
if (gHeartbeat >= reportWritesAfterTicks && !sHasTerminatorLateWrite) {
sHasTerminatorLateWrite = true;
BeginLateWriteChecks();
}
#endif
continue;
}
@ -382,6 +400,7 @@ void nsTerminator::Start() {
#endif // !defined(NS_FREE_PERMANENT_DATA)
mInitialized = true;
sShutdownNotified = false;
sHasTerminatorLateWrite = false;
}
// Prepare, allocate and start the watchdog thread.
@ -390,6 +409,10 @@ void nsTerminator::StartWatchdog() {
int32_t crashAfterMS =
Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
int32_t reducedCrashTimeoutMS =
Preferences::GetInt("toolkit.asyncshutdown.report_writes_after",
FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
// Ignore negative values
if (crashAfterMS <= 0) {
crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
@ -402,6 +425,7 @@ void nsTerminator::StartWatchdog() {
crashAfterMS = INT32_MAX;
} else {
crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
reducedCrashTimeoutMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
}
#ifdef MOZ_VALGRIND
@ -426,9 +450,11 @@ void nsTerminator::StartWatchdog() {
UniquePtr<Options> options(new Options());
const PRIntervalTime ticksDuration = PR_MillisecondsToInterval(1000);
options->crashAfterTicks = crashAfterMS / ticksDuration;
options->reportWritesAfterTicks = reducedCrashTimeoutMS / ticksDuration;
// Handle systems where ticksDuration is greater than crashAfterMS.
if (options->crashAfterTicks == 0) {
options->crashAfterTicks = crashAfterMS / 1000;
options->reportWritesAfterTicks = reducedCrashTimeoutMS / 1000;
}
DebugOnly<PRThread*> watchdogThread =
@ -580,6 +606,8 @@ void nsTerminator::UpdateCrashReport(const char* aTopic) {
CrashReporter::Annotation::ShutdownProgress, report);
}
bool nsTerminator::IsCheckingLateWrites() { return sHasTerminatorLateWrite; }
void XPCOMShutdownNotified() {
MOZ_DIAGNOSTIC_ASSERT(sShutdownNotified == false);
sShutdownNotified = true;

Просмотреть файл

@ -18,6 +18,7 @@ class nsTerminator final : public nsIObserver {
NS_DECL_NSIOBSERVER
nsTerminator();
static bool IsCheckingLateWrites();
private:
nsresult SelfInit();

Просмотреть файл

@ -14,10 +14,14 @@
#include "mozilla/StaticPtr.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Unused.h"
#include "mozilla/Mutex.h"
#include "nsAppDirectoryServiceDefs.h"
#include "nsDirectoryServiceUtils.h"
#include "nsLocalFile.h"
#include "nsPrintfCString.h"
#ifndef ANDROID
# include "nsTerminator.h"
#endif
#include "mozilla/StackWalk.h"
#include "plstr.h"
#include "prio.h"
@ -195,6 +199,11 @@ void LateWriteObserver::Observe(
sha1Stream.Printf("%d %x\n", frame.mModIndex, (unsigned)frame.mOffset);
}
#ifndef ANDROID
sha1Stream.Printf("%d\n", mozilla::nsTerminator::IsCheckingLateWrites());
#else
sha1Stream.Printf("%d\n", false);
#endif
mozilla::SHA1Sum::Hash sha1;
sha1Stream.Finish(sha1);
@ -216,6 +225,7 @@ void LateWriteObserver::Observe(
/******************************* Setup/Teardown *******************************/
static mozilla::StaticAutoPtr<LateWriteObserver> sLateWriteObserver;
mozilla::Mutex* mMutex = nullptr;
namespace mozilla {
@ -228,13 +238,20 @@ void InitLateWriteChecks() {
sLateWriteObserver = new LateWriteObserver(nativePath.get());
}
}
mMutex = new Mutex("LateWriteCheck::mMutex");
}
void BeginLateWriteChecks() {
if (mMutex) {
MutexAutoLock lock(*mMutex);
}
if (sLateWriteObserver) {
IOInterposer::Register(IOInterposeObserver::OpWriteFSync,
sLateWriteObserver);
}
delete mMutex;
mMutex = nullptr;
}
void StopLateWriteChecks() {