Bug 1840164 - Pre-commit stack pages on background hang monitor thread. r=mhowell

When a first hang is detected, the BHMgr Monitor thread needs to commit
5 more pages of stack to run profiler_suspend_and_sample_thread, which
contains big stack variables. If that occurs while we are low on memory,
failure to commit stack pages can crash the process.

In bug 1716727, we added delays on failed allocations to try to avoid
crashing the main process under low-memory conditions. These delays
could trigger the background hang monitor, which could in turn crash
the process, because they occur in a low-memory condition where we will
likely fail to commit.

We can pre-commit the 5 pages of stack at thread initialization to
ensure that they will already be committed when we later need them. Or
at least, we can try to see if that works.

We do that with a wrapper for the __chkstk function. We add a new test
in the NativeNt cppunit test, to ensure that our wrapper function
behaves as expected.

Differential Revision: https://phabricator.services.mozilla.com/D182582
This commit is contained in:
Yannis Juglaret 2023-09-12 12:27:46 +00:00
Родитель af2afe1c48
Коммит 8c54dbd6c0
3 изменённых файлов: 81 добавлений и 4 удалений

Просмотреть файл

@ -1284,6 +1284,14 @@ inline DWORD RtlGetCurrentThreadId() {
0xFFFFFFFFUL);
}
// Returns the StackBase field of the calling thread's TEB, viewed as an
// _NT_TIB.
inline PVOID RtlGetThreadStackBase() {
  const auto* tib = reinterpret_cast<_NT_TIB*>(::NtCurrentTeb());
  return tib->StackBase;
}
// Returns the StackLimit field of the calling thread's TEB, viewed as an
// _NT_TIB.
inline PVOID RtlGetThreadStackLimit() {
  const auto* tib = reinterpret_cast<_NT_TIB*>(::NtCurrentTeb());
  return tib->StackLimit;
}
const HANDLE kCurrentProcess = reinterpret_cast<HANDLE>(-1);
inline LauncherResult<DWORD> GetParentProcessId() {
@ -1723,6 +1731,22 @@ class AutoMappedView final {
}
};
#if defined(_M_X64)
// CheckStack ensures that stack memory pages are committed up to a given size
// in bytes from the current stack pointer. It updates the thread stack limit,
// which points to the lowest committed stack address.
//
// The function is declared naked and its whole body is the two instructions
// below: copy ecx into eax, then jump (not call) to the toolchain's stack
// probe routine -- ___chkstk_ms when building with MinGW, __chkstk otherwise.
// Because control is transferred with a jump, the probe routine's own return
// presumably goes straight back to CheckStack's caller.
//
// NOTE(review): this relies on |size| arriving in ecx and on the probe routine
// reading its byte count from eax -- confirm against the Windows x64 calling
// convention and the stack probe routine's documentation before changing it.
MOZ_NEVER_INLINE __attribute__((naked)) inline void CheckStack(uint32_t size) {
asm volatile(
// Hand the requested byte count to the probe routine.
"mov %ecx, %eax;"
# if defined(__MINGW32__)
"jmp ___chkstk_ms;"
# else
"jmp __chkstk;"
# endif // __MINGW32__
);
}
#endif // _M_X64
} // namespace nt
} // namespace mozilla

Просмотреть файл

@ -10,6 +10,7 @@
#include "mozilla/UniquePtr.h"
#include "mozilla/WindowsEnumProcessModules.h"
#include <limits>
#include <stdio.h>
#include <windows.h>
#include <strsafe.h>
@ -365,6 +366,42 @@ MOZ_NEVER_INLINE PVOID SwapThreadLocalStoragePointer(PVOID aNewValue) {
return oldValue;
}
#if defined(_M_X64)
// Exercises CheckStack on the current thread.
//
// The test snapshots the thread's stack base, stack limit and stack pointer,
// then asks CheckStack for every size from "what is already committed below
// the stack pointer" up to 0x10000 - 1 additional bytes. After each request
// the thread stack limit must equal the initial limit lowered by the extra
// byte count rounded up to a multiple of 0x1000.
//
// Returns true when every request produced the expected limit; prints a
// TEST-FAIL line and returns false otherwise.
bool TestCheckStack() {
  uint8_t* const base = reinterpret_cast<uint8_t*>(RtlGetThreadStackBase());
  uint8_t* const initialLimit =
      reinterpret_cast<uint8_t*>(RtlGetThreadStackLimit());

  uint8_t* rsp = nullptr;
  asm volatile("mov %%rsp, %0;" : "=r"(rsp));

  // Expected layout: initialLimit <= rsp < base (and hence initialLimit < base).
  const bool limitBelowBase = initialLimit < base;
  const bool pointerInRange = initialLimit <= rsp && rsp < base;
  if (!limitBelowBase || !pointerInRange) {
    printf("TEST-FAIL | NativeNt | Stack addresses are not coherent.\n");
    return false;
  }

  const uintptr_t alreadyCommitted = static_cast<uintptr_t>(rsp - initialLimit);
  const uint32_t kMaxExtra = 0x10000;

  // CheckStack takes a 32-bit size, so the largest request must fit in one.
  if ((alreadyCommitted + kMaxExtra) > std::numeric_limits<uint32_t>::max()) {
    printf(
        "TEST-FAIL | NativeNt | The stack limit is too high to perform the "
        "test.\n");
    return false;
  }

  const uint32_t committed32 = static_cast<uint32_t>(alreadyCommitted);
  uint32_t extra = 0;
  while (extra < kMaxExtra) {
    CheckStack(committed32 + extra);

    // The limit is expected to move down in whole 0x1000-byte steps.
    const uint32_t roundedExtra = (extra + 0xFFF) & ~0xFFF;
    uint8_t* const wantedLimit = initialLimit - roundedExtra;
    const auto observedLimit = RtlGetThreadStackLimit();
    if (wantedLimit != observedLimit) {
      printf(
          "TEST-FAIL | NativeNt | CheckStack did not grow the stack "
          "correctly (expected: %p, got: %p).\n",
          wantedLimit, observedLimit);
      return false;
    }
    ++extra;
  }

  return true;
}
#endif // _M_X64
int wmain(int argc, wchar_t* argv[]) {
UNICODE_STRING normal;
::RtlInitUnicodeString(&normal, kNormal);
@ -601,6 +638,12 @@ int wmain(int argc, wchar_t* argv[]) {
return 1;
}
#if defined(_M_X64)
if (!TestCheckStack()) {
return 1;
}
#endif // _M_X64
printf("TEST-PASS | NativeNt | All tests ran successfully\n");
return 0;
}

Просмотреть файл

@ -35,6 +35,10 @@
#include <algorithm>
#if defined(XP_WIN)
# include "mozilla/NativeNt.h"
#endif
// Activate BHR only for one every BHR_BETA_MOD users.
// We're doing experimentation with collecting a lot more data from BHR, and
// don't want to enable it for beta users at the moment. We can scale this up in
@ -87,8 +91,14 @@ class BackgroundHangManager : public nsIObserver {
ProfilerThreadId mHangMonitorProfilerThreadId;
void SetMonitorThreadId() {
// One-time setup for the hang monitor thread: records the calling thread's
// profiler thread id and, on Windows x64 builds with the Gecko profiler,
// pre-commits extra stack.
// NOTE(review): presumably runs on the "BHMgr Monitor" thread as its initial
// runnable -- confirm against the NS_NewNamedThread call in the constructor.
void InitMonitorThread() {
mHangMonitorProfilerThreadId = profiler_current_thread_id();
#if defined(MOZ_GECKO_PROFILER) && defined(XP_WIN) && defined(_M_X64)
// Pre-commit 5 more pages of stack to guarantee enough committed stack
// space on this thread upon hang detection, when we will need to run
// profiler_suspend_and_sample_thread (bug 1840164).
mozilla::nt::CheckStack(5 * 0x1000);
#endif
}
// Used for recording a permahang in case we don't ever make it back to
@ -326,14 +336,14 @@ bool BackgroundHangThread::sTlsKeyInitialized;
BackgroundHangManager::BackgroundHangManager()
: mShutdown(false), mLock("BackgroundHangManager") {
// Save a reference to sInstance now so that the destructor is not triggered
// if the SetMonitorThreadId RunnableMethod is released before we are done.
// if the InitMonitorThread RunnableMethod is released before we are done.
sInstance = this;
DebugOnly<nsresult> rv =
NS_NewNamedThread("BHMgr Monitor", getter_AddRefs(mHangMonitorThread),
mozilla::NewRunnableMethod(
"BackgroundHangManager::SetMonitorThreadId", this,
&BackgroundHangManager::SetMonitorThreadId));
"BackgroundHangManager::InitMonitorThread", this,
&BackgroundHangManager::InitMonitorThread));
MOZ_ASSERT(NS_SUCCEEDED(rv) && mHangMonitorThread,
"Failed to create BHR processing thread");