зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1793523 - Replace IPC's use of waitpid with waitid to avoid racing with the crash reporter. r=nika
When IPC is waiting for a process to exit at the same time the crash reporter is trying to attach to the main thread with `ptrace`, both of them will use `waitpid` for their respective state changes, and these calls can race: the ptrace-stop is delivered to IPC, while the crash reporter and crashed child process hang forever. This patch changes IPC to use `waitid` with `WNOWAIT`, which allows us to check the process status without side-effecting it; if it is an exit, the call is reissued without that flag to obtain the status as before. (This may not be a problem on macOS, where we use Mach APIs instead of `ptrace`, but `waitid` has been in POSIX long enough that we should be safe using it on all Unix platforms.) This patch also adds some logging when child processes exit with a failure status or due to a signal. Differential Revision: https://phabricator.services.mozilla.com/D159186
This commit is contained in:
Родитель
fb2ce68a73
Коммит
9964ac8e24
|
@ -274,7 +274,13 @@ bool KillProcess(ProcessHandle process, int exit_code);
|
|||
// In various error cases (e.g., the process doesn't exist or isn't a
|
||||
// child of this process) it will also return true to indicate that
|
||||
// the caller should give up and not try again.
|
||||
bool IsProcessDead(ProcessHandle handle);
|
||||
//
|
||||
// If the `blocking` parameter is set to true, this function will try
|
||||
// to block the calling thread indefinitely until the process exits.
|
||||
// This may not be possible (if the child is also being debugged by
|
||||
// the parent process, e.g. due to the crash reporter), in which case
|
||||
// it will return false and the caller will need to wait and retry.
|
||||
bool IsProcessDead(ProcessHandle handle, bool blocking = false);
|
||||
#endif
|
||||
|
||||
} // namespace base
|
||||
|
|
|
@ -187,7 +187,7 @@ void CloseSuperfluousFds(void* aCtx, bool (*aShouldPreserve)(void*, int)) {
|
|||
}
|
||||
}
|
||||
|
||||
bool IsProcessDead(ProcessHandle handle) {
|
||||
bool IsProcessDead(ProcessHandle handle, bool blocking) {
|
||||
#ifdef MOZ_ENABLE_FORKSERVER
|
||||
if (mozilla::ipc::ForkServiceChild::Get()) {
|
||||
// We only know if a process exists, but not if it has crashed.
|
||||
|
@ -202,8 +202,15 @@ bool IsProcessDead(ProcessHandle handle) {
|
|||
return r < 0 && errno == ESRCH;
|
||||
}
|
||||
#endif
|
||||
int status;
|
||||
const int result = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));
|
||||
|
||||
// We use `WNOWAIT` to read the process status without
|
||||
// side-effecting it, in case it's something unexpected like a
|
||||
// ptrace-stop for the crash reporter. If is an exit, the call is
|
||||
// reissued (see the end of this function) without that flag in
|
||||
// order to collect the process.
|
||||
siginfo_t si{};
|
||||
const int wflags = WEXITED | WNOWAIT | (blocking ? 0 : WNOHANG);
|
||||
int result = HANDLE_EINTR(waitid(P_PID, handle, &si, wflags));
|
||||
if (result == -1) {
|
||||
// This shouldn't happen, but sometimes it does. The error is
|
||||
// probably ECHILD and the reason is probably that a pid was
|
||||
|
@ -215,14 +222,54 @@ bool IsProcessDead(ProcessHandle handle) {
|
|||
// So, lacking reliable information, we indicate that the process
|
||||
// is dead, in the hope that the caller will give up and stop
|
||||
// calling us. See also bug 943174 and bug 933680.
|
||||
CHROMIUM_LOG(ERROR) << "waitpid failed pid:" << handle
|
||||
<< " errno:" << errno;
|
||||
CHROMIUM_LOG(ERROR) << "waitid failed pid:" << handle << " errno:" << errno;
|
||||
return true;
|
||||
} else if (result == 0) {
|
||||
}
|
||||
|
||||
if (si.si_pid == 0) {
|
||||
// the child hasn't exited yet.
|
||||
return false;
|
||||
}
|
||||
|
||||
DCHECK(si.si_pid == handle);
|
||||
switch (si.si_code) {
|
||||
case CLD_STOPPED:
|
||||
case CLD_CONTINUED:
|
||||
DCHECK(false) << "waitid returned an event type that it shouldn't have";
|
||||
[[fallthrough]];
|
||||
case CLD_TRAPPED:
|
||||
CHROMIUM_LOG(WARNING) << "ignoring non-exit event for process " << handle;
|
||||
return false;
|
||||
|
||||
case CLD_KILLED:
|
||||
case CLD_DUMPED:
|
||||
CHROMIUM_LOG(WARNING)
|
||||
<< "process " << handle << " exited on signal " << si.si_status;
|
||||
break;
|
||||
|
||||
case CLD_EXITED:
|
||||
if (si.si_status != 0) {
|
||||
CHROMIUM_LOG(WARNING)
|
||||
<< "process " << handle << " exited with status " << si.si_status;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
CHROMIUM_LOG(ERROR) << "unexpected waitid si_code value: " << si.si_code;
|
||||
DCHECK(false);
|
||||
// This shouldn't happen, but assume that the process exited to
|
||||
// avoid the caller possibly ending up in a loop.
|
||||
}
|
||||
|
||||
// Now consume the status / collect the dead process
|
||||
const int old_si_code = si.si_code;
|
||||
si.si_pid = 0;
|
||||
// In theory it shouldn't matter either way if we use `WNOHANG` at
|
||||
// this point, but just in case, avoid unexpected blocking.
|
||||
result = HANDLE_EINTR(waitid(P_PID, handle, &si, WEXITED | WNOHANG));
|
||||
DCHECK(result == 0);
|
||||
DCHECK(si.si_pid == handle);
|
||||
DCHECK(si.si_code == old_si_code);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <signal.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "base/eintr_wrapper.h"
|
||||
#include "base/message_loop.h"
|
||||
|
@ -46,8 +47,14 @@ class ChildReaper : public base::MessagePumpLibevent::SignalEvent,
|
|||
|
||||
protected:
|
||||
void WaitForChildExit() {
|
||||
DCHECK(process_);
|
||||
HANDLE_EINTR(waitpid(process_, NULL, 0));
|
||||
CHECK(process_);
|
||||
while (!base::IsProcessDead(process_, true)) {
|
||||
// It doesn't matter if this is interrupted; we just need to
|
||||
// wait for some amount of time while the other process status
|
||||
// event is (hopefully) handled. This is used only during an
|
||||
// error case at shutdown, so a 1s wait won't be too noticeable.
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
|
||||
pid_t process_;
|
||||
|
|
Загрузка…
Ссылка в новой задаче