From e2fe309320f5e837e0bb9ddd74561fd84c1d7120 Mon Sep 17 00:00:00 2001 From: David Chisnall Date: Mon, 11 Jul 2022 12:56:59 +0100 Subject: [PATCH] Fix process sandbox for double faults. The `snmalloc::message` function calls `fsync` on `stderr`. With Capsicum, this is an explicit permission and so tests that hit a code path that did some logging (specifically, the curl test) in debug builds during system call emulation would receive the second trap. The first symptom of this is fixed by adding fsync permission to the standard out / error file descriptors. With that change, all of the tests pass but the underlying problem remains: the signal is delivered *after* return from the first system call, at which point it does not have a sensible `ucontext` and things break in exciting ways (sufficiently exciting that a debugger can't tell you anything useful). The root cause is addressed by adding `SA_NODEFER` when registering the signal handler. This allows system calls that are used to emulate other signal handlers to deliver their signals on top of the stack. If a system call is used in emulating itself, this still goes horribly wrong, but in a way that's much easier to debug. At the same time, use the SIGCAP support that is in process of being upstreamed to FreeBSD. This makes debugging Capsicum failures easier because the debugger does not try to hide the signal from the application whose correctness depends on handling it. --- .../platform/child_process_pdfork.h | 1 + .../process_sandbox/platform/sandbox_capsicum.h | 17 +++++++++++++---- .../platform/syscall_context_freebsd.h | 8 +++++++- .../process_sandbox/src/library_runner.cc | 6 +++++- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/experiments/process_sandbox/include/process_sandbox/platform/child_process_pdfork.h b/experiments/process_sandbox/include/process_sandbox/platform/child_process_pdfork.h index c106e69c..2988eb83 100644 --- a/experiments/process_sandbox/include/process_sandbox/platform/child_process_pdfork.h +++ b/experiments/process_sandbox/include/process_sandbox/platform/child_process_pdfork.h @@ -3,6 +3,7 @@ #ifdef USE_KQUEUE_PROCDESC # include # include +# include # include # include # include diff --git a/experiments/process_sandbox/include/process_sandbox/platform/sandbox_capsicum.h b/experiments/process_sandbox/include/process_sandbox/platform/sandbox_capsicum.h index a10b43e6..7f79ab26 100644 --- a/experiments/process_sandbox/include/process_sandbox/platform/sandbox_capsicum.h +++ b/experiments/process_sandbox/include/process_sandbox/platform/sandbox_capsicum.h @@ -66,9 +66,12 @@ namespace sandbox::platform }; // Standard in is read only limit_fd(STDIN_FILENO, CAP_READ); - // Standard out and error are write only - limit_fd(STDOUT_FILENO, CAP_WRITE); - limit_fd(STDERR_FILENO, CAP_WRITE); + // Standard out and error are write only. Allow them fsync to make sure + // that debugging messages are properly flushed. Without this, + // snmalloc's `message()` raises a SIGCAP and if we call it in debug + // builds while handling the signal then we infinite loop. + limit_fd(STDOUT_FILENO, CAP_WRITE | CAP_FSYNC); + limit_fd(STDERR_FILENO, CAP_WRITE | CAP_FSYNC); // The socket is used with a call-return protocol for requesting // services for malloc. limit_fd(PageMapUpdates, CAP_WRITE, CAP_READ); @@ -85,7 +88,13 @@ namespace sandbox::platform { limit_fd(libfd, CAP_READ, CAP_FSTAT, CAP_LOOKUP, CAP_MMAP_RX); } - int arg = PROC_TRAPCAP_CTL_ENABLE; + int arg = +# ifdef PROC_TRAPCAP_CTL_ENABLE_SIGCAP + PROC_TRAPCAP_CTL_ENABLE_SIGCAP +# else + PROC_TRAPCAP_CTL_ENABLE +# endif + ; int ret = procctl(P_PID, getpid(), PROC_TRAPCAP_CTL, &arg); SANDBOX_INVARIANT( ret == 0, "Failed to register for traps on Capsicum violations"); diff --git a/experiments/process_sandbox/include/process_sandbox/platform/syscall_context_freebsd.h b/experiments/process_sandbox/include/process_sandbox/platform/syscall_context_freebsd.h index 79691a7e..8c4ff199 100644 --- a/experiments/process_sandbox/include/process_sandbox/platform/syscall_context_freebsd.h +++ b/experiments/process_sandbox/include/process_sandbox/platform/syscall_context_freebsd.h @@ -86,7 +86,13 @@ namespace sandbox::platform /** * The signal delivered for a Capsicum-disallowed system call. */ - static constexpr int syscall_signal = SIGTRAP; + static constexpr int syscall_signal = +# ifdef SIGCAP + SIGCAP +# else + SIGTRAP +# endif + ; /** * Constructor. Takes the arguments to a signal handler. diff --git a/experiments/process_sandbox/src/library_runner.cc b/experiments/process_sandbox/src/library_runner.cc index 210b8982..3cc2c250 100644 --- a/experiments/process_sandbox/src/library_runner.cc +++ b/experiments/process_sandbox/src/library_runner.cc @@ -1049,7 +1049,11 @@ int main() sandbox::platform::Sandbox::apply_sandboxing_policy_postexec(); struct sigaction sa; memset(&sa, 0, sizeof(sa)); - sa.sa_flags = SA_SIGINFO; + // If we do a disallowed system call while handling another, deliver the + // signal immediately. We may still not be able to handle it usefully, but + // we will definitely not handle it correctly if it is not delivered during + // system call return. + sa.sa_flags = SA_SIGINFO | SA_NODEFER; sa.sa_sigaction = (void (*)(int, siginfo_t*, void*))emulate; sigaction(SyscallFrame::syscall_signal, &sa, nullptr); // Close the shared memory region file descriptor before we call untrusted