diff --git a/SPECS/moby-runc/CVE-2024-21626.patch b/SPECS/moby-runc/CVE-2024-21626.patch index 495bbc9d85..cf6eb88740 100644 --- a/SPECS/moby-runc/CVE-2024-21626.patch +++ b/SPECS/moby-runc/CVE-2024-21626.patch @@ -1,83 +1,4 @@ -From 7362cd5afe9d40131fb62cb075092025c7c71064 Mon Sep 17 00:00:00 2001 -From: "hang.jiang" -Date: Fri, 1 Sep 2023 16:17:13 +0800 -Subject: [runc v1.1.z PATCH 1/6] Fix File to Close - -(This is a cherry-pick of 937ca107c3d22da77eb8e8030f2342253b980980.) - -Signed-off-by: hang.jiang -Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 -Signed-off-by: Aleksa Sarai ---- - libcontainer/cgroups/fs/paths.go | 1 + - update.go | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/libcontainer/cgroups/fs/paths.go b/libcontainer/cgroups/fs/paths.go -index 1092331b25d8..2cb970a3d55b 100644 ---- a/libcontainer/cgroups/fs/paths.go -+++ b/libcontainer/cgroups/fs/paths.go -@@ -83,6 +83,7 @@ func tryDefaultCgroupRoot() string { - if err != nil { - return "" - } -+ defer dir.Close() - names, err := dir.Readdirnames(1) - if err != nil { - return "" -diff --git a/update.go b/update.go -index 9ce5a2e835b2..6d582ddddecb 100644 ---- a/update.go -+++ b/update.go -@@ -174,6 +174,7 @@ other options are ignored. - if err != nil { - return err - } -+ defer f.Close() - } - err = json.NewDecoder(f).Decode(&r) - if err != nil { --- -2.43.0 - -From 2ed79a6c91e56dcd2d1da47f8ffd663066153746 Mon Sep 17 00:00:00 2001 -From: Aleksa Sarai -Date: Tue, 26 Dec 2023 23:53:07 +1100 -Subject: [runc v1.1.z PATCH 2/6] init: verify after chdir that cwd is inside - the container - -If a file descriptor of a directory in the host's mount namespace is -leaked to runc init, a malicious config.json could use /proc/self/fd/... -as a working directory to allow for host filesystem access after the -container runs. This can also be exploited by a container process if it -knows that an administrator will use "runc exec --cwd" and the target ---cwd (the attacker can change that cwd to be a symlink pointing to -/proc/self/fd/... and wait for the process to exec and then snoop on -/proc/$pid/cwd to get access to the host). The former issue can lead to -a critical vulnerability in Docker and Kubernetes, while the latter is a -container breakout. - -We can (ab)use the fact that getcwd(2) on Linux detects this exact case, -and getcwd(3) and Go's Getwd() return an error as a result. Thus, if we -just do os.Getwd() after chdir we can easily detect this case and error -out. - -In runc 1.1, a /sys/fs/cgroup handle happens to be leaked to "runc -init", making this exploitable. On runc main it just so happens that the -leaked /sys/fs/cgroup gets clobbered and thus this is only consistently -exploitable for runc 1.1. - -Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 -Co-developed-by: lifubang -Signed-off-by: lifubang -[refactored the implementation and added more comments] -Signed-off-by: Aleksa Sarai ---- - libcontainer/init_linux.go | 31 ++++++++++++++++++++++++ - libcontainer/integration/seccomp_test.go | 20 +++++++-------- - 2 files changed, 41 insertions(+), 10 deletions(-) - -diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go +diff a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 5b88c71fc83a..057b30669811 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -133,8 +54,8 @@ index 5b88c71fc83a..057b30669811 100644 if err := system.ClearKeepCaps(); err != nil { return fmt.Errorf("unable to clear keep caps: %w", err) } -diff --git a/libcontainer/integration/seccomp_test.go b/libcontainer/integration/seccomp_test.go -index 31092a0a5d21..ecdfa7957df1 100644 +diff a/libcontainer/integration/seccomp_test.go b/libcontainer/integration/seccomp_test.go +index 31092a0a5d2..ecdfa7957df 100644 --- a/libcontainer/integration/seccomp_test.go +++ b/libcontainer/integration/seccomp_test.go @@ -13,7 +13,7 @@ import ( @@ -216,36 +137,109 @@ index 31092a0a5d21..ecdfa7957df1 100644 actual := strings.Trim(buffers.Stderr.String(), "\n") if actual != expected { t.Fatalf("Expected output %s but got %s\n", expected, actual) --- -2.43.0 -From b2b5754eb34174e032f1048beb1b27db83e77c5a Mon Sep 17 00:00:00 2001 +From f2f16213e174fb63e931fe0546bbbad1d9bbed6f Mon Sep 17 00:00:00 2001 From: Aleksa Sarai -Date: Fri, 5 Jan 2024 01:42:32 +1100 -Subject: [runc v1.1.z PATCH 3/6] setns init: do explicit lookup of execve - argument early +Date: Tue, 2 Jan 2024 14:58:28 +1100 +Subject: [PATCH 2/5] init: close internal fds before execve -(This is a partial backport of a minor change included in commit -dac41717465462b21fab5b5942fe4cb3f47d7e53.) +If we leak a file descriptor referencing the host filesystem, an +attacker could use a /proc/self/fd magic-link as the source for execve +to execute a host binary in the container. This would allow the binary +itself (or a process inside the container in the 'runc exec' case) to +write to a host binary, leading to a container escape. -This mirrors the logic in standard_init_linux.go, and also ensures that -we do not call exec.LookPath in the final execve step. +The simple solution is to make sure we close all file descriptors +immediately before the execve(2) step. Doing this earlier can lead to very +serious issues in Go (as file descriptors can be reused, any (*os.File) +reference could start silently operating on a different file) so we have +to do it as late as possible. -While this is okay for regular binaries, it seems exec.LookPath calls -os.Getenv which tries to emit a log entry to the test harness when -running in "go test" mode. In a future patch (in order to fix -CVE-2024-21626), we will close all of the file descriptors immediately -before execve, which would mean the file descriptor for test harness -logging would be closed at execve time. So, moving exec.LookPath earlier -is necessary. +Unfortunately, there are some Go runtime file descriptors that we must +not close (otherwise the Go scheduler panics randomly). The only way of +being sure which file descriptors cannot be closed is to sneakily +go:linkname the runtime internal "internal/poll.IsPollDescriptor" +function. This is almost certainly not recommended but there isn't any +other way to be absolutely sure, while also closing any other possible +files. -Ref: dac417174654 ("runc-dmz: reduce memfd binary cloning cost with small C binary") +In addition, we can keep the logrus forwarding logfd open because you +cannot execve a pipe and the contents of the pipe are so restricted +(JSON-encoded in a format we pick) that it seems unlikely you could even +construct shellcode. Closing the logfd causes issues if there is an +error returned from execve. + +In mainline runc, runc-dmz protects us against this attack because the +intermediate execve(2) closes all of the O_CLOEXEC internal runc file +descriptors and thus runc-dmz cannot access them to attack the host. + +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 Signed-off-by: Aleksa Sarai ---- - libcontainer/setns_init_linux.go | 18 +++++++++++++++++- - 1 file changed, 17 insertions(+), 1 deletion(-) +--- a/libcontainer/integration/seccomp_test.go + libcontainer/init_linux.go | 2 +- + libcontainer/logs/logs.go | 9 ++++ + libcontainer/setns_init_linux.go | 19 +++++++ + libcontainer/standard_init_linux.go | 18 +++++++ + libcontainer/utils/utils_unix.go | 82 ++++++++++++++++++++++++----- + 5 files changed, 117 insertions(+), 13 deletions(-) -diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go +diff a/libcontainer/init_linux.go b/libcontainer/init_linux.go +index 5b88c71fc83a..057b30669811 100644 +--- a/libcontainer/init_linux.go ++++ b/libcontainer/init_linux.go +@@ -8,6 +8,7 @@ import ( + "io" + "net" + "os" ++ "path/filepath" + "strings" + "unsafe" + +@@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error { + return nil + } + ++// verifyCwd ensures that the current directory is actually inside the mount ++// namespace root of the current process. ++func verifyCwd() error { ++ // getcwd(2) on Linux detects if cwd is outside of the rootfs of the ++ // current mount namespace root, and in that case prefixes "(unreachable)" ++ // to the returned string. glibc's getcwd(3) and Go's Getwd() both detect ++ // when this happens and return ENOENT rather than returning a non-absolute ++ // path. In both cases we can therefore easily detect if we have an invalid ++ // cwd by checking the return value of getcwd(3). See getcwd(3) for more ++ // details, and CVE-2024-21626 for the security issue that motivated this ++ // check. ++ // ++ // We have to use unix.Getwd() here because os.Getwd() has a workaround for ++ // $PWD which involves doing stat(.), which can fail if the current ++ // directory is inaccessible to the container process. ++ if wd, err := unix.Getwd(); err == unix.ENOENT { ++ return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected") ++ } else if err != nil { ++ return fmt.Errorf("failed to verify if current working directory is safe: %w", err) ++ } else if !filepath.IsAbs(wd) { ++ // We shouldn't ever hit this, but check just in case. ++ return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd) ++ } ++ return nil ++} ++ + // finalizeNamespace drops the caps, sets the correct user + // and working dir, and closes any leaked file descriptors + // before executing the command inside the namespace +@@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error { + return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err) + } + } ++ // Make sure our final working directory is inside the container. ++ if err := verifyCwd(); err != nil { ++ return err ++ } + if err := system.ClearKeepCaps(); err != nil { + return fmt.Errorf("unable to clear keep caps: %w", err) + } +diff a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 09ab552b3d12..e891773ec578 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go