diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 1232d608a3..6ee7f3c1dd 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -5,6 +5,7 @@ import ( "github.com/dotcloud/docker/daemon/execdriver" "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/label" + "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/utils" "io/ioutil" "log" @@ -42,6 +43,10 @@ func init() { return err } + if err := system.CloseFdsFrom(3); err != nil { + return err + } + if err := changeUser(args); err != nil { return err } diff --git a/hack/dind b/hack/dind index 94147f5324..e3641a342f 100755 --- a/hack/dind +++ b/hack/dind @@ -70,22 +70,6 @@ grep -q :devices: /proc/1/cgroup || grep -qw devices /proc/1/cgroup || echo "WARNING: it looks like the 'devices' cgroup is not mounted." -# Now, close extraneous file descriptors. -pushd /proc/self/fd >/dev/null -for FD in * -do - case "$FD" in - # Keep stdin/stdout/stderr - [012]) - ;; - # Nuke everything else - *) - eval exec "$FD>&-" - ;; - esac -done -popd >/dev/null - # Mount /tmp mount -t tmpfs none /tmp diff --git a/hack/make/test-integration-cli b/hack/make/test-integration-cli index adb9be024c..f2128a26ac 100644 --- a/hack/make/test-integration-cli +++ b/hack/make/test-integration-cli @@ -19,6 +19,9 @@ bundle_test_integration_cli() { false fi + # intentionally open a couple bogus file descriptors to help test that they get scrubbed in containers + exec 41>&1 42>&2 + ( set -x; exec \ docker --daemon --debug \ --storage-driver "$DOCKER_GRAPHDRIVER" \ diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go index 4a770bef1e..83867267ae 100644 --- a/integration-cli/docker_cli_run_test.go +++ b/integration-cli/docker_cli_run_test.go @@ -92,6 +92,22 @@ func TestDockerRunEchoNamedContainer(t *testing.T) { logDone("run - echo with named container") } +// docker run should not leak file descriptors +func TestDockerRunLeakyFileDescriptors(t *testing.T) { + runCmd := exec.Command(dockerBinary, "run", "busybox", "ls", "-C", "/proc/self/fd") + out, _, _, err := runCommandWithStdoutStderr(runCmd) + errorOut(err, t, out) + + // normally, we should only get 0, 1, and 2, but 3 gets created by "ls" when it does "opendir" on the "fd" directory + if out != "0 1 2 3\n" { + t.Errorf("container should've printed '0 1 2 3', not: %s", out) + } + + deleteAllContainers() + + logDone("run - check file descriptor leakage") +} + // it should be possible to ping Google DNS resolver // this will fail when Internet access is unavailable func TestDockerRunPingGoogle(t *testing.T) { diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 4e50bc513b..28f056af0d 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -117,12 +117,16 @@ func setupNetwork(container *libcontainer.Container, context libcontainer.Contex return nil } -// finalizeNamespace drops the caps and sets the correct user -// and working dir before execing the command inside the namespace +// finalizeNamespace drops the caps, sets the correct user +// and working dir, and closes any leaky file descriptors +// before execing the command inside the namespace func finalizeNamespace(container *libcontainer.Container) error { if err := capabilities.DropCapabilities(container); err != nil { return fmt.Errorf("drop capabilities %s", err) } + if err := system.CloseFdsFrom(3); err != nil { + return fmt.Errorf("close open file descriptors %s", err) + } if err := setupUser(container); err != nil { return fmt.Errorf("setup user %s", err) } diff --git a/pkg/system/fds_linux.go b/pkg/system/fds_linux.go new file mode 100644 index 0000000000..53d2299d3e --- /dev/null +++ b/pkg/system/fds_linux.go @@ -0,0 +1,38 @@ +package system + +import ( + "io/ioutil" + "strconv" + "syscall" +) + +// Works similarly to OpenBSD's "closefrom(2)": +// The closefrom() call deletes all descriptors numbered fd and higher from +// the per-process file descriptor table. It is effectively the same as +// calling close(2) on each descriptor. +// http://www.openbsd.org/cgi-bin/man.cgi?query=closefrom&sektion=2 +// +// See also http://stackoverflow.com/a/918469/433558 +func CloseFdsFrom(minFd int) error { + fdList, err := ioutil.ReadDir("/proc/self/fd") + if err != nil { + return err + } + for _, fi := range fdList { + fd, err := strconv.Atoi(fi.Name()) + if err != nil { + // ignore non-numeric file names + continue + } + + if fd < minFd { + // ignore descriptors lower than our specified minimum + continue + } + + // intentionally ignore errors from syscall.Close + syscall.Close(fd) + // the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall) + } + return nil +} diff --git a/pkg/system/fds_unsupported.go b/pkg/system/fds_unsupported.go new file mode 100644 index 0000000000..c1e08e82d3 --- /dev/null +++ b/pkg/system/fds_unsupported.go @@ -0,0 +1,12 @@ +// +build !linux + +package system + +import ( + "fmt" + "runtime" +) + +func CloseFdsFrom(minFd int) error { + return fmt.Errorf("CloseFdsFrom is unsupported on this platform (%s/%s)", runtime.GOOS, runtime.GOARCH) +}