зеркало из https://github.com/microsoft/docker.git
Merge pull request #5529 from crosbymichael/restrict-proc
Mount /proc and /sys read-only, except in privileged containers
This commit is contained in:
Коммит
1c5a3123cc
|
@ -2,11 +2,6 @@ package lxc
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/daemon/execdriver"
|
||||
"github.com/dotcloud/docker/pkg/cgroups"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"github.com/dotcloud/docker/utils"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
|
@ -17,6 +12,13 @@ import (
|
|||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/dotcloud/docker/daemon/execdriver"
|
||||
"github.com/dotcloud/docker/pkg/cgroups"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"github.com/dotcloud/docker/utils"
|
||||
)
|
||||
|
||||
const DriverName = "lxc"
|
||||
|
@ -26,27 +28,26 @@ func init() {
|
|||
if err := setupEnv(args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setupHostname(args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setupNetworking(args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !args.Privileged {
|
||||
if err := restrict.Restrict(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := setupCapabilities(args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setupWorkingDirectory(args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := system.CloseFdsFrom(3); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := changeUser(args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -64,10 +65,9 @@ func init() {
|
|||
}
|
||||
|
||||
type driver struct {
|
||||
root string // root path for the driver to use
|
||||
apparmor bool
|
||||
sharedRoot bool
|
||||
restrictionPath string
|
||||
root string // root path for the driver to use
|
||||
apparmor bool
|
||||
sharedRoot bool
|
||||
}
|
||||
|
||||
func NewDriver(root string, apparmor bool) (*driver, error) {
|
||||
|
@ -75,15 +75,10 @@ func NewDriver(root string, apparmor bool) (*driver, error) {
|
|||
if err := linkLxcStart(root); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
restrictionPath := filepath.Join(root, "empty")
|
||||
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &driver{
|
||||
apparmor: apparmor,
|
||||
root: root,
|
||||
sharedRoot: rootIsShared(),
|
||||
restrictionPath: restrictionPath,
|
||||
apparmor: apparmor,
|
||||
root: root,
|
||||
sharedRoot: rootIsShared(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -414,16 +409,14 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
|
|||
|
||||
if err := LxcTemplateCompiled.Execute(fo, struct {
|
||||
*execdriver.Command
|
||||
AppArmor bool
|
||||
ProcessLabel string
|
||||
MountLabel string
|
||||
RestrictionSource string
|
||||
AppArmor bool
|
||||
ProcessLabel string
|
||||
MountLabel string
|
||||
}{
|
||||
Command: c,
|
||||
AppArmor: d.apparmor,
|
||||
ProcessLabel: process,
|
||||
MountLabel: mount,
|
||||
RestrictionSource: d.restrictionPath,
|
||||
Command: c,
|
||||
AppArmor: d.apparmor,
|
||||
ProcessLabel: process,
|
||||
MountLabel: mount,
|
||||
}); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
package lxc
|
||||
|
||||
import (
|
||||
"github.com/dotcloud/docker/daemon/execdriver"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"github.com/dotcloud/docker/daemon/execdriver"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
)
|
||||
|
||||
const LxcTemplate = `
|
||||
|
@ -82,15 +83,12 @@ lxc.pivotdir = lxc_putold
|
|||
|
||||
# NOTICE: These mounts must be applied within the namespace
|
||||
|
||||
# WARNING: procfs is a known attack vector and should probably be disabled
|
||||
# if your userspace allows it. eg. see http://blog.zx2c4.com/749
|
||||
# WARNING: mounting procfs and/or sysfs read-write is a known attack vector.
|
||||
# See e.g. http://blog.zx2c4.com/749 and http://bit.ly/T9CkqJ
|
||||
# We mount them read-write here, but later, dockerinit will call the Restrict() function to remount them read-only.
|
||||
# We cannot mount them directly read-only, because that would prevent loading AppArmor profiles.
|
||||
lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0
|
||||
|
||||
# WARNING: sysfs is a known attack vector and should probably be disabled
|
||||
# if your userspace allows it. eg. see http://bit.ly/T9CkqJ
|
||||
{{if .Privileged}}
|
||||
lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
|
||||
{{end}}
|
||||
|
||||
{{if .Tty}}
|
||||
lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0
|
||||
|
@ -111,15 +109,8 @@ lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabS
|
|||
{{if .AppArmor}}
|
||||
lxc.aa_profile = unconfined
|
||||
{{else}}
|
||||
# not unconfined
|
||||
# Let AppArmor normal confinement take place (i.e., not unconfined)
|
||||
{{end}}
|
||||
{{else}}
|
||||
# restrict access to proc
|
||||
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/sys none bind,ro 0 0
|
||||
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/irq none bind,ro 0 0
|
||||
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/acpi none bind,ro 0 0
|
||||
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/sysrq-trigger none bind,ro 0 0
|
||||
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
|
||||
{{end}}
|
||||
|
||||
# limits
|
||||
|
|
|
@ -24,7 +24,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
|
|||
container.Cgroups.Name = c.ID
|
||||
// check to see if we are running in ramdisk to disable pivot root
|
||||
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
||||
container.Context["restriction_path"] = d.restrictionPath
|
||||
container.Context["restrictions"] = "true"
|
||||
|
||||
if err := d.createNetwork(container, c); err != nil {
|
||||
return nil, err
|
||||
|
@ -84,9 +84,7 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
|
|||
}
|
||||
container.Cgroups.DeviceAccess = true
|
||||
|
||||
// add sysfs as a mount for privileged containers
|
||||
container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "sysfs"})
|
||||
delete(container.Context, "restriction_path")
|
||||
delete(container.Context, "restrictions")
|
||||
|
||||
if apparmor.IsEnabled() {
|
||||
container.Context["apparmor_profile"] = "unconfined"
|
||||
|
|
|
@ -57,7 +57,6 @@ type driver struct {
|
|||
root string
|
||||
initPath string
|
||||
activeContainers map[string]*exec.Cmd
|
||||
restrictionPath string
|
||||
}
|
||||
|
||||
func NewDriver(root, initPath string) (*driver, error) {
|
||||
|
@ -68,14 +67,8 @@ func NewDriver(root, initPath string) (*driver, error) {
|
|||
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
restrictionPath := filepath.Join(root, "empty")
|
||||
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &driver{
|
||||
root: root,
|
||||
restrictionPath: restrictionPath,
|
||||
initPath: initPath,
|
||||
activeContainers: make(map[string]*exec.Cmd),
|
||||
}, nil
|
||||
|
|
|
@ -725,24 +725,46 @@ func TestUnPrivilegedCannotMount(t *testing.T) {
|
|||
logDone("run - test un-privileged cannot mount")
|
||||
}
|
||||
|
||||
func TestSysNotAvaliableInNonPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "busybox", "ls", "/sys/kernel")
|
||||
func TestSysNotWritableInNonPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/sys/kernel/profiling")
|
||||
if code, err := runCommand(cmd); err == nil || code == 0 {
|
||||
t.Fatal("sys should not be available in a non privileged container")
|
||||
t.Fatal("sys should not be writable in a non privileged container")
|
||||
}
|
||||
|
||||
deleteAllContainers()
|
||||
|
||||
logDone("run - sys not avaliable in non privileged container")
|
||||
logDone("run - sys not writable in non privileged container")
|
||||
}
|
||||
|
||||
func TestSysAvaliableInPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "ls", "/sys/kernel")
|
||||
func TestSysWritableInPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/sys/kernel/profiling")
|
||||
if code, err := runCommand(cmd); err != nil || code != 0 {
|
||||
t.Fatalf("sys should be available in privileged container")
|
||||
t.Fatalf("sys should be writable in privileged container")
|
||||
}
|
||||
|
||||
deleteAllContainers()
|
||||
|
||||
logDone("run - sys avaliable in privileged container")
|
||||
logDone("run - sys writable in privileged container")
|
||||
}
|
||||
|
||||
func TestProcNotWritableInNonPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/proc/sysrq-trigger")
|
||||
if code, err := runCommand(cmd); err == nil || code == 0 {
|
||||
t.Fatal("proc should not be writable in a non privileged container")
|
||||
}
|
||||
|
||||
deleteAllContainers()
|
||||
|
||||
logDone("run - proc not writable in non privileged container")
|
||||
}
|
||||
|
||||
func TestProcWritableInPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger")
|
||||
if code, err := runCommand(cmd); err != nil || code != 0 {
|
||||
t.Fatalf("proc should be writable in privileged container")
|
||||
}
|
||||
|
||||
deleteAllContainers()
|
||||
|
||||
logDone("run - proc writable in privileged container")
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ func IsEnabled() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func ApplyProfile(pid int, name string) error {
|
||||
func ApplyProfile(name string) error {
|
||||
if name == "" {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -2,12 +2,10 @@
|
|||
|
||||
package apparmor
|
||||
|
||||
import ()
|
||||
|
||||
func IsEnabled() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func ApplyProfile(pid int, name string) error {
|
||||
func ApplyProfile(name string) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -4,11 +4,12 @@ package console
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
)
|
||||
|
||||
// Setup initializes the proper /dev/console inside the rootfs path
|
||||
|
|
|
@ -11,7 +11,6 @@ import (
|
|||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
)
|
||||
|
||||
|
@ -51,11 +50,6 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
|
|||
if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil {
|
||||
return fmt.Errorf("copy dev nodes %s", err)
|
||||
}
|
||||
if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
|
||||
if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
|
||||
return fmt.Errorf("restrict %s", err)
|
||||
}
|
||||
}
|
||||
if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -124,22 +118,17 @@ func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
|
|||
}
|
||||
|
||||
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
|
||||
// standard bind mounts allowing them to be more dymanic
|
||||
// standard bind mounts allowing them to be more dynamic
|
||||
func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
|
||||
systemMounts := []mount{
|
||||
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
||||
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
|
||||
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
||||
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
||||
}
|
||||
|
||||
if len(mounts.OfType("devtmpfs")) == 1 {
|
||||
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)})
|
||||
}
|
||||
systemMounts = append(systemMounts,
|
||||
mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
||||
mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
||||
)
|
||||
|
||||
if len(mounts.OfType("sysfs")) == 1 {
|
||||
systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags})
|
||||
}
|
||||
return systemMounts
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/dotcloud/docker/pkg/libcontainer/mount"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/network"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/utils"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"github.com/dotcloud/docker/pkg/user"
|
||||
|
@ -68,18 +69,23 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string,
|
|||
if err := system.Sethostname(container.Hostname); err != nil {
|
||||
return fmt.Errorf("sethostname %s", err)
|
||||
}
|
||||
if err := FinalizeNamespace(container); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
|
||||
runtime.LockOSThread()
|
||||
|
||||
if err := apparmor.ApplyProfile(os.Getpid(), container.Context["apparmor_profile"]); err != nil {
|
||||
return err
|
||||
if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err)
|
||||
}
|
||||
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
if container.Context["restrictions"] != "" {
|
||||
if err := restrict.Restrict(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := FinalizeNamespace(container); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
return system.Execv(args[0], args[0:], container.Env)
|
||||
}
|
||||
|
||||
|
|
|
@ -1,51 +1,25 @@
|
|||
// +build linux
|
||||
|
||||
package restrict
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
)
|
||||
|
||||
const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY
|
||||
|
||||
var restrictions = map[string]string{
|
||||
// dirs
|
||||
"/proc/sys": "",
|
||||
"/proc/irq": "",
|
||||
"/proc/acpi": "",
|
||||
|
||||
// files
|
||||
"/proc/sysrq-trigger": "/dev/null",
|
||||
"/proc/kcore": "/dev/null",
|
||||
}
|
||||
|
||||
// Restrict locks down access to many areas of proc
|
||||
// by using the asumption that the user does not have mount caps to
|
||||
// revert the changes made here
|
||||
func Restrict(rootfs, empty string) error {
|
||||
for dest, source := range restrictions {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
|
||||
// we don't have a "/dev/null" for dirs so have the requester pass a dir
|
||||
// for us to bind mount
|
||||
switch source {
|
||||
case "":
|
||||
source = empty
|
||||
default:
|
||||
source = filepath.Join(rootfs, source)
|
||||
}
|
||||
if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
|
||||
}
|
||||
if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
|
||||
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
|
||||
// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
|
||||
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
|
||||
func Restrict() error {
|
||||
// remount proc and sys as readonly
|
||||
for _, dest := range []string{"proc", "sys"} {
|
||||
if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
|
||||
return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
|
||||
}
|
||||
}
|
||||
if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil {
|
||||
return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
// +build !linux
|
||||
|
||||
package restrict
|
||||
|
||||
import "fmt"
|
||||
|
||||
func Restrict() error {
|
||||
return fmt.Errorf("not supported")
|
||||
}
|
Загрузка…
Ссылка в новой задаче