Merge pull request #5529 from crosbymichael/restrict-proc

Mount /proc and /sys read-only, except in privileged containers
This commit is contained in:
Guillaume J. Charmes 2014-05-02 10:52:53 -07:00
Родитель b372c19b38 76fa7d588a
Коммит 1c5a3123cc
12 изменённых файлов: 106 добавлений и 132 удалений

Просмотреть файл

@ -2,11 +2,6 @@ package lxc
import ( import (
"fmt" "fmt"
"github.com/dotcloud/docker/daemon/execdriver"
"github.com/dotcloud/docker/pkg/cgroups"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/utils"
"io/ioutil" "io/ioutil"
"log" "log"
"os" "os"
@ -17,6 +12,13 @@ import (
"strings" "strings"
"syscall" "syscall"
"time" "time"
"github.com/dotcloud/docker/daemon/execdriver"
"github.com/dotcloud/docker/pkg/cgroups"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/utils"
) )
const DriverName = "lxc" const DriverName = "lxc"
@ -26,27 +28,26 @@ func init() {
if err := setupEnv(args); err != nil { if err := setupEnv(args); err != nil {
return err return err
} }
if err := setupHostname(args); err != nil { if err := setupHostname(args); err != nil {
return err return err
} }
if err := setupNetworking(args); err != nil { if err := setupNetworking(args); err != nil {
return err return err
} }
if !args.Privileged {
if err := restrict.Restrict(); err != nil {
return err
}
}
if err := setupCapabilities(args); err != nil { if err := setupCapabilities(args); err != nil {
return err return err
} }
if err := setupWorkingDirectory(args); err != nil { if err := setupWorkingDirectory(args); err != nil {
return err return err
} }
if err := system.CloseFdsFrom(3); err != nil { if err := system.CloseFdsFrom(3); err != nil {
return err return err
} }
if err := changeUser(args); err != nil { if err := changeUser(args); err != nil {
return err return err
} }
@ -64,10 +65,9 @@ func init() {
} }
type driver struct { type driver struct {
root string // root path for the driver to use root string // root path for the driver to use
apparmor bool apparmor bool
sharedRoot bool sharedRoot bool
restrictionPath string
} }
func NewDriver(root string, apparmor bool) (*driver, error) { func NewDriver(root string, apparmor bool) (*driver, error) {
@ -75,15 +75,10 @@ func NewDriver(root string, apparmor bool) (*driver, error) {
if err := linkLxcStart(root); err != nil { if err := linkLxcStart(root); err != nil {
return nil, err return nil, err
} }
restrictionPath := filepath.Join(root, "empty")
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
return nil, err
}
return &driver{ return &driver{
apparmor: apparmor, apparmor: apparmor,
root: root, root: root,
sharedRoot: rootIsShared(), sharedRoot: rootIsShared(),
restrictionPath: restrictionPath,
}, nil }, nil
} }
@ -414,16 +409,14 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
if err := LxcTemplateCompiled.Execute(fo, struct { if err := LxcTemplateCompiled.Execute(fo, struct {
*execdriver.Command *execdriver.Command
AppArmor bool AppArmor bool
ProcessLabel string ProcessLabel string
MountLabel string MountLabel string
RestrictionSource string
}{ }{
Command: c, Command: c,
AppArmor: d.apparmor, AppArmor: d.apparmor,
ProcessLabel: process, ProcessLabel: process,
MountLabel: mount, MountLabel: mount,
RestrictionSource: d.restrictionPath,
}); err != nil { }); err != nil {
return "", err return "", err
} }

Просмотреть файл

@ -1,10 +1,11 @@
package lxc package lxc
import ( import (
"github.com/dotcloud/docker/daemon/execdriver"
"github.com/dotcloud/docker/pkg/label"
"strings" "strings"
"text/template" "text/template"
"github.com/dotcloud/docker/daemon/execdriver"
"github.com/dotcloud/docker/pkg/label"
) )
const LxcTemplate = ` const LxcTemplate = `
@ -82,15 +83,12 @@ lxc.pivotdir = lxc_putold
# NOTICE: These mounts must be applied within the namespace # NOTICE: These mounts must be applied within the namespace
# WARNING: procfs is a known attack vector and should probably be disabled # WARNING: mounting procfs and/or sysfs read-write is a known attack vector.
# if your userspace allows it. eg. see http://blog.zx2c4.com/749 # See e.g. http://blog.zx2c4.com/749 and http://bit.ly/T9CkqJ
# We mount them read-write here, but later, dockerinit will call the Restrict() function to remount them read-only.
# We cannot mount them directly read-only, because that would prevent loading AppArmor profiles.
lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0 lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0
# WARNING: sysfs is a known attack vector and should probably be disabled
# if your userspace allows it. eg. see http://bit.ly/T9CkqJ
{{if .Privileged}}
lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0 lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
{{end}}
{{if .Tty}} {{if .Tty}}
lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0 lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0
@ -111,15 +109,8 @@ lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabS
{{if .AppArmor}} {{if .AppArmor}}
lxc.aa_profile = unconfined lxc.aa_profile = unconfined
{{else}} {{else}}
# not unconfined # Let AppArmor normal confinement take place (i.e., not unconfined)
{{end}} {{end}}
{{else}}
# restrict access to proc
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/sys none bind,ro 0 0
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/irq none bind,ro 0 0
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/acpi none bind,ro 0 0
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/sysrq-trigger none bind,ro 0 0
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
{{end}} {{end}}
# limits # limits

Просмотреть файл

@ -24,7 +24,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
container.Cgroups.Name = c.ID container.Cgroups.Name = c.ID
// check to see if we are running in ramdisk to disable pivot root // check to see if we are running in ramdisk to disable pivot root
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.Context["restriction_path"] = d.restrictionPath container.Context["restrictions"] = "true"
if err := d.createNetwork(container, c); err != nil { if err := d.createNetwork(container, c); err != nil {
return nil, err return nil, err
@ -84,9 +84,7 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
} }
container.Cgroups.DeviceAccess = true container.Cgroups.DeviceAccess = true
// add sysfs as a mount for privileged containers delete(container.Context, "restrictions")
container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "sysfs"})
delete(container.Context, "restriction_path")
if apparmor.IsEnabled() { if apparmor.IsEnabled() {
container.Context["apparmor_profile"] = "unconfined" container.Context["apparmor_profile"] = "unconfined"

Просмотреть файл

@ -57,7 +57,6 @@ type driver struct {
root string root string
initPath string initPath string
activeContainers map[string]*exec.Cmd activeContainers map[string]*exec.Cmd
restrictionPath string
} }
func NewDriver(root, initPath string) (*driver, error) { func NewDriver(root, initPath string) (*driver, error) {
@ -68,14 +67,8 @@ func NewDriver(root, initPath string) (*driver, error) {
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil { if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
return nil, err return nil, err
} }
restrictionPath := filepath.Join(root, "empty")
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
return nil, err
}
return &driver{ return &driver{
root: root, root: root,
restrictionPath: restrictionPath,
initPath: initPath, initPath: initPath,
activeContainers: make(map[string]*exec.Cmd), activeContainers: make(map[string]*exec.Cmd),
}, nil }, nil

Просмотреть файл

@ -725,24 +725,46 @@ func TestUnPrivilegedCannotMount(t *testing.T) {
logDone("run - test un-privileged cannot mount") logDone("run - test un-privileged cannot mount")
} }
func TestSysNotAvaliableInNonPrivilegedContainers(t *testing.T) { func TestSysNotWritableInNonPrivilegedContainers(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "busybox", "ls", "/sys/kernel") cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/sys/kernel/profiling")
if code, err := runCommand(cmd); err == nil || code == 0 { if code, err := runCommand(cmd); err == nil || code == 0 {
t.Fatal("sys should not be available in a non privileged container") t.Fatal("sys should not be writable in a non privileged container")
} }
deleteAllContainers() deleteAllContainers()
logDone("run - sys not avaliable in non privileged container") logDone("run - sys not writable in non privileged container")
} }
func TestSysAvaliableInPrivilegedContainers(t *testing.T) { func TestSysWritableInPrivilegedContainers(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "ls", "/sys/kernel") cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/sys/kernel/profiling")
if code, err := runCommand(cmd); err != nil || code != 0 { if code, err := runCommand(cmd); err != nil || code != 0 {
t.Fatalf("sys should be available in privileged container") t.Fatalf("sys should be writable in privileged container")
} }
deleteAllContainers() deleteAllContainers()
logDone("run - sys avaliable in privileged container") logDone("run - sys writable in privileged container")
}
func TestProcNotWritableInNonPrivilegedContainers(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/proc/sysrq-trigger")
if code, err := runCommand(cmd); err == nil || code == 0 {
t.Fatal("proc should not be writable in a non privileged container")
}
deleteAllContainers()
logDone("run - proc not writable in non privileged container")
}
func TestProcWritableInPrivilegedContainers(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger")
if code, err := runCommand(cmd); err != nil || code != 0 {
t.Fatalf("proc should be writable in privileged container")
}
deleteAllContainers()
logDone("run - proc writable in privileged container")
} }

Просмотреть файл

@ -20,7 +20,7 @@ func IsEnabled() bool {
return false return false
} }
func ApplyProfile(pid int, name string) error { func ApplyProfile(name string) error {
if name == "" { if name == "" {
return nil return nil
} }

Просмотреть файл

@ -2,12 +2,10 @@
package apparmor package apparmor
import ()
func IsEnabled() bool { func IsEnabled() bool {
return false return false
} }
func ApplyProfile(pid int, name string) error { func ApplyProfile(name string) error {
return nil return nil
} }

Просмотреть файл

@ -4,11 +4,12 @@ package console
import ( import (
"fmt" "fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/system"
"os" "os"
"path/filepath" "path/filepath"
"syscall" "syscall"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/system"
) )
// Setup initializes the proper /dev/console inside the rootfs path // Setup initializes the proper /dev/console inside the rootfs path

Просмотреть файл

@ -11,7 +11,6 @@ import (
"github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes" "github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/system"
) )
@ -51,11 +50,6 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil { if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil {
return fmt.Errorf("copy dev nodes %s", err) return fmt.Errorf("copy dev nodes %s", err)
} }
if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
return fmt.Errorf("restrict %s", err)
}
}
if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil { if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
return err return err
} }
@ -124,22 +118,17 @@ func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
} }
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and // TODO: this is crappy right now and should be cleaned up with a better way of handling system and
// standard bind mounts allowing them to be more dymanic // standard bind mounts allowing them to be more dynamic
func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount { func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
systemMounts := []mount{ systemMounts := []mount{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
} }
if len(mounts.OfType("devtmpfs")) == 1 { if len(mounts.OfType("devtmpfs")) == 1 {
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}) systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)})
} }
systemMounts = append(systemMounts,
mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
)
if len(mounts.OfType("sysfs")) == 1 {
systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags})
}
return systemMounts return systemMounts
} }

Просмотреть файл

@ -16,6 +16,7 @@ import (
"github.com/dotcloud/docker/pkg/libcontainer/mount" "github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/user" "github.com/dotcloud/docker/pkg/user"
@ -68,18 +69,23 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string,
if err := system.Sethostname(container.Hostname); err != nil { if err := system.Sethostname(container.Hostname); err != nil {
return fmt.Errorf("sethostname %s", err) return fmt.Errorf("sethostname %s", err)
} }
if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
runtime.LockOSThread() runtime.LockOSThread()
if err := apparmor.ApplyProfile(os.Getpid(), container.Context["apparmor_profile"]); err != nil { if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil {
return err return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err)
} }
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil { if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
return fmt.Errorf("set process label %s", err) return fmt.Errorf("set process label %s", err)
} }
if container.Context["restrictions"] != "" {
if err := restrict.Restrict(); err != nil {
return err
}
}
if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
return system.Execv(args[0], args[0:], container.Env) return system.Execv(args[0], args[0:], container.Env)
} }

Просмотреть файл

@ -1,51 +1,25 @@
// +build linux
package restrict package restrict
import ( import (
"fmt" "fmt"
"os"
"path/filepath"
"syscall" "syscall"
"github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/system"
) )
const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY // This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
var restrictions = map[string]string{ func Restrict() error {
// dirs // remount proc and sys as readonly
"/proc/sys": "", for _, dest := range []string{"proc", "sys"} {
"/proc/irq": "", if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
"/proc/acpi": "", return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
// files
"/proc/sysrq-trigger": "/dev/null",
"/proc/kcore": "/dev/null",
}
// Restrict locks down access to many areas of proc
// by using the asumption that the user does not have mount caps to
// revert the changes made here
func Restrict(rootfs, empty string) error {
for dest, source := range restrictions {
dest = filepath.Join(rootfs, dest)
// we don't have a "/dev/null" for dirs so have the requester pass a dir
// for us to bind mount
switch source {
case "":
source = empty
default:
source = filepath.Join(rootfs, source)
}
if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
if os.IsNotExist(err) {
continue
}
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
}
if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
} }
} }
if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil {
return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore")
}
return nil return nil
} }

Просмотреть файл

@ -0,0 +1,9 @@
// +build !linux
package restrict
import "fmt"
func Restrict() error {
return fmt.Errorf("not supported")
}