зеркало из https://github.com/microsoft/docker.git
Merge pull request #16244 from calavera/libcontainer_0_0_4
Vendor libcontainer v0.0.4
This commit is contained in:
Коммит
dac92a8afb
|
@ -1112,12 +1112,9 @@ func (container *Container) unmountVolumes(forceSyscall bool) error {
|
||||||
|
|
||||||
func (container *Container) networkMounts() []execdriver.Mount {
|
func (container *Container) networkMounts() []execdriver.Mount {
|
||||||
var mounts []execdriver.Mount
|
var mounts []execdriver.Mount
|
||||||
mode := "Z"
|
shared := container.hostConfig.NetworkMode.IsContainer()
|
||||||
if container.hostConfig.NetworkMode.IsContainer() {
|
|
||||||
mode = "z"
|
|
||||||
}
|
|
||||||
if container.ResolvConfPath != "" {
|
if container.ResolvConfPath != "" {
|
||||||
label.Relabel(container.ResolvConfPath, container.MountLabel, mode)
|
label.Relabel(container.ResolvConfPath, container.MountLabel, shared)
|
||||||
writable := !container.hostConfig.ReadonlyRootfs
|
writable := !container.hostConfig.ReadonlyRootfs
|
||||||
if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
|
if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
|
||||||
writable = m.RW
|
writable = m.RW
|
||||||
|
@ -1130,7 +1127,7 @@ func (container *Container) networkMounts() []execdriver.Mount {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
if container.HostnamePath != "" {
|
if container.HostnamePath != "" {
|
||||||
label.Relabel(container.HostnamePath, container.MountLabel, mode)
|
label.Relabel(container.HostnamePath, container.MountLabel, shared)
|
||||||
writable := !container.hostConfig.ReadonlyRootfs
|
writable := !container.hostConfig.ReadonlyRootfs
|
||||||
if m, exists := container.MountPoints["/etc/hostname"]; exists {
|
if m, exists := container.MountPoints["/etc/hostname"]; exists {
|
||||||
writable = m.RW
|
writable = m.RW
|
||||||
|
@ -1143,7 +1140,7 @@ func (container *Container) networkMounts() []execdriver.Mount {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
if container.HostsPath != "" {
|
if container.HostsPath != "" {
|
||||||
label.Relabel(container.HostsPath, container.MountLabel, mode)
|
label.Relabel(container.HostsPath, container.MountLabel, shared)
|
||||||
writable := !container.hostConfig.ReadonlyRootfs
|
writable := !container.hostConfig.ReadonlyRootfs
|
||||||
if m, exists := container.MountPoints["/etc/hosts"]; exists {
|
if m, exists := container.MountPoints["/etc/hosts"]; exists {
|
||||||
writable = m.RW
|
writable = m.RW
|
||||||
|
|
|
@ -59,7 +59,7 @@ func createContainerPlatformSpecificSettings(container *Container, config *runco
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := label.Relabel(v.Path(), container.MountLabel, "z"); err != nil {
|
if err := label.Relabel(v.Path(), container.MountLabel, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -355,7 +355,8 @@ func (daemon *Daemon) registerMountPoints(container *Container, hostConfig *runc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := label.Relabel(bind.Source, container.MountLabel, bind.Mode); err != nil {
|
shared := label.IsShared(bind.Mode)
|
||||||
|
if err := label.Relabel(bind.Source, container.MountLabel, shared); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
binds[bind.Destination] = true
|
binds[bind.Destination] = true
|
||||||
|
|
|
@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
|
||||||
clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
|
clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
|
||||||
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
|
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
|
||||||
|
|
||||||
clone git github.com/opencontainers/runc v0.0.3 # libcontainer
|
clone git github.com/opencontainers/runc v0.0.4 # libcontainer
|
||||||
# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
|
# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
|
||||||
clone git github.com/coreos/go-systemd v3
|
clone git github.com/coreos/go-systemd v3
|
||||||
clone git github.com/godbus/dbus v2
|
clone git github.com/godbus/dbus v2
|
||||||
|
|
|
@ -83,7 +83,7 @@ type data struct {
|
||||||
pid int
|
pid int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) Apply(pid int) error {
|
func (m *Manager) Apply(pid int) (err error) {
|
||||||
if m.Cgroups == nil {
|
if m.Cgroups == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -235,12 +235,12 @@ func getCgroupData(c *configs.Cgroup, pid int) (*data, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (raw *data) parent(subsystem, mountpoint, src string) (string, error) {
|
func (raw *data) parent(subsystem, mountpoint, root string) (string, error) {
|
||||||
initPath, err := cgroups.GetInitCgroupDir(subsystem)
|
initPath, err := cgroups.GetThisCgroupDir(subsystem)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
relDir, err := filepath.Rel(src, initPath)
|
relDir, err := filepath.Rel(root, initPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
@ -248,7 +248,7 @@ func (raw *data) parent(subsystem, mountpoint, src string) (string, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (raw *data) path(subsystem string) (string, error) {
|
func (raw *data) path(subsystem string) (string, error) {
|
||||||
mnt, src, err := cgroups.FindCgroupMountpointAndSource(subsystem)
|
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
|
||||||
// If we didn't mount the subsystem, there is no point we make the path.
|
// If we didn't mount the subsystem, there is no point we make the path.
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
|
@ -259,7 +259,7 @@ func (raw *data) path(subsystem string) (string, error) {
|
||||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil
|
return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
parent, err := raw.parent(subsystem, mnt, src)
|
parent, err := raw.parent(subsystem, mnt, root)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ import (
|
||||||
type MemoryGroup struct {
|
type MemoryGroup struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MemoryGroup) Apply(d *data) error {
|
func (s *MemoryGroup) Apply(d *data) (err error) {
|
||||||
path, err := d.path("memory")
|
path, err := d.path("memory")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if cgroups.IsNotFound(err) {
|
if cgroups.IsNotFound(err) {
|
||||||
|
@ -28,21 +28,22 @@ func (s *MemoryGroup) Apply(d *data) error {
|
||||||
if err := os.MkdirAll(path, 0755); err != nil {
|
if err := os.MkdirAll(path, 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
os.RemoveAll(path)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
if err := s.Set(path, d.c); err != nil {
|
if err := s.Set(path, d.c); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need to join memory cgroup after set memory limits, because
|
// We need to join memory cgroup after set memory limits, because
|
||||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||||
_, err = d.join("memory")
|
if _, err = d.join("memory"); err != nil {
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer func() {
|
|
||||||
if err != nil {
|
|
||||||
os.RemoveAll(path)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,9 @@ const cgroupNamePrefix = "name="
|
||||||
|
|
||||||
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
|
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
|
||||||
func FindCgroupMountpoint(subsystem string) (string, error) {
|
func FindCgroupMountpoint(subsystem string) (string, error) {
|
||||||
|
// We are not using mount.GetMounts() because it's super-inefficient,
|
||||||
|
// parsing it directly sped up x10 times because of not using Sscanf.
|
||||||
|
// It was one of two major performance drawbacks in container start.
|
||||||
f, err := os.Open("/proc/self/mountinfo")
|
f, err := os.Open("/proc/self/mountinfo")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
|
@ -44,7 +47,7 @@ func FindCgroupMountpoint(subsystem string) (string, error) {
|
||||||
return "", NewNotFoundError(subsystem)
|
return "", NewNotFoundError(subsystem)
|
||||||
}
|
}
|
||||||
|
|
||||||
func FindCgroupMountpointAndSource(subsystem string) (string, string, error) {
|
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
|
||||||
f, err := os.Open("/proc/self/mountinfo")
|
f, err := os.Open("/proc/self/mountinfo")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", "", err
|
return "", "", err
|
||||||
|
@ -69,15 +72,28 @@ func FindCgroupMountpointAndSource(subsystem string) (string, string, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func FindCgroupMountpointDir() (string, error) {
|
func FindCgroupMountpointDir() (string, error) {
|
||||||
mounts, err := mount.GetMounts()
|
f, err := os.Open("/proc/self/mountinfo")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
for _, mount := range mounts {
|
scanner := bufio.NewScanner(f)
|
||||||
if mount.Fstype == "cgroup" {
|
for scanner.Scan() {
|
||||||
return filepath.Dir(mount.Mountpoint), nil
|
text := scanner.Text()
|
||||||
|
fields := strings.Split(text, " ")
|
||||||
|
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||||
|
index := strings.Index(text, " - ")
|
||||||
|
postSeparatorFields := strings.Fields(text[index+3:])
|
||||||
|
if len(postSeparatorFields) < 3 {
|
||||||
|
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||||
}
|
}
|
||||||
|
if postSeparatorFields[0] == "cgroup" {
|
||||||
|
return filepath.Dir(fields[4]), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
return "", NewNotFoundError("cgroup")
|
return "", NewNotFoundError("cgroup")
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
package configs
|
package configs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"os/exec"
|
||||||
|
)
|
||||||
|
|
||||||
type Rlimit struct {
|
type Rlimit struct {
|
||||||
Type int `json:"type"`
|
Type int `json:"type"`
|
||||||
Hard uint64 `json:"hard"`
|
Hard uint64 `json:"hard"`
|
||||||
|
@ -13,36 +19,46 @@ type IDMap struct {
|
||||||
Size int `json:"size"`
|
Size int `json:"size"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Seccomp represents syscall restrictions
|
||||||
type Seccomp struct {
|
type Seccomp struct {
|
||||||
Syscalls []*Syscall `json:"syscalls"`
|
DefaultAction Action `json:"default_action"`
|
||||||
|
Syscalls []*Syscall `json:"syscalls"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// An action to be taken upon rule match in Seccomp
|
||||||
type Action int
|
type Action int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
Kill Action = iota - 3
|
Kill Action = iota - 4
|
||||||
|
Errno
|
||||||
Trap
|
Trap
|
||||||
Allow
|
Allow
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// A comparison operator to be used when matching syscall arguments in Seccomp
|
||||||
type Operator int
|
type Operator int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
EqualTo Operator = iota
|
EqualTo Operator = iota
|
||||||
NotEqualTo
|
NotEqualTo
|
||||||
GreatherThan
|
GreaterThan
|
||||||
|
GreaterThanOrEqualTo
|
||||||
LessThan
|
LessThan
|
||||||
|
LessThanOrEqualTo
|
||||||
MaskEqualTo
|
MaskEqualTo
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// A rule to match a specific syscall argument in Seccomp
|
||||||
type Arg struct {
|
type Arg struct {
|
||||||
Index int `json:"index"`
|
Index uint `json:"index"`
|
||||||
Value uint32 `json:"value"`
|
Value uint64 `json:"value"`
|
||||||
Op Operator `json:"op"`
|
ValueTwo uint64 `json:"value_two"`
|
||||||
|
Op Operator `json:"op"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A rule to match a syscall in Seccomp
|
||||||
type Syscall struct {
|
type Syscall struct {
|
||||||
Value int `json:"value"`
|
Name string `json:"name"`
|
||||||
Action Action `json:"action"`
|
Action Action `json:"action"`
|
||||||
Args []*Arg `json:"args"`
|
Args []*Arg `json:"args"`
|
||||||
}
|
}
|
||||||
|
@ -117,6 +133,12 @@ type Config struct {
|
||||||
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||||
Rlimits []Rlimit `json:"rlimits"`
|
Rlimits []Rlimit `json:"rlimits"`
|
||||||
|
|
||||||
|
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||||
|
// for a process. Valid values are in the range [-1000, 1000], where processes with
|
||||||
|
// higher scores are preferred for being killed.
|
||||||
|
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||||
|
OomScoreAdj int `json:"oom_score_adj"`
|
||||||
|
|
||||||
// AdditionalGroups specifies the gids that should be added to supplementary groups
|
// AdditionalGroups specifies the gids that should be added to supplementary groups
|
||||||
// in addition to those that the user belongs to.
|
// in addition to those that the user belongs to.
|
||||||
AdditionalGroups []string `json:"additional_groups"`
|
AdditionalGroups []string `json:"additional_groups"`
|
||||||
|
@ -140,7 +162,79 @@ type Config struct {
|
||||||
Sysctl map[string]string `json:"sysctl"`
|
Sysctl map[string]string `json:"sysctl"`
|
||||||
|
|
||||||
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
||||||
// By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno
|
// A number of rules are given, each having an action to be taken if a syscall matches it.
|
||||||
// can be specified on a per syscall basis.
|
// A default action to be taken if no rules match is also given.
|
||||||
Seccomp *Seccomp `json:"seccomp"`
|
Seccomp *Seccomp `json:"seccomp"`
|
||||||
|
|
||||||
|
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||||
|
// Hooks cannot be marshaled to JSON, and they do not need to be.
|
||||||
|
Hooks *Hooks `json:"-"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Hooks struct {
|
||||||
|
// Prestart commands are executed after the container namespaces are created,
|
||||||
|
// but before the user supplied command is executed from init.
|
||||||
|
Prestart []Hook
|
||||||
|
|
||||||
|
// Poststop commands are executed after the container init process exits.
|
||||||
|
Poststop []Hook
|
||||||
|
}
|
||||||
|
|
||||||
|
// HookState is the payload provided to a hook on execution.
|
||||||
|
type HookState struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Pid int `json:"pid"`
|
||||||
|
Root string `json:"root"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Hook interface {
|
||||||
|
// Run executes the hook with the provided state.
|
||||||
|
Run(HookState) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFunctionHook returns a hook that calls the provided function when it is run.
|
||||||
|
func NewFunctionHook(f func(HookState) error) FuncHook {
|
||||||
|
return FuncHook{
|
||||||
|
run: f,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type FuncHook struct {
|
||||||
|
run func(HookState) error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f FuncHook) Run(s HookState) error {
|
||||||
|
return f.run(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
type Command struct {
|
||||||
|
Path string `json:"path"`
|
||||||
|
Args []string `json:"args"`
|
||||||
|
Env []string `json:"env"`
|
||||||
|
Dir string `json:"dir"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCommandHook returns a hook that executes the provided command when it is run.
|
||||||
|
func NewCommandHook(cmd Command) CommandHook {
|
||||||
|
return CommandHook{
|
||||||
|
Command: cmd,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type CommandHook struct {
|
||||||
|
Command
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c Command) Run(s HookState) error {
|
||||||
|
b, err := json.Marshal(s)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
cmd := exec.Cmd{
|
||||||
|
Path: c.Path,
|
||||||
|
Args: c.Args,
|
||||||
|
Env: c.Env,
|
||||||
|
Stdin: bytes.NewReader(b),
|
||||||
|
}
|
||||||
|
return cmd.Run()
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,10 +25,3 @@ type Mount struct {
|
||||||
// Optional Command to be run after Source is mounted.
|
// Optional Command to be run after Source is mounted.
|
||||||
PostmountCmds []Command `json:"postmount_cmds"`
|
PostmountCmds []Command `json:"postmount_cmds"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Command struct {
|
|
||||||
Path string `json:"path"`
|
|
||||||
Args []string `json:"args"`
|
|
||||||
Env []string `json:"env"`
|
|
||||||
Dir string `json:"dir"`
|
|
||||||
}
|
|
||||||
|
|
|
@ -185,6 +185,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
||||||
parentPipe: parentPipe,
|
parentPipe: parentPipe,
|
||||||
manager: c.cgroupManager,
|
manager: c.cgroupManager,
|
||||||
config: c.newInitConfig(p),
|
config: c.newInitConfig(p),
|
||||||
|
container: c,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -247,6 +248,17 @@ func (c *linuxContainer) Destroy() error {
|
||||||
err = rerr
|
err = rerr
|
||||||
}
|
}
|
||||||
c.initProcess = nil
|
c.initProcess = nil
|
||||||
|
if c.config.Hooks != nil {
|
||||||
|
s := configs.HookState{
|
||||||
|
ID: c.id,
|
||||||
|
Root: c.config.Rootfs,
|
||||||
|
}
|
||||||
|
for _, hook := range c.config.Hooks.Poststop {
|
||||||
|
if err := hook.Run(s); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -299,7 +311,7 @@ func (c *linuxContainer) checkCriuVersion() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
const descriptors_filename = "descriptors.json"
|
const descriptorsFilename = "descriptors.json"
|
||||||
|
|
||||||
func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
|
func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
|
||||||
mountDest := m.Destination
|
mountDest := m.Destination
|
||||||
|
@ -406,7 +418,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename), fdsJSON, 0655)
|
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -532,13 +544,19 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for _, i := range criuOpts.VethPairs {
|
||||||
|
veth := new(criurpc.CriuVethPair)
|
||||||
|
veth.IfOut = proto.String(i.HostInterfaceName)
|
||||||
|
veth.IfIn = proto.String(i.ContainerInterfaceName)
|
||||||
|
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
fds []string
|
fds []string
|
||||||
fdJSON []byte
|
fdJSON []byte
|
||||||
)
|
)
|
||||||
|
|
||||||
if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename)); err != nil {
|
if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -568,6 +586,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
|
||||||
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
|
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
|
||||||
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
|
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
|
||||||
defer criuClient.Close()
|
defer criuClient.Close()
|
||||||
|
@ -631,7 +650,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !resp.GetSuccess() {
|
if !resp.GetSuccess() {
|
||||||
return fmt.Errorf("criu failed: type %s errno %d", req.GetType().String(), resp.GetCrErrno())
|
typeString := req.GetType().String()
|
||||||
|
return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
t := resp.GetType()
|
t := resp.GetType()
|
||||||
|
@ -671,7 +691,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !st.Success() {
|
if !st.Success() {
|
||||||
return fmt.Errorf("criu failed: %s", st.String())
|
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,11 @@ type CriuPageServerInfo struct {
|
||||||
Port int32 // port number of CRIU page server
|
Port int32 // port number of CRIU page server
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type VethPairName struct {
|
||||||
|
ContainerInterfaceName string
|
||||||
|
HostInterfaceName string
|
||||||
|
}
|
||||||
|
|
||||||
type CriuOpts struct {
|
type CriuOpts struct {
|
||||||
ImagesDirectory string // directory for storing image files
|
ImagesDirectory string // directory for storing image files
|
||||||
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
||||||
|
@ -14,4 +19,5 @@ type CriuOpts struct {
|
||||||
ShellJob bool // allow to dump and restore shell jobs
|
ShellJob bool // allow to dump and restore shell jobs
|
||||||
FileLocks bool // handle file locks, for safety
|
FileLocks bool // handle file locks, for safety
|
||||||
PageServer CriuPageServerInfo // allow to dump to criu page server
|
PageServer CriuPageServerInfo // allow to dump to criu page server
|
||||||
|
VethPairs []VethPairName // pass the veth to criu when restore
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,9 @@ package libcontainer
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
|
@ -13,7 +15,6 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
"github.com/opencontainers/runc/libcontainer/netlink"
|
"github.com/opencontainers/runc/libcontainer/netlink"
|
||||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
"github.com/opencontainers/runc/libcontainer/system"
|
||||||
"github.com/opencontainers/runc/libcontainer/user"
|
"github.com/opencontainers/runc/libcontainer/user"
|
||||||
"github.com/opencontainers/runc/libcontainer/utils"
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
@ -239,6 +240,11 @@ func setupRlimits(config *configs.Config) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func setOomScoreAdj(oomScoreAdj int) error {
|
||||||
|
path := "/proc/self/oom_score_adj"
|
||||||
|
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0700)
|
||||||
|
}
|
||||||
|
|
||||||
// killCgroupProcesses freezes then iterates over all the processes inside the
|
// killCgroupProcesses freezes then iterates over all the processes inside the
|
||||||
// manager's cgroups sending a SIGKILL to each process then waiting for them to
|
// manager's cgroups sending a SIGKILL to each process then waiting for them to
|
||||||
// exit.
|
// exit.
|
||||||
|
@ -270,61 +276,3 @@ func killCgroupProcesses(m cgroups.Manager) error {
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func finalizeSeccomp(config *initConfig) error {
|
|
||||||
if config.Config.Seccomp == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
context := seccomp.New()
|
|
||||||
for _, s := range config.Config.Seccomp.Syscalls {
|
|
||||||
ss := &seccomp.Syscall{
|
|
||||||
Value: uint32(s.Value),
|
|
||||||
Action: seccompAction(s.Action),
|
|
||||||
}
|
|
||||||
if len(s.Args) > 0 {
|
|
||||||
ss.Args = seccompArgs(s.Args)
|
|
||||||
}
|
|
||||||
context.Add(ss)
|
|
||||||
}
|
|
||||||
return context.Load()
|
|
||||||
}
|
|
||||||
|
|
||||||
func seccompAction(a configs.Action) seccomp.Action {
|
|
||||||
switch a {
|
|
||||||
case configs.Kill:
|
|
||||||
return seccomp.Kill
|
|
||||||
case configs.Trap:
|
|
||||||
return seccomp.Trap
|
|
||||||
case configs.Allow:
|
|
||||||
return seccomp.Allow
|
|
||||||
}
|
|
||||||
return seccomp.Error(syscall.Errno(int(a)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func seccompArgs(args []*configs.Arg) seccomp.Args {
|
|
||||||
var sa []seccomp.Arg
|
|
||||||
for _, a := range args {
|
|
||||||
sa = append(sa, seccomp.Arg{
|
|
||||||
Index: uint32(a.Index),
|
|
||||||
Op: seccompOperator(a.Op),
|
|
||||||
Value: uint(a.Value),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return seccomp.Args{sa}
|
|
||||||
}
|
|
||||||
|
|
||||||
func seccompOperator(o configs.Operator) seccomp.Operator {
|
|
||||||
switch o {
|
|
||||||
case configs.EqualTo:
|
|
||||||
return seccomp.EqualTo
|
|
||||||
case configs.NotEqualTo:
|
|
||||||
return seccomp.NotEqualTo
|
|
||||||
case configs.GreatherThan:
|
|
||||||
return seccomp.GreatherThan
|
|
||||||
case configs.LessThan:
|
|
||||||
return seccomp.LessThan
|
|
||||||
case configs.MaskEqualTo:
|
|
||||||
return seccomp.MaskEqualTo
|
|
||||||
}
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ func SetFileCreateLabel(fileLabel string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func Relabel(path string, fileLabel string, relabel string) error {
|
func Relabel(path string, fileLabel string, shared bool) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,3 +59,13 @@ func DupSecOpt(src string) []string {
|
||||||
func DisableSecOpt() []string {
|
func DisableSecOpt() []string {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate checks that the label does not include unexpected options
|
||||||
|
func Validate(label string) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsShared checks that the label includes a "shared" mark
|
||||||
|
func IsShared(label string) bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
|
@ -9,6 +9,8 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/selinux"
|
"github.com/opencontainers/runc/libcontainer/selinux"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together")
|
||||||
|
|
||||||
// InitLabels returns the process label and file labels to be used within
|
// InitLabels returns the process label and file labels to be used within
|
||||||
// the container. A list of options can be passed into this function to alter
|
// the container. A list of options can be passed into this function to alter
|
||||||
// the labels. The labels returned will include a random MCS String, that is
|
// the labels. The labels returned will include a random MCS String, that is
|
||||||
|
@ -95,28 +97,24 @@ func SetFileCreateLabel(fileLabel string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Change the label of path to the filelabel string. If the relabel string
|
// Change the label of path to the filelabel string.
|
||||||
// is "z", relabel will change the MCS label to s0. This will allow all
|
// It changes the MCS label to s0 if shared is true.
|
||||||
// containers to share the content. If the relabel string is a "Z" then
|
// This will allow all containers to share the content.
|
||||||
// the MCS label should continue to be used. SELinux will use this field
|
func Relabel(path string, fileLabel string, shared bool) error {
|
||||||
// to make sure the content can not be shared by other containes.
|
if !selinux.SelinuxEnabled() {
|
||||||
func Relabel(path string, fileLabel string, relabel string) error {
|
return nil
|
||||||
exclude_path := []string{"/", "/usr", "/etc"}
|
}
|
||||||
|
|
||||||
if fileLabel == "" {
|
if fileLabel == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if !strings.ContainsAny(relabel, "zZ") {
|
|
||||||
return nil
|
exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true}
|
||||||
|
if exclude_paths[path] {
|
||||||
|
return fmt.Errorf("Relabeling of %s is not allowed", path)
|
||||||
}
|
}
|
||||||
for _, p := range exclude_path {
|
|
||||||
if path == p {
|
if shared {
|
||||||
return fmt.Errorf("Relabeling of %s is not allowed", path)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if strings.Contains(relabel, "z") && strings.Contains(relabel, "Z") {
|
|
||||||
return fmt.Errorf("Bad SELinux option z and Z can not be used together")
|
|
||||||
}
|
|
||||||
if strings.Contains(relabel, "z") {
|
|
||||||
c := selinux.NewContext(fileLabel)
|
c := selinux.NewContext(fileLabel)
|
||||||
c["level"] = "s0"
|
c["level"] = "s0"
|
||||||
fileLabel = c.Get()
|
fileLabel = c.Get()
|
||||||
|
@ -161,3 +159,16 @@ func DupSecOpt(src string) []string {
|
||||||
func DisableSecOpt() []string {
|
func DisableSecOpt() []string {
|
||||||
return selinux.DisableSecOpt()
|
return selinux.DisableSecOpt()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate checks that the label does not include unexpected options
|
||||||
|
func Validate(label string) error {
|
||||||
|
if strings.Contains(label, "z") && strings.Contains(label, "Z") {
|
||||||
|
return ErrIncompatibleLabel
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsShared checks that the label includes a "shared" mark
|
||||||
|
func IsShared(label string) bool {
|
||||||
|
return strings.Contains(label, "z")
|
||||||
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// +build arm ppc64
|
// +build arm ppc64 ppc64le
|
||||||
|
|
||||||
package netlink
|
package netlink
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// +build !arm,!ppc64
|
// +build !arm,!ppc64,!ppc64le
|
||||||
|
|
||||||
package netlink
|
package netlink
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
"github.com/opencontainers/runc/libcontainer/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -138,11 +139,9 @@ func (p *setnsProcess) terminate() error {
|
||||||
|
|
||||||
func (p *setnsProcess) wait() (*os.ProcessState, error) {
|
func (p *setnsProcess) wait() (*os.ProcessState, error) {
|
||||||
err := p.cmd.Wait()
|
err := p.cmd.Wait()
|
||||||
if err != nil {
|
|
||||||
return p.cmd.ProcessState, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return p.cmd.ProcessState, nil
|
// Return actual ProcessState even on Wait error
|
||||||
|
return p.cmd.ProcessState, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *setnsProcess) pid() int {
|
func (p *setnsProcess) pid() int {
|
||||||
|
@ -175,9 +174,9 @@ func (p *initProcess) externalDescriptors() []string {
|
||||||
return p.fds
|
return p.fds
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *initProcess) start() error {
|
func (p *initProcess) start() (err error) {
|
||||||
defer p.parentPipe.Close()
|
defer p.parentPipe.Close()
|
||||||
err := p.cmd.Start()
|
err = p.cmd.Start()
|
||||||
p.childPipe.Close()
|
p.childPipe.Close()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return newSystemError(err)
|
return newSystemError(err)
|
||||||
|
@ -202,6 +201,18 @@ func (p *initProcess) start() error {
|
||||||
p.manager.Destroy()
|
p.manager.Destroy()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
if p.config.Config.Hooks != nil {
|
||||||
|
s := configs.HookState{
|
||||||
|
ID: p.container.id,
|
||||||
|
Pid: p.pid(),
|
||||||
|
Root: p.config.Config.Rootfs,
|
||||||
|
}
|
||||||
|
for _, hook := range p.config.Config.Hooks.Prestart {
|
||||||
|
if err := hook.Run(s); err != nil {
|
||||||
|
return newSystemError(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := p.createNetworkInterfaces(); err != nil {
|
if err := p.createNetworkInterfaces(); err != nil {
|
||||||
return newSystemError(err)
|
return newSystemError(err)
|
||||||
}
|
}
|
||||||
|
@ -286,9 +297,7 @@ func (p *initProcess) setExternalDescriptors(newFds []string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func getPipeFds(pid int) ([]string, error) {
|
func getPipeFds(pid int) ([]string, error) {
|
||||||
var fds []string
|
fds := make([]string, 3)
|
||||||
|
|
||||||
fds = make([]string, 3)
|
|
||||||
|
|
||||||
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
|
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
|
||||||
for i := 0; i < 3; i++ {
|
for i := 0; i < 3; i++ {
|
||||||
|
|
|
@ -27,6 +27,8 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
|
||||||
if err := prepareRoot(config); err != nil {
|
if err := prepareRoot(config); err != nil {
|
||||||
return newSystemError(err)
|
return newSystemError(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setupDev := len(config.Devices) == 0
|
||||||
for _, m := range config.Mounts {
|
for _, m := range config.Mounts {
|
||||||
for _, precmd := range m.PremountCmds {
|
for _, precmd := range m.PremountCmds {
|
||||||
if err := mountCmd(precmd); err != nil {
|
if err := mountCmd(precmd); err != nil {
|
||||||
|
@ -43,14 +45,16 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := createDevices(config); err != nil {
|
if !setupDev {
|
||||||
return newSystemError(err)
|
if err := createDevices(config); err != nil {
|
||||||
}
|
return newSystemError(err)
|
||||||
if err := setupPtmx(config, console); err != nil {
|
}
|
||||||
return newSystemError(err)
|
if err := setupPtmx(config, console); err != nil {
|
||||||
}
|
return newSystemError(err)
|
||||||
if err := setupDevSymlinks(config.Rootfs); err != nil {
|
}
|
||||||
return newSystemError(err)
|
if err := setupDevSymlinks(config.Rootfs); err != nil {
|
||||||
|
return newSystemError(err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err := syscall.Chdir(config.Rootfs); err != nil {
|
if err := syscall.Chdir(config.Rootfs); err != nil {
|
||||||
return newSystemError(err)
|
return newSystemError(err)
|
||||||
|
@ -63,8 +67,10 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return newSystemError(err)
|
return newSystemError(err)
|
||||||
}
|
}
|
||||||
if err := reOpenDevNull(config.Rootfs); err != nil {
|
if !setupDev {
|
||||||
return newSystemError(err)
|
if err := reOpenDevNull(config.Rootfs); err != nil {
|
||||||
|
return newSystemError(err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if config.Readonlyfs {
|
if config.Readonlyfs {
|
||||||
if err := setReadonly(); err != nil {
|
if err := setReadonly(); err != nil {
|
||||||
|
@ -131,6 +137,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
|
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
|
||||||
|
case "securityfs":
|
||||||
|
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
|
||||||
case "bind":
|
case "bind":
|
||||||
stat, err := os.Stat(m.Source)
|
stat, err := os.Stat(m.Source)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -160,7 +171,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if m.Relabel != "" {
|
if m.Relabel != "" {
|
||||||
if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil {
|
if err := label.Validate(m.Relabel); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
shared := label.IsShared(m.Relabel)
|
||||||
|
if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,34 +0,0 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package seccomp
|
|
||||||
|
|
||||||
import "strings"
|
|
||||||
|
|
||||||
type bpfLabel struct {
|
|
||||||
label string
|
|
||||||
location uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
type bpfLabels []bpfLabel
|
|
||||||
|
|
||||||
// labelIndex returns the index for the label if it exists in the slice.
|
|
||||||
// if it does not exist in the slice it appends the label lb to the end
|
|
||||||
// of the slice and returns the index.
|
|
||||||
func labelIndex(labels *bpfLabels, lb string) uint32 {
|
|
||||||
var id uint32
|
|
||||||
for id = 0; id < uint32(len(*labels)); id++ {
|
|
||||||
if strings.EqualFold(lb, (*labels)[id].label) {
|
|
||||||
return id
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*labels = append(*labels, bpfLabel{lb, 0xffffffff})
|
|
||||||
return id
|
|
||||||
}
|
|
||||||
|
|
||||||
func scmpBpfStmt(code uint16, k uint32) sockFilter {
|
|
||||||
return sockFilter{code, 0, 0, k}
|
|
||||||
}
|
|
||||||
|
|
||||||
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
|
|
||||||
return sockFilter{code, jt, jf, k}
|
|
||||||
}
|
|
53
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go
поставляемый
Normal file
53
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go
поставляемый
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ConvertStringToOperator converts a string into a Seccomp comparison operator.
|
||||||
|
// Comparison operators use the names they are assigned by Libseccomp's header.
|
||||||
|
// Attempting to convert a string that is not a valid operator results in an
|
||||||
|
// error.
|
||||||
|
func ConvertStringToOperator(in string) (configs.Operator, error) {
|
||||||
|
switch in {
|
||||||
|
case "SCMP_CMP_NE":
|
||||||
|
return configs.NotEqualTo, nil
|
||||||
|
case "SCMP_CMP_LT":
|
||||||
|
return configs.LessThan, nil
|
||||||
|
case "SCMP_CMP_LE":
|
||||||
|
return configs.LessThanOrEqualTo, nil
|
||||||
|
case "SCMP_CMP_EQ":
|
||||||
|
return configs.EqualTo, nil
|
||||||
|
case "SCMP_CMP_GE":
|
||||||
|
return configs.GreaterThan, nil
|
||||||
|
case "SCMP_CMP_GT":
|
||||||
|
return configs.GreaterThanOrEqualTo, nil
|
||||||
|
case "SCMP_CMP_MASKED_EQ":
|
||||||
|
return configs.MaskEqualTo, nil
|
||||||
|
default:
|
||||||
|
return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConvertStringToAction converts a string into a Seccomp rule match action.
|
||||||
|
// Actions use the named they are assigned in Libseccomp's header, though some
|
||||||
|
// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
|
||||||
|
// return errors.
|
||||||
|
// Attempting to convert a string that is not a valid action results in an
|
||||||
|
// error.
|
||||||
|
func ConvertStringToAction(in string) (configs.Action, error) {
|
||||||
|
switch in {
|
||||||
|
case "SCMP_ACT_KILL":
|
||||||
|
return configs.Kill, nil
|
||||||
|
case "SCMP_ACT_ERRNO":
|
||||||
|
return configs.Errno, nil
|
||||||
|
case "SCMP_ACT_TRAP":
|
||||||
|
return configs.Trap, nil
|
||||||
|
case "SCMP_ACT_ALLOW":
|
||||||
|
return configs.Allow, nil
|
||||||
|
default:
|
||||||
|
return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,146 +0,0 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package seccomp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"syscall"
|
|
||||||
)
|
|
||||||
|
|
||||||
const labelTemplate = "lb-%d-%d"
|
|
||||||
|
|
||||||
// Action is the type of action that will be taken when a
|
|
||||||
// syscall is performed.
|
|
||||||
type Action int
|
|
||||||
|
|
||||||
const (
|
|
||||||
Kill Action = iota - 3 // Kill the calling process of the syscall.
|
|
||||||
Trap // Trap and coredump the calling process of the syscall.
|
|
||||||
Allow // Allow the syscall to be completed.
|
|
||||||
)
|
|
||||||
|
|
||||||
// Syscall is the specified syscall, action, and any type of arguments
|
|
||||||
// to filter on.
|
|
||||||
type Syscall struct {
|
|
||||||
// Value is the syscall number.
|
|
||||||
Value uint32
|
|
||||||
// Action is the action to perform when the specified syscall is made.
|
|
||||||
Action Action
|
|
||||||
// Args are filters that can be specified on the arguments to the syscall.
|
|
||||||
Args Args
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Syscall) scmpAction() uint32 {
|
|
||||||
switch s.Action {
|
|
||||||
case Allow:
|
|
||||||
return retAllow
|
|
||||||
case Trap:
|
|
||||||
return retTrap
|
|
||||||
case Kill:
|
|
||||||
return retKill
|
|
||||||
}
|
|
||||||
return actionErrno(uint32(s.Action))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Arg represents an argument to the syscall with the argument's index,
|
|
||||||
// the operator to apply when matching, and the argument's value at that time.
|
|
||||||
type Arg struct {
|
|
||||||
Index uint32 // index of args which start from zero
|
|
||||||
Op Operator // operation, such as EQ/NE/GE/LE
|
|
||||||
Value uint // the value of arg
|
|
||||||
}
|
|
||||||
|
|
||||||
type Args [][]Arg
|
|
||||||
|
|
||||||
var (
|
|
||||||
ErrUnresolvedLabel = errors.New("seccomp: unresolved label")
|
|
||||||
ErrDuplicateLabel = errors.New("seccomp: duplicate label use")
|
|
||||||
ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
|
|
||||||
)
|
|
||||||
|
|
||||||
// Error returns an Action that will be used to send the calling
|
|
||||||
// process the specified errno when the syscall is made.
|
|
||||||
func Error(code syscall.Errno) Action {
|
|
||||||
return Action(code)
|
|
||||||
}
|
|
||||||
|
|
||||||
// New returns a new syscall context for use.
|
|
||||||
func New() *Context {
|
|
||||||
return &Context{
|
|
||||||
syscalls: make(map[uint32]*Syscall),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Context holds syscalls for the current process to limit the type of
|
|
||||||
// actions the calling process can make.
|
|
||||||
type Context struct {
|
|
||||||
syscalls map[uint32]*Syscall
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add will add the specified syscall, action, and arguments to the seccomp
|
|
||||||
// Context.
|
|
||||||
func (c *Context) Add(s *Syscall) {
|
|
||||||
c.syscalls[s.Value] = s
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove removes the specified syscall configuration from the Context.
|
|
||||||
func (c *Context) Remove(call uint32) {
|
|
||||||
delete(c.syscalls, call)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load will apply the Context to the calling process makeing any secccomp process changes
|
|
||||||
// apply after the context is loaded.
|
|
||||||
func (c *Context) Load() error {
|
|
||||||
filter, err := c.newFilter()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
prog := newSockFprog(filter)
|
|
||||||
return prog.set()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Context) newFilter() ([]sockFilter, error) {
|
|
||||||
var (
|
|
||||||
labels bpfLabels
|
|
||||||
f = newFilter()
|
|
||||||
)
|
|
||||||
for _, s := range c.syscalls {
|
|
||||||
f.addSyscall(s, &labels)
|
|
||||||
}
|
|
||||||
f.allow()
|
|
||||||
// process args for the syscalls
|
|
||||||
for _, s := range c.syscalls {
|
|
||||||
if err := f.addArguments(s, &labels); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// apply labels for arguments
|
|
||||||
idx := int32(len(*f) - 1)
|
|
||||||
for ; idx >= 0; idx-- {
|
|
||||||
lf := &(*f)[idx]
|
|
||||||
if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
rel := int32(lf.jt)<<8 | int32(lf.jf)
|
|
||||||
if ((jumpJT << 8) | jumpJF) == rel {
|
|
||||||
if labels[lf.k].location == 0xffffffff {
|
|
||||||
return nil, ErrUnresolvedLabel
|
|
||||||
}
|
|
||||||
lf.k = labels[lf.k].location - uint32(idx+1)
|
|
||||||
lf.jt = 0
|
|
||||||
lf.jf = 0
|
|
||||||
} else if ((labelJT << 8) | labelJF) == rel {
|
|
||||||
if labels[lf.k].location != 0xffffffff {
|
|
||||||
return nil, ErrDuplicateLabel
|
|
||||||
}
|
|
||||||
labels[lf.k].location = uint32(idx)
|
|
||||||
lf.k = 0
|
|
||||||
lf.jt = 0
|
|
||||||
lf.jf = 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return *f, nil
|
|
||||||
}
|
|
|
@ -1,118 +0,0 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package seccomp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"syscall"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
type sockFilter struct {
|
|
||||||
code uint16
|
|
||||||
jt uint8
|
|
||||||
jf uint8
|
|
||||||
k uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
func newFilter() *filter {
|
|
||||||
var f filter
|
|
||||||
f = append(f, sockFilter{
|
|
||||||
pfLD + syscall.BPF_W + syscall.BPF_ABS,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
uint32(unsafe.Offsetof(secData.nr)),
|
|
||||||
})
|
|
||||||
return &f
|
|
||||||
}
|
|
||||||
|
|
||||||
type filter []sockFilter
|
|
||||||
|
|
||||||
func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
|
|
||||||
if len(s.Args) == 0 {
|
|
||||||
f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
|
|
||||||
} else {
|
|
||||||
if len(s.Args[0]) > 0 {
|
|
||||||
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index)
|
|
||||||
f.call(s.Value,
|
|
||||||
scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
|
|
||||||
jumpJT, jumpJF))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
|
|
||||||
for i := 0; len(s.Args) > i; i++ {
|
|
||||||
if len(s.Args[i]) > 0 {
|
|
||||||
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
|
|
||||||
f.label(labels, lb)
|
|
||||||
f.arg(s.Args[i][0].Index)
|
|
||||||
}
|
|
||||||
for j := 0; j < len(s.Args[i]); j++ {
|
|
||||||
var jf sockFilter
|
|
||||||
if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
|
|
||||||
lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
|
|
||||||
jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
|
|
||||||
labelIndex(labels, lbj), jumpJT, jumpJF)
|
|
||||||
} else {
|
|
||||||
jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
|
|
||||||
}
|
|
||||||
if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
f.allow()
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *filter) label(labels *bpfLabels, lb string) {
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *filter) call(nr uint32, jt sockFilter) {
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1))
|
|
||||||
*f = append(*f, jt)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *filter) allow() {
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *filter) deny() {
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *filter) arg(index uint32) {
|
|
||||||
arg(f, index)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
|
|
||||||
switch operation {
|
|
||||||
case EqualTo:
|
|
||||||
jumpEqualTo(f, v, jf)
|
|
||||||
case NotEqualTo:
|
|
||||||
jumpNotEqualTo(f, v, jf)
|
|
||||||
case GreatherThan:
|
|
||||||
jumpGreaterThan(f, v, jf)
|
|
||||||
case LessThan:
|
|
||||||
jumpLessThan(f, v, jf)
|
|
||||||
case MaskEqualTo:
|
|
||||||
jumpMaskEqualTo(f, v, jf)
|
|
||||||
default:
|
|
||||||
return ErrUnsupportedOperation
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func arg(f *filter, idx uint32) {
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1))
|
|
||||||
}
|
|
||||||
|
|
||||||
func jump(f *filter, labels *bpfLabels, lb string) {
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
|
|
||||||
jumpJT, jumpJF))
|
|
||||||
}
|
|
|
@ -1,68 +0,0 @@
|
||||||
// +build linux,amd64
|
|
||||||
|
|
||||||
package seccomp
|
|
||||||
|
|
||||||
// Using BPF filters
|
|
||||||
//
|
|
||||||
// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
|
|
||||||
import "syscall"
|
|
||||||
|
|
||||||
func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
|
|
||||||
lo := uint32(uint64(v) % 0x100000000)
|
|
||||||
hi := uint32(uint64(v) / 0x100000000)
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0))
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
*f = append(*f, jt)
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
}
|
|
||||||
|
|
||||||
func jumpEqualTo(f *filter, v uint, jt sockFilter) {
|
|
||||||
lo := uint32(uint64(v) % 0x100000000)
|
|
||||||
hi := uint32(uint64(v) / 0x100000000)
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
*f = append(*f, jt)
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
}
|
|
||||||
|
|
||||||
func jumpLessThan(f *filter, v uint, jt sockFilter) {
|
|
||||||
lo := uint32(uint64(v) % 0x100000000)
|
|
||||||
hi := uint32(uint64(v) / 0x100000000)
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0))
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
*f = append(*f, jt)
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
}
|
|
||||||
|
|
||||||
func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
|
|
||||||
lo := uint32(uint64(v) % 0x100000000)
|
|
||||||
hi := uint32(uint64(v) / 0x100000000)
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
*f = append(*f, jt)
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
}
|
|
||||||
|
|
||||||
// this checks for a value inside a mask. The evalusation is equal to doing
|
|
||||||
// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER
|
|
||||||
func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
|
|
||||||
lo := uint32(uint64(v) % 0x100000000)
|
|
||||||
hi := uint32(uint64(v) / 0x100000000)
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)))
|
|
||||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2))
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
*f = append(*f, jt)
|
|
||||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
|
||||||
}
|
|
165
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
поставляемый
Normal file
165
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
поставляемый
Normal file
|
@ -0,0 +1,165 @@
|
||||||
|
// +build linux,cgo,seccomp
|
||||||
|
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||||
|
)
|
||||||
|
|
||||||
|
// libseccomp actions that getAction hands back for the corresponding
// libcontainer actions. actErrno is built from libseccomp.ActErrno with
// EPERM set as its return code.
var (
|
||||||
|
actAllow = libseccomp.ActAllow
|
||||||
|
actTrap = libseccomp.ActTrap
|
||||||
|
actKill = libseccomp.ActKill
|
||||||
|
actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
|
||||||
|
)
|
||||||
|
|
||||||
|
// Filters given syscalls in a container, preventing them from being used
|
||||||
|
// Started in the container init process, and carried over to all child processes
|
||||||
|
// Setns calls, however, require a separate invocation, as they are not children
|
||||||
|
// of the init until they join the namespace
|
||||||
|
func InitSeccomp(config *configs.Seccomp) error {
|
||||||
|
if config == nil {
|
||||||
|
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
|
||||||
|
}
|
||||||
|
|
||||||
|
defaultAction, err := getAction(config.DefaultAction)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error initializing seccomp - invalid default action")
|
||||||
|
}
|
||||||
|
|
||||||
|
filter, err := libseccomp.NewFilter(defaultAction)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error creating filter: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unset no new privs bit
|
||||||
|
if err := filter.SetNoNewPrivsBit(false); err != nil {
|
||||||
|
return fmt.Errorf("error setting no new privileges: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a rule for each syscall
|
||||||
|
for _, call := range config.Syscalls {
|
||||||
|
if call == nil {
|
||||||
|
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = matchCall(filter, call); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = filter.Load(); err != nil {
|
||||||
|
return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Libcontainer Action to Libseccomp ScmpAction
|
||||||
|
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
|
||||||
|
switch act {
|
||||||
|
case configs.Kill:
|
||||||
|
return actKill, nil
|
||||||
|
case configs.Errno:
|
||||||
|
return actErrno, nil
|
||||||
|
case configs.Trap:
|
||||||
|
return actTrap, nil
|
||||||
|
case configs.Allow:
|
||||||
|
return actAllow, nil
|
||||||
|
default:
|
||||||
|
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Libcontainer Operator to Libseccomp ScmpCompareOp
|
||||||
|
func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
|
||||||
|
switch op {
|
||||||
|
case configs.EqualTo:
|
||||||
|
return libseccomp.CompareEqual, nil
|
||||||
|
case configs.NotEqualTo:
|
||||||
|
return libseccomp.CompareNotEqual, nil
|
||||||
|
case configs.GreaterThan:
|
||||||
|
return libseccomp.CompareGreater, nil
|
||||||
|
case configs.GreaterThanOrEqualTo:
|
||||||
|
return libseccomp.CompareGreaterEqual, nil
|
||||||
|
case configs.LessThan:
|
||||||
|
return libseccomp.CompareLess, nil
|
||||||
|
case configs.LessThanOrEqualTo:
|
||||||
|
return libseccomp.CompareLessOrEqual, nil
|
||||||
|
case configs.MaskEqualTo:
|
||||||
|
return libseccomp.CompareMaskedEqual, nil
|
||||||
|
default:
|
||||||
|
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Libcontainer Arg to Libseccomp ScmpCondition
|
||||||
|
func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
|
||||||
|
cond := libseccomp.ScmpCondition{}
|
||||||
|
|
||||||
|
if arg == nil {
|
||||||
|
return cond, fmt.Errorf("cannot convert nil to syscall condition")
|
||||||
|
}
|
||||||
|
|
||||||
|
op, err := getOperator(arg.Op)
|
||||||
|
if err != nil {
|
||||||
|
return cond, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a rule to match a single syscall
|
||||||
|
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
|
||||||
|
if call == nil || filter == nil {
|
||||||
|
return fmt.Errorf("cannot use nil as syscall to block")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(call.Name) == 0 {
|
||||||
|
return fmt.Errorf("empty string is not a valid syscall")
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we can't resolve the syscall, assume it's not supported on this kernel
|
||||||
|
// Ignore it, don't error out
|
||||||
|
callNum, err := libseccomp.GetSyscallFromName(call.Name)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Error resolving syscall name %s: %s - ignoring syscall.", call.Name, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the call's action to the libseccomp equivalent
|
||||||
|
callAct, err := getAction(call.Action)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unconditional match - just add the rule
|
||||||
|
if len(call.Args) == 0 {
|
||||||
|
if err = filter.AddRule(callNum, callAct); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Conditional match - convert the per-arg rules into library format
|
||||||
|
conditions := []libseccomp.ScmpCondition{}
|
||||||
|
|
||||||
|
for _, cond := range call.Args {
|
||||||
|
newCond, err := getCondition(cond)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
conditions = append(conditions, newCond)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -1,124 +0,0 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go.
|
|
||||||
package seccomp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"syscall"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Operator that is used for argument comparison.
|
|
||||||
type Operator int
|
|
||||||
|
|
||||||
const (
|
|
||||||
EqualTo Operator = iota
|
|
||||||
NotEqualTo
|
|
||||||
GreatherThan
|
|
||||||
LessThan
|
|
||||||
MaskEqualTo
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
jumpJT = 0xff
|
|
||||||
jumpJF = 0xff
|
|
||||||
labelJT = 0xfe
|
|
||||||
labelJF = 0xfe
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
pfLD = 0x0
|
|
||||||
retKill = 0x00000000
|
|
||||||
retTrap = 0x00030000
|
|
||||||
retAllow = 0x7fff0000
|
|
||||||
modeFilter = 0x2
|
|
||||||
prSetNoNewPrivileges = 0x26
|
|
||||||
)
|
|
||||||
|
|
||||||
func actionErrno(errno uint32) uint32 {
|
|
||||||
return 0x00050000 | (errno & 0x0000ffff)
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
secData = struct {
|
|
||||||
nr int32
|
|
||||||
arch uint32
|
|
||||||
insPointer uint64
|
|
||||||
args [6]uint64
|
|
||||||
}{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
|
|
||||||
)
|
|
||||||
|
|
||||||
var isLittle = func() bool {
|
|
||||||
var (
|
|
||||||
x = 0x1234
|
|
||||||
p = unsafe.Pointer(&x)
|
|
||||||
p2 = (*[unsafe.Sizeof(0)]byte)(p)
|
|
||||||
)
|
|
||||||
if p2[0] == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}()
|
|
||||||
|
|
||||||
var endian endianSupport
|
|
||||||
|
|
||||||
type endianSupport struct {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e endianSupport) hi(i uint32) uint32 {
|
|
||||||
if isLittle {
|
|
||||||
return e.little(i)
|
|
||||||
}
|
|
||||||
return e.big(i)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e endianSupport) low(i uint32) uint32 {
|
|
||||||
if isLittle {
|
|
||||||
return e.big(i)
|
|
||||||
}
|
|
||||||
return e.little(i)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (endianSupport) big(idx uint32) uint32 {
|
|
||||||
if idx >= 6 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return uint32(unsafe.Offsetof(secData.args)) + 8*idx
|
|
||||||
}
|
|
||||||
|
|
||||||
func (endianSupport) little(idx uint32) uint32 {
|
|
||||||
if idx < 0 || idx >= 6 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return uint32(unsafe.Offsetof(secData.args)) +
|
|
||||||
uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
|
|
||||||
}
|
|
||||||
|
|
||||||
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
|
|
||||||
_, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
|
|
||||||
if err != 0 {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newSockFprog(filter []sockFilter) *sockFprog {
|
|
||||||
return &sockFprog{
|
|
||||||
len: uint16(len(filter)),
|
|
||||||
filt: filter,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type sockFprog struct {
|
|
||||||
len uint16
|
|
||||||
filt []sockFilter
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *sockFprog) set() error {
|
|
||||||
_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
|
|
||||||
uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
|
|
||||||
if err != 0 {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
|
@ -1,3 +1,19 @@
|
||||||
// +build !linux
|
// +build !linux !cgo !seccomp
|
||||||
|
|
||||||
package seccomp
|
package seccomp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
|
||||||
|
|
||||||
|
// Seccomp not supported, do nothing
|
||||||
|
func InitSeccomp(config *configs.Seccomp) error {
|
||||||
|
if config != nil {
|
||||||
|
return ErrSeccompNotEnabled
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@ import (
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||||
"github.com/opencontainers/runc/libcontainer/label"
|
"github.com/opencontainers/runc/libcontainer/label"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
"github.com/opencontainers/runc/libcontainer/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -20,6 +21,14 @@ func (l *linuxSetnsInit) Init() error {
|
||||||
if err := setupRlimits(l.config.Config); err != nil {
|
if err := setupRlimits(l.config.Config); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if l.config.Config.Seccomp != nil {
|
||||||
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := finalizeNamespace(l.config); err != nil {
|
if err := finalizeNamespace(l.config); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
"github.com/opencontainers/runc/libcontainer/label"
|
"github.com/opencontainers/runc/libcontainer/label"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
"github.com/opencontainers/runc/libcontainer/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -46,6 +47,10 @@ func (l *linuxStandardInit) Init() error {
|
||||||
if err := setupRlimits(l.config.Config); err != nil {
|
if err := setupRlimits(l.config.Config); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
label.Init()
|
label.Init()
|
||||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||||
|
@ -85,6 +90,11 @@ func (l *linuxStandardInit) Init() error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if l.config.Config.Seccomp != nil {
|
||||||
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := finalizeNamespace(l.config); err != nil {
|
if err := finalizeNamespace(l.config); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -99,8 +109,5 @@ func (l *linuxStandardInit) Init() error {
|
||||||
if syscall.Getppid() != l.parentPid {
|
if syscall.Getppid() != l.parentPid {
|
||||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||||
}
|
}
|
||||||
if err := finalizeSeccomp(l.config); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче