From 6d022bda3bddb930948fec99ad35c402ac79e8f7 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Mon, 15 Jun 2015 18:18:38 -0400 Subject: [PATCH] Updates libcontainer to v2.2.1 Signed-off-by: Mrunal Patel --- daemon/stats_linux.go | 6 +- hack/vendor.sh | 2 +- .../src/github.com/Sirupsen/logrus/README.md | 2 +- .../docker/libcontainer/CONTRIBUTING.md | 6 +- .../github.com/docker/libcontainer/README.md | 2 +- .../github.com/docker/libcontainer/ROADMAP.md | 2 +- .../github.com/docker/libcontainer/SPEC.md | 18 +-- .../libcontainer/cgroups/fs/apply_raw.go | 18 ++- .../docker/libcontainer/cgroups/fs/hugetlb.go | 45 +++++- .../docker/libcontainer/cgroups/fs/memory.go | 76 ++++++--- .../docker/libcontainer/cgroups/stats.go | 35 +++-- .../cgroups/systemd/apply_systemd.go | 46 ++++-- .../docker/libcontainer/cgroups/utils.go | 40 ++++- .../docker/libcontainer/configs/cgroup.go | 6 + .../docker/libcontainer/configs/config.go | 41 ++++- .../{config_linux.go => config_unix.go} | 2 + .../libcontainer/configs/device_defaults.go | 2 +- .../libcontainer/configs/hugepage_limit.go | 9 ++ ...namespaces_linux.go => namespaces_unix.go} | 2 +- .../docker/libcontainer/console_freebsd.go | 13 ++ .../docker/libcontainer/console_linux.go | 6 +- .../{devices_linux.go => devices_unix.go} | 2 + .../docker/libcontainer/devices/number.go | 2 +- .../docker/libcontainer/init_linux.go | 71 ++++++++- ...linux_arm.go => netlink_linux_armppc64.go} | 2 + .../netlink/netlink_linux_notarm.go | 2 +- .../docker/libcontainer/nsenter/README.md | 2 +- .../docker/libcontainer/nsenter/nsexec.c | 6 +- .../docker/libcontainer/rootfs_linux.go | 2 +- .../docker/libcontainer/seccomp/bpf.go | 32 ++++ .../docker/libcontainer/seccomp/context.go | 144 ++++++++++++++++++ .../docker/libcontainer/seccomp/filter.go | 116 ++++++++++++++ .../docker/libcontainer/seccomp/jump_amd64.go | 68 +++++++++ .../docker/libcontainer/seccomp/seccomp.go | 122 +++++++++++++++ .../libcontainer/standard_init_linux.go | 3 + 
.../docker/libcontainer/stats_freebsd.go | 5 + .../docker/libcontainer/system/setns_linux.go | 8 +- .../docker/libcontainer/system/sysconfig.go | 2 +- .../docker/libcontainer/user/user.go | 57 +++++++ .../docker/libcontainer/utils/utils.go | 3 + vendor/src/github.com/docker/libtrust/util.go | 2 +- .../mistifyio/go-zfs/CONTRIBUTING.md | 2 +- .../src/github.com/mistifyio/go-zfs/README.md | 2 +- .../github.com/vishvananda/netlink/README.md | 2 +- .../github.com/vishvananda/netns/README.md | 2 +- 45 files changed, 945 insertions(+), 93 deletions(-) rename vendor/src/github.com/docker/libcontainer/configs/{config_linux.go => config_unix.go} (98%) create mode 100644 vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go rename vendor/src/github.com/docker/libcontainer/configs/{namespaces_linux.go => namespaces_unix.go} (98%) create mode 100644 vendor/src/github.com/docker/libcontainer/console_freebsd.go rename vendor/src/github.com/docker/libcontainer/devices/{devices_linux.go => devices_unix.go} (98%) rename vendor/src/github.com/docker/libcontainer/netlink/{netlink_linux_arm.go => netlink_linux_armppc64.go} (70%) create mode 100644 vendor/src/github.com/docker/libcontainer/seccomp/bpf.go create mode 100644 vendor/src/github.com/docker/libcontainer/seccomp/context.go create mode 100644 vendor/src/github.com/docker/libcontainer/seccomp/filter.go create mode 100644 vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go create mode 100644 vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go create mode 100644 vendor/src/github.com/docker/libcontainer/stats_freebsd.go diff --git a/daemon/stats_linux.go b/daemon/stats_linux.go index 146f09491d..9db3f17fe7 100644 --- a/daemon/stats_linux.go +++ b/daemon/stats_linux.go @@ -52,10 +52,10 @@ func convertStatsToAPITypes(ls *libcontainer.Stats) *types.Stats { } mem := cs.MemoryStats s.MemoryStats = types.MemoryStats{ - Usage: mem.Usage, - MaxUsage: mem.MaxUsage, + Usage: mem.Usage.Usage, + MaxUsage: 
mem.Usage.MaxUsage, Stats: mem.Stats, - Failcnt: mem.Failcnt, + Failcnt: mem.Usage.Failcnt, } } diff --git a/hack/vendor.sh b/hack/vendor.sh index 9c0a7c2f07..e6385f3d06 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -29,7 +29,7 @@ clone git github.com/hashicorp/consul v0.5.2 # get distribution packages clone git github.com/docker/distribution b9eeb328080d367dbde850ec6e94f1e4ac2b5efe -clone git github.com/docker/libcontainer v2.1.1 +clone git github.com/docker/libcontainer v2.2.1 # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) clone git github.com/coreos/go-systemd v2 clone git github.com/godbus/dbus v2 diff --git a/vendor/src/github.com/Sirupsen/logrus/README.md b/vendor/src/github.com/Sirupsen/logrus/README.md index 72e7a78288..3578deaec0 100644 --- a/vendor/src/github.com/Sirupsen/logrus/README.md +++ b/vendor/src/github.com/Sirupsen/logrus/README.md @@ -324,7 +324,7 @@ func (f *JSONFormatter) Format(entry *Entry) ([]byte, error) { #### Logger as an `io.Writer` -Logrus can be transformed into an `io.Writer`. That writer is the end of an `io.Pipe` and it is your responsibility to close it. +Logrus can be transformed into an `io.Writer`. That writer is the end of an `io.Pipe` and it is your responsibility to close it. ```go w := logger.Writer() diff --git a/vendor/src/github.com/docker/libcontainer/CONTRIBUTING.md b/vendor/src/github.com/docker/libcontainer/CONTRIBUTING.md index c848f4dbcb..667cc5a63f 100644 --- a/vendor/src/github.com/docker/libcontainer/CONTRIBUTING.md +++ b/vendor/src/github.com/docker/libcontainer/CONTRIBUTING.md @@ -24,21 +24,21 @@ The following packages are required to compile libcontainer natively. - git - cgutils -You can develop on OS X, but you are limited to Dockerfile-based builds only. +You can develop on OSX, but you are limited to Dockerfile-based builds only. ### Building libcontainer from Dockerfile make all This is the easiest way of building libcontainer. 
-As this build is done using Docker, you can even run this from [OS X](https://github.com/boot2docker/boot2docker) +As this build is done using Docker, you can even run this from [OSX](https://github.com/boot2docker/boot2docker) ### Testing changes with "nsinit" make sh This will create an container that runs `nsinit exec sh` on a busybox rootfs with the configuration from ['minimal.json'](https://github.com/docker/libcontainer/blob/master/sample_configs/minimal.json). -Like the previous command, you can run this on OS X too! +Like the previous command, you can run this on OSX too! ### Building libcontainer directly diff --git a/vendor/src/github.com/docker/libcontainer/README.md b/vendor/src/github.com/docker/libcontainer/README.md index 8072bde40f..26bb82dde1 100644 --- a/vendor/src/github.com/docker/libcontainer/README.md +++ b/vendor/src/github.com/docker/libcontainer/README.md @@ -1,4 +1,4 @@ -## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.com/buildStatus/icon?job=Libcontainer Master)](https://jenkins.dockerproject.com/job/Libcontainer%20Master/) +## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.org/buildStatus/icon?job=Libcontainer%20Master)](https://jenkins.dockerproject.org/job/Libcontainer%20Master/) Libcontainer provides a native Go implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. diff --git a/vendor/src/github.com/docker/libcontainer/ROADMAP.md b/vendor/src/github.com/docker/libcontainer/ROADMAP.md index 7412460ef8..f59035351a 100644 --- a/vendor/src/github.com/docker/libcontainer/ROADMAP.md +++ b/vendor/src/github.com/docker/libcontainer/ROADMAP.md @@ -3,7 +3,7 @@ This document is a high-level overview of where we want to take libcontainer next. It is a curated selection of planned improvements which are either important, difficult, or both. 
-For a more complete view of planned and requested improvements, see [the GitHub issues](https://github.com/docker/libcontainer/issues). +For a more complete view of planned and requested improvements, see [the Github issues](https://github.com/docker/libcontainer/issues). To suggest changes to the roadmap, including additions, please write the change as if it were already in effect, and make a pull request. diff --git a/vendor/src/github.com/docker/libcontainer/SPEC.md b/vendor/src/github.com/docker/libcontainer/SPEC.md index 39ccd68eae..430a31fe08 100644 --- a/vendor/src/github.com/docker/libcontainer/SPEC.md +++ b/vendor/src/github.com/docker/libcontainer/SPEC.md @@ -47,20 +47,20 @@ unmount all the mounts that were setup within that namespace. For a container to execute properly there are certain filesystems that are required to be mounted within the rootfs that the runtime will setup. -| Path | Type | Flags | Data | -| ----------- | ------ | -------------------------------------- | --------------------------------------- | -| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | -| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | -| /dev/shm | shm | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | -| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | -| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid5 | -| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | +| Path | Type | Flags | Data | +| ----------- | ------ | -------------------------------------- | ---------------------------------------- | +| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | +| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | +| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | +| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | After a container's filesystems 
are mounted within the newly created mount namespace `/dev` will need to be populated with a set of device nodes. It is expected that a rootfs does not need to have any device nodes specified -for `/dev` within the rootfs as the container will setup the correct devices +for `/dev` within the rootfs as the container will setup the correct devices that are required for executing a container's process. | Path | Mode | Access | diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go b/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go index 8a68618e09..b272182b99 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go @@ -30,6 +30,7 @@ var ( "freezer": &FreezerGroup{}, } CgroupProcesses = "cgroup.procs" + HugePageSizes, _ = cgroups.GetHugePageSize() ) type subsystem interface { @@ -44,6 +45,7 @@ type subsystem interface { } type Manager struct { + mu sync.Mutex Cgroups *configs.Cgroup Paths map[string]string } @@ -82,7 +84,6 @@ type data struct { } func (m *Manager) Apply(pid int) error { - if m.Cgroups == nil { return nil } @@ -128,14 +129,25 @@ func (m *Manager) Apply(pid int) error { } func (m *Manager) Destroy() error { - return cgroups.RemovePaths(m.Paths) + m.mu.Lock() + defer m.mu.Unlock() + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil } func (m *Manager) GetPaths() map[string]string { - return m.Paths + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths } func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() stats := cgroups.NewStats() for name, path := range m.Paths { sys, ok := subsystems[name] diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go b/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go index 277e87fe89..4b82649334 100644 --- 
a/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go @@ -3,6 +3,10 @@ package fs import ( + "fmt" + "strconv" + "strings" + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) @@ -11,14 +15,25 @@ type HugetlbGroup struct { } func (s *HugetlbGroup) Apply(d *data) error { - // we just want to join this group even though we don't set anything - if _, err := d.join("hugetlb"); err != nil && !cgroups.IsNotFound(err) { + dir, err := d.join("hugetlb") + if err != nil && !cgroups.IsNotFound(err) { return err } + + if err := s.Set(dir, d.c); err != nil { + return err + } + return nil } func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, hugetlb := range cgroup.HugetlbLimit { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil { + return err + } + } + return nil } @@ -27,5 +42,31 @@ func (s *HugetlbGroup) Remove(d *data) error { } func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range HugePageSizes { + usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") + value, err := getCgroupParamUint(path, usage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", usage, err) + } + hugetlbStats.Usage = value + + maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + hugetlbStats.MaxUsage = value + + failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = 
hugetlbStats + } + return nil } diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go b/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go index bff7cafd76..dccdee6953 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "strconv" + "strings" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" @@ -45,12 +46,6 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { return err } } - // By default, MemorySwap is set to twice the size of Memory. - if cgroup.MemorySwap == 0 && cgroup.Memory != 0 { - if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Memory*2, 10)); err != nil { - return err - } - } if cgroup.MemorySwap > 0 { if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil { return err @@ -67,6 +62,11 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { return err } } + if cgroup.MemorySwappiness >= 0 && cgroup.MemorySwappiness <= 100 { + if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.MemorySwappiness, 10)); err != nil { + return err + } + } return nil } @@ -94,24 +94,62 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { } stats.MemoryStats.Stats[t] = v } - - // Set memory usage and max historical usage. 
- value, err := getCgroupParamUint(path, "memory.usage_in_bytes") - if err != nil { - return fmt.Errorf("failed to parse memory.usage_in_bytes - %v", err) - } - stats.MemoryStats.Usage = value stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] - value, err = getCgroupParamUint(path, "memory.max_usage_in_bytes") + + memoryUsage, err := getMemoryData(path, "") if err != nil { - return fmt.Errorf("failed to parse memory.max_usage_in_bytes - %v", err) + return err } - stats.MemoryStats.MaxUsage = value - value, err = getCgroupParamUint(path, "memory.failcnt") + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") if err != nil { - return fmt.Errorf("failed to parse memory.failcnt - %v", err) + return err } - stats.MemoryStats.Failcnt = value + stats.MemoryStats.SwapUsage = swapUsage + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage return nil } + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") + maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") + failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") + + value, err := getCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + memoryData.MaxUsage = value + value, err = getCgroupParamUint(path, failcnt) + 
+ if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + memoryData.Failcnt = value + + return memoryData, nil +} diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/stats.go b/vendor/src/github.com/docker/libcontainer/cgroups/stats.go index 6d81a12e10..bda32b20c3 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/stats.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/stats.go @@ -32,18 +32,21 @@ type CpuStats struct { ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` } +type MemoryData struct { + Usage uint64 `json:"usage,omitempty"` + MaxUsage uint64 `json:"max_usage,omitempty"` + Failcnt uint64 `json:"failcnt"` +} type MemoryStats struct { - // current res_counter usage for memory - Usage uint64 `json:"usage,omitempty"` // memory used for cache Cache uint64 `json:"cache,omitempty"` - // maximum usage ever recorded. - MaxUsage uint64 `json:"max_usage,omitempty"` - // TODO(vishh): Export these as stronger types. - // all the stats exported via memory.stat. - Stats map[string]uint64 `json:"stats,omitempty"` - // number of times memory usage hits limits. - Failcnt uint64 `json:"failcnt"` + // usage of memory + Usage MemoryData `json:"usage,omitempty"` + // usage of memory + swap + SwapUsage MemoryData `json:"swap_usage,omitempty"` + // usage of kernel memory + KernelUsage MemoryData `json:"kernel_usage,omitempty"` + Stats map[string]uint64 `json:"stats,omitempty"` } type BlkioStatEntry struct { @@ -65,13 +68,25 @@ type BlkioStats struct { SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` } +type HugetlbStats struct { + // current res_counter usage for hugetlb + Usage uint64 `json:"usage,omitempty"` + // maximum usage ever recorded. + MaxUsage uint64 `json:"max_usage,omitempty"` + // number of times hugetlb usage allocation failed. 
+ Failcnt uint64 `json:"failcnt"` +} + type Stats struct { CpuStats CpuStats `json:"cpu_stats,omitempty"` MemoryStats MemoryStats `json:"memory_stats,omitempty"` BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` } func NewStats() *Stats { memoryStats := MemoryStats{Stats: make(map[string]uint64)} - return &Stats{MemoryStats: memoryStats} + hugetlbStats := make(map[string]HugetlbStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} } diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go b/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go index c2782285e3..fd7f680b50 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go @@ -20,6 +20,7 @@ import ( ) type Manager struct { + mu sync.Mutex Cgroups *configs.Cgroup Paths map[string]string } @@ -222,6 +223,9 @@ func (m *Manager) Apply(pid int) error { return err } + if err := joinHugetlb(c, pid); err != nil { + return err + } // FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem // using that (at least on systemd 208, see https://github.com/docker/libcontainer/pull/354), // so use fs work around for now. 
@@ -253,11 +257,21 @@ func (m *Manager) Apply(pid int) error { } func (m *Manager) Destroy() error { - return cgroups.RemovePaths(m.Paths) + m.mu.Lock() + defer m.mu.Unlock() + theConn.StopUnit(getUnitName(m.Cgroups), "replace") + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil } func (m *Manager) GetPaths() map[string]string { - return m.Paths + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths } func writeFile(dir, file, data string) error { @@ -391,6 +405,8 @@ func (m *Manager) GetPids() ([]int, error) { } func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() stats := cgroups.NewStats() for name, path := range m.Paths { sys, ok := subsystems[name] @@ -453,14 +469,8 @@ func joinMemory(c *configs.Cgroup, pid int) error { } // -1 disables memoryswap - if c.Memory != 0 && c.MemorySwap >= 0 { - memorySwap := c.MemorySwap - - if memorySwap == 0 { - // By default, MemorySwap is set to twice the size of RAM. 
- memorySwap = c.Memory * 2 - } - err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)) + if c.MemorySwap > 0 { + err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10)) if err != nil { return err } @@ -472,6 +482,12 @@ func joinMemory(c *configs.Cgroup, pid int) error { return err } } + if c.MemorySwappiness >= 0 && c.MemorySwappiness <= 100 { + err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.MemorySwappiness, 10)) + if err != nil { + return err + } + } return nil } @@ -526,3 +542,13 @@ func joinBlkio(c *configs.Cgroup, pid int) error { return nil } + +func joinHugetlb(c *configs.Cgroup, pid int) error { + path, err := join(c, "hugetlb", pid) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + hugetlb := subsystems["hugetlb"] + return hugetlb.Set(path, c) +} diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/utils.go b/vendor/src/github.com/docker/libcontainer/cgroups/utils.go index 5486883996..8ab80a7f2c 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/utils.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/utils.go @@ -14,24 +14,28 @@ import ( "time" "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/units" ) // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt func FindCgroupMountpoint(subsystem string) (string, error) { - mounts, err := mount.GetMounts() + f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", err } - - for _, mount := range mounts { - if mount.Fstype == "cgroup" { - for _, opt := range strings.Split(mount.VfsOpts, ",") { - if opt == subsystem { - return mount.Mountpoint, nil - } + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], nil } } } + if err := scanner.Err(); err != nil { + return "", err + } 
return "", NewNotFoundError(subsystem) } @@ -238,3 +242,23 @@ func RemovePaths(paths map[string]string) (err error) { } return fmt.Errorf("Failed to remove paths: %s", paths) } + +func GetHugePageSize() ([]string, error) { + var pageSizes []string + sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"} + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return pageSizes, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return []string{}, err + } + sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList) + pageSizes = append(pageSizes, sizeString) + } + + return pageSizes, nil +} diff --git a/vendor/src/github.com/docker/libcontainer/configs/cgroup.go b/vendor/src/github.com/docker/libcontainer/configs/cgroup.go index 55a81ded2f..140b530d66 100644 --- a/vendor/src/github.com/docker/libcontainer/configs/cgroup.go +++ b/vendor/src/github.com/docker/libcontainer/configs/cgroup.go @@ -78,12 +78,18 @@ type Cgroup struct { // set the freeze value for the process Freezer FreezerState `json:"freezer"` + // Hugetlb limit (in bytes) + HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` + // Parent slice to use for systemd TODO: remove in favor or parent Slice string `json:"slice"` // Whether to disable OOM Killer OomKillDisable bool `json:"oom_kill_disable"` + // Tuning swappiness behaviour per cgroup + MemorySwappiness int64 `json:"memory_swappiness"` + // Set priority of network traffic for container NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` diff --git a/vendor/src/github.com/docker/libcontainer/configs/config.go b/vendor/src/github.com/docker/libcontainer/configs/config.go index 293af0a9b2..04ea91ffd0 100644 --- a/vendor/src/github.com/docker/libcontainer/configs/config.go +++ b/vendor/src/github.com/docker/libcontainer/configs/config.go @@ -13,6 +13,40 @@ type IDMap struct { Size int `json:"size"` } +type 
Seccomp struct { + Syscalls []*Syscall `json:"syscalls"` +} + +type Action int + +const ( + Kill Action = iota - 3 + Trap + Allow +) + +type Operator int + +const ( + EqualTo Operator = iota + NotEqualTo + GreatherThan + LessThan + MaskEqualTo +) + +type Arg struct { + Index int `json:"index"` + Value uint32 `json:"value"` + Op Operator `json:"op"` +} + +type Syscall struct { + Value int `json:"value"` + Action Action `json:"action"` + Args []*Arg `json:"args"` +} + // TODO Windows. Many of these fields should be factored out into those parts // which are common across platforms, and those which are platform specific. @@ -85,7 +119,7 @@ type Config struct { // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. - AdditionalGroups []int `json:"additional_groups"` + AdditionalGroups []string `json:"additional_groups"` // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings"` @@ -104,4 +138,9 @@ type Config struct { // SystemProperties is a map of properties and their values. It is the equivalent of using // sysctl -w my.property.name value in Linux. SystemProperties map[string]string `json:"system_properties"` + + // Seccomp allows actions to be taken whenever a syscall is made within the container. + // By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno + // can be specified on a per syscall basis. 
+ Seccomp *Seccomp `json:"seccomp"` } diff --git a/vendor/src/github.com/docker/libcontainer/configs/config_linux.go b/vendor/src/github.com/docker/libcontainer/configs/config_unix.go similarity index 98% rename from vendor/src/github.com/docker/libcontainer/configs/config_linux.go rename to vendor/src/github.com/docker/libcontainer/configs/config_unix.go index 97544b2abc..89f580bfa3 100644 --- a/vendor/src/github.com/docker/libcontainer/configs/config_linux.go +++ b/vendor/src/github.com/docker/libcontainer/configs/config_unix.go @@ -1,3 +1,5 @@ +// +build freebsd linux + package configs import "fmt" diff --git a/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go b/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go index b0966b9754..0ce040fd34 100644 --- a/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go +++ b/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux freebsd package configs diff --git a/vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go b/vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go new file mode 100644 index 0000000000..1cce8d09be --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go @@ -0,0 +1,9 @@ +package configs + +type HugepageLimit struct { + // which type of hugepage to limit. + Pagesize string `json:"page_size"` + + // usage limit for hugepage. 
+ Limit int `json:"limit"` +} diff --git a/vendor/src/github.com/docker/libcontainer/configs/namespaces_linux.go b/vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go similarity index 98% rename from vendor/src/github.com/docker/libcontainer/configs/namespaces_linux.go rename to vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go index c937b49ff4..7bc9085468 100644 --- a/vendor/src/github.com/docker/libcontainer/configs/namespaces_linux.go +++ b/vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux freebsd package configs diff --git a/vendor/src/github.com/docker/libcontainer/console_freebsd.go b/vendor/src/github.com/docker/libcontainer/console_freebsd.go new file mode 100644 index 0000000000..4d20b8da40 --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/console_freebsd.go @@ -0,0 +1,13 @@ +// +build freebsd + +package libcontainer + +import ( + "errors" +) + +// newConsole returns an initialized console that can be used within a container by copying bytes +// from the master side to the slave that is attached as the tty for the container's init process. +func newConsole(uid, gid int) (Console, error) { + return nil, errors.New("libcontainer console is not supported on FreeBSD") +} diff --git a/vendor/src/github.com/docker/libcontainer/console_linux.go b/vendor/src/github.com/docker/libcontainer/console_linux.go index d5fec6fd6e..e35ac529db 100644 --- a/vendor/src/github.com/docker/libcontainer/console_linux.go +++ b/vendor/src/github.com/docker/libcontainer/console_linux.go @@ -44,7 +44,7 @@ func newConsoleFromPath(slavePath string) *linuxConsole { } } -// linuxConsole is a linux pseudo TTY for use within a container. +// linuxConsole is a linux pseudo TTY for use within a container. 
type linuxConsole struct { master *os.File slavePath string @@ -92,7 +92,7 @@ func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error { return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "") } -// dupStdio opens the slavePath for the console and dup2s the fds to the current +// dupStdio opens the slavePath for the console and dups the fds to the current // processes stdio, fd 0,1,2. func (c *linuxConsole) dupStdio() error { slave, err := c.open(syscall.O_RDWR) @@ -101,7 +101,7 @@ func (c *linuxConsole) dupStdio() error { } fd := int(slave.Fd()) for _, i := range []int{0, 1, 2} { - if err := syscall.Dup2(fd, i); err != nil { + if err := syscall.Dup3(fd, i, 0); err != nil { return err } } diff --git a/vendor/src/github.com/docker/libcontainer/devices/devices_linux.go b/vendor/src/github.com/docker/libcontainer/devices/devices_unix.go similarity index 98% rename from vendor/src/github.com/docker/libcontainer/devices/devices_linux.go rename to vendor/src/github.com/docker/libcontainer/devices/devices_unix.go index 7a11eaf11b..a4df06c3a8 100644 --- a/vendor/src/github.com/docker/libcontainer/devices/devices_linux.go +++ b/vendor/src/github.com/docker/libcontainer/devices/devices_unix.go @@ -1,3 +1,5 @@ +// +build linux freebsd + package devices import ( diff --git a/vendor/src/github.com/docker/libcontainer/devices/number.go b/vendor/src/github.com/docker/libcontainer/devices/number.go index e9c3e516a1..885b6e5dd9 100644 --- a/vendor/src/github.com/docker/libcontainer/devices/number.go +++ b/vendor/src/github.com/docker/libcontainer/devices/number.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux freebsd package devices diff --git a/vendor/src/github.com/docker/libcontainer/init_linux.go b/vendor/src/github.com/docker/libcontainer/init_linux.go index 1771fd1930..f36e354f2e 100644 --- a/vendor/src/github.com/docker/libcontainer/init_linux.go +++ b/vendor/src/github.com/docker/libcontainer/init_linux.go @@ -13,6 +13,7 @@ import ( 
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" + "github.com/docker/libcontainer/seccomp" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/user" "github.com/docker/libcontainer/utils" @@ -176,10 +177,20 @@ func setupUser(config *initConfig) error { if err != nil { return err } - suppGroups := append(execUser.Sgids, config.Config.AdditionalGroups...) + + var addGroups []int + if len(config.Config.AdditionalGroups) > 0 { + addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath) + if err != nil { + return err + } + } + + suppGroups := append(execUser.Sgids, addGroups...) if err := syscall.Setgroups(suppGroups); err != nil { return err } + if err := system.Setgid(execUser.Gid); err != nil { return err } @@ -259,3 +270,61 @@ func killCgroupProcesses(m cgroups.Manager) error { } return nil } + +func finalizeSeccomp(config *initConfig) error { + if config.Config.Seccomp == nil { + return nil + } + context := seccomp.New() + for _, s := range config.Config.Seccomp.Syscalls { + ss := &seccomp.Syscall{ + Value: uint32(s.Value), + Action: seccompAction(s.Action), + } + if len(s.Args) > 0 { + ss.Args = seccompArgs(s.Args) + } + context.Add(ss) + } + return context.Load() +} + +func seccompAction(a configs.Action) seccomp.Action { + switch a { + case configs.Kill: + return seccomp.Kill + case configs.Trap: + return seccomp.Trap + case configs.Allow: + return seccomp.Allow + } + return seccomp.Error(syscall.Errno(int(a))) +} + +func seccompArgs(args []*configs.Arg) seccomp.Args { + var sa []seccomp.Arg + for _, a := range args { + sa = append(sa, seccomp.Arg{ + Index: uint32(a.Index), + Op: seccompOperator(a.Op), + Value: uint(a.Value), + }) + } + return seccomp.Args{sa} +} + +func seccompOperator(o configs.Operator) seccomp.Operator { + switch o { + case configs.EqualTo: + return seccomp.EqualTo + case configs.NotEqualTo: + return 
seccomp.NotEqualTo + case configs.GreatherThan: + return seccomp.GreatherThan + case configs.LessThan: + return seccomp.LessThan + case configs.MaskEqualTo: + return seccomp.MaskEqualTo + } + return 0 +} diff --git a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_arm.go b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go similarity index 70% rename from vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_arm.go rename to vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go index 779e58a771..965e0bfbc7 100644 --- a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_arm.go +++ b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go @@ -1,3 +1,5 @@ +// +build arm ppc64 ppc64le + package netlink func ifrDataByte(b byte) uint8 { diff --git a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go index f151722a1b..7446279892 100644 --- a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go +++ b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go @@ -1,4 +1,4 @@ -// +build !arm +// +build !arm,!ppc64,!ppc64le package netlink diff --git a/vendor/src/github.com/docker/libcontainer/nsenter/README.md b/vendor/src/github.com/docker/libcontainer/nsenter/README.md index 7da6dbe9a2..d1a60ef985 100644 --- a/vendor/src/github.com/docker/libcontainer/nsenter/README.md +++ b/vendor/src/github.com/docker/libcontainer/nsenter/README.md @@ -18,7 +18,7 @@ which will give the process of the container that should be joined. Namespaces f be found from `/proc/[pid]/ns` and set by `setns` syscall. And then get the pipe number from `_LIBCONTAINER_INITPIPE`, error message could -be transferred through it. If tty is added, `_LIBCONTAINER_CONSOLE_PATH` will +be transferred through it.
If tty is added, `_LIBCONTAINER_CONSOLE_PATH` will have value and start a console for output. Finally, `nsexec()` will clone a child process , exit the parent process and let diff --git a/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c b/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c index d8e45f3cda..d78e1691c6 100644 --- a/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c +++ b/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c @@ -148,15 +148,15 @@ void nsexec() pr_perror("ioctl TIOCSCTTY failed"); exit(1); } - if (dup2(consolefd, STDIN_FILENO) != STDIN_FILENO) { + if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) { pr_perror("Failed to dup 0"); exit(1); } - if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) { + if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) { pr_perror("Failed to dup 1"); exit(1); } - if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) { + if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) { pr_perror("Failed to dup 2"); exit(1); } diff --git a/vendor/src/github.com/docker/libcontainer/rootfs_linux.go b/vendor/src/github.com/docker/libcontainer/rootfs_linux.go index 4ddfff1fe2..0b0c3815cb 100644 --- a/vendor/src/github.com/docker/libcontainer/rootfs_linux.go +++ b/vendor/src/github.com/docker/libcontainer/rootfs_linux.go @@ -272,7 +272,7 @@ func reOpenDevNull(rootfs string) error { } if stat.Rdev == devNullStat.Rdev { // Close and re-open the fd. 
- if err := syscall.Dup2(int(file.Fd()), fd); err != nil { + if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil { return err } } diff --git a/vendor/src/github.com/docker/libcontainer/seccomp/bpf.go b/vendor/src/github.com/docker/libcontainer/seccomp/bpf.go new file mode 100644 index 0000000000..a4b3bdf7a5 --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/seccomp/bpf.go @@ -0,0 +1,32 @@ +package seccomp + +import "strings" + +type bpfLabel struct { + label string + location uint32 +} + +type bpfLabels []bpfLabel + +// labelIndex returns the index for the label if it exists in the slice. +// if it does not exist in the slice it appends the label lb to the end +// of the slice and returns the index. +func labelIndex(labels *bpfLabels, lb string) uint32 { + var id uint32 + for id = 0; id < uint32(len(*labels)); id++ { + if strings.EqualFold(lb, (*labels)[id].label) { + return id + } + } + *labels = append(*labels, bpfLabel{lb, 0xffffffff}) + return id +} + +func scmpBpfStmt(code uint16, k uint32) sockFilter { + return sockFilter{code, 0, 0, k} +} + +func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter { + return sockFilter{code, jt, jf, k} +} diff --git a/vendor/src/github.com/docker/libcontainer/seccomp/context.go b/vendor/src/github.com/docker/libcontainer/seccomp/context.go new file mode 100644 index 0000000000..c8d4e73144 --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/seccomp/context.go @@ -0,0 +1,144 @@ +package seccomp + +import ( + "errors" + "syscall" +) + +const labelTemplate = "lb-%d-%d" + +// Action is the type of action that will be taken when a +// syscall is performed. +type Action int + +const ( + Kill Action = iota - 3 // Kill the calling process of the syscall. + Trap // Trap and coredump the calling process of the syscall. + Allow // Allow the syscall to be completed. +) + +// Syscall is the specified syscall, action, and any type of arguments +// to filter on. 
+type Syscall struct { + // Value is the syscall number. + Value uint32 + // Action is the action to perform when the specified syscall is made. + Action Action + // Args are filters that can be specified on the arguments to the syscall. + Args Args +} + +func (s *Syscall) scmpAction() uint32 { + switch s.Action { + case Allow: + return retAllow + case Trap: + return retTrap + case Kill: + return retKill + } + return actionErrno(uint32(s.Action)) +} + +// Arg represents an argument to the syscall with the argument's index, +// the operator to apply when matching, and the argument's value at that time. +type Arg struct { + Index uint32 // index of args which start from zero + Op Operator // operation, such as EQ/NE/GE/LE + Value uint // the value of arg +} + +type Args [][]Arg + +var ( + ErrUnresolvedLabel = errors.New("seccomp: unresolved label") + ErrDuplicateLabel = errors.New("seccomp: duplicate label use") + ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument") +) + +// Error returns an Action that will be used to send the calling +// process the specified errno when the syscall is made. +func Error(code syscall.Errno) Action { + return Action(code) +} + +// New returns a new syscall context for use. +func New() *Context { + return &Context{ + syscalls: make(map[uint32]*Syscall), + } +} + +// Context holds syscalls for the current process to limit the type of +// actions the calling process can make. +type Context struct { + syscalls map[uint32]*Syscall +} + +// Add will add the specified syscall, action, and arguments to the seccomp +// Context. +func (c *Context) Add(s *Syscall) { + c.syscalls[s.Value] = s +} + +// Remove removes the specified syscall configuration from the Context. +func (c *Context) Remove(call uint32) { + delete(c.syscalls, call) +} + +// Load will apply the Context to the calling process making any seccomp process changes +// apply after the context is loaded.
+func (c *Context) Load() error { + filter, err := c.newFilter() + if err != nil { + return err + } + if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil { + return err + } + prog := newSockFprog(filter) + return prog.set() +} + +func (c *Context) newFilter() ([]sockFilter, error) { + var ( + labels bpfLabels + f = newFilter() + ) + for _, s := range c.syscalls { + f.addSyscall(s, &labels) + } + f.allow() + // process args for the syscalls + for _, s := range c.syscalls { + if err := f.addArguments(s, &labels); err != nil { + return nil, err + } + } + // apply labels for arguments + idx := int32(len(*f) - 1) + for ; idx >= 0; idx-- { + lf := &(*f)[idx] + if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) { + continue + } + rel := int32(lf.jt)<<8 | int32(lf.jf) + if ((jumpJT << 8) | jumpJF) == rel { + if labels[lf.k].location == 0xffffffff { + return nil, ErrUnresolvedLabel + } + lf.k = labels[lf.k].location - uint32(idx+1) + lf.jt = 0 + lf.jf = 0 + } else if ((labelJT << 8) | labelJF) == rel { + if labels[lf.k].location != 0xffffffff { + return nil, ErrDuplicateLabel + } + labels[lf.k].location = uint32(idx) + lf.k = 0 + lf.jt = 0 + lf.jf = 0 + } + } + return *f, nil +} diff --git a/vendor/src/github.com/docker/libcontainer/seccomp/filter.go b/vendor/src/github.com/docker/libcontainer/seccomp/filter.go new file mode 100644 index 0000000000..370cdf087e --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/seccomp/filter.go @@ -0,0 +1,116 @@ +package seccomp + +import ( + "fmt" + "syscall" + "unsafe" +) + +type sockFilter struct { + code uint16 + jt uint8 + jf uint8 + k uint32 +} + +func newFilter() *filter { + var f filter + f = append(f, sockFilter{ + pfLD + syscall.BPF_W + syscall.BPF_ABS, + 0, + 0, + uint32(unsafe.Offsetof(secData.nr)), + }) + return &f +} + +type filter []sockFilter + +func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) { + if len(s.Args) == 0 { + f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())) 
+ } else { + if len(s.Args[0]) > 0 { + lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index) + f.call(s.Value, + scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), + jumpJT, jumpJF)) + } + } +} + +func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error { + for i := 0; len(s.Args) > i; i++ { + if len(s.Args[i]) > 0 { + lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index) + f.label(labels, lb) + f.arg(s.Args[i][0].Index) + } + for j := 0; j < len(s.Args[i]); j++ { + var jf sockFilter + if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 { + lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index) + jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, + labelIndex(labels, lbj), jumpJT, jumpJF) + } else { + jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()) + } + if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil { + return err + } + } + f.allow() + } + return nil +} + +func (f *filter) label(labels *bpfLabels, lb string) { + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF)) +} + +func (f *filter) call(nr uint32, jt sockFilter) { + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1)) + *f = append(*f, jt) +} + +func (f *filter) allow() { + *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow)) +} + +func (f *filter) deny() { + *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap)) +} + +func (f *filter) arg(index uint32) { + arg(f, index) +} + +func (f *filter) op(operation Operator, v uint, jf sockFilter) error { + switch operation { + case EqualTo: + jumpEqualTo(f, v, jf) + case NotEqualTo: + jumpNotEqualTo(f, v, jf) + case GreatherThan: + jumpGreaterThan(f, v, jf) + case LessThan: + jumpLessThan(f, v, jf) + case MaskEqualTo: + jumpMaskEqualTo(f, v, jf) + default: + return ErrUnsupportedOperation + } + return nil +} + +func arg(f *filter, idx uint32) { + *f = append(*f, 
scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx))) + *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx))) + *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1)) +} + +func jump(f *filter, labels *bpfLabels, lb string) { + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), + jumpJT, jumpJF)) +} diff --git a/vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go b/vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go new file mode 100644 index 0000000000..f0d07716a4 --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go @@ -0,0 +1,68 @@ +// +build linux,amd64 + +package seccomp + +// Using BPF filters +// +// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf +import "syscall" + +func jumpGreaterThan(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +func jumpEqualTo(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, 
scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +func jumpLessThan(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +func jumpNotEqualTo(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +// this checks for a value inside a mask. 
The evaluation is equal to doing +// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER +func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v))) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} diff --git a/vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go b/vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go new file mode 100644 index 0000000000..78d7d85334 --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go @@ -0,0 +1,122 @@ +// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go. +package seccomp + +import ( + "syscall" + "unsafe" +) + +// Operator that is used for argument comparison.
+type Operator int + +const ( + EqualTo Operator = iota + NotEqualTo + GreatherThan + LessThan + MaskEqualTo +) + +const ( + jumpJT = 0xff + jumpJF = 0xff + labelJT = 0xfe + labelJF = 0xfe +) + +const ( + pfLD = 0x0 + retKill = 0x00000000 + retTrap = 0x00030000 + retAllow = 0x7fff0000 + modeFilter = 0x2 + prSetNoNewPrivileges = 0x26 +) + +func actionErrno(errno uint32) uint32 { + return 0x00050000 | (errno & 0x0000ffff) +} + +var ( + secData = struct { + nr int32 + arch uint32 + insPointer uint64 + args [6]uint64 + }{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}} +) + +var isLittle = func() bool { + var ( + x = 0x1234 + p = unsafe.Pointer(&x) + p2 = (*[unsafe.Sizeof(0)]byte)(p) + ) + if p2[0] == 0 { + return false + } + return true +}() + +var endian endianSupport + +type endianSupport struct { +} + +func (e endianSupport) hi(i uint32) uint32 { + if isLittle { + return e.little(i) + } + return e.big(i) +} + +func (e endianSupport) low(i uint32) uint32 { + if isLittle { + return e.big(i) + } + return e.little(i) +} + +func (endianSupport) big(idx uint32) uint32 { + if idx >= 6 { + return 0 + } + return uint32(unsafe.Offsetof(secData.args)) + 8*idx +} + +func (endianSupport) little(idx uint32) uint32 { + if idx < 0 || idx >= 6 { + return 0 + } + return uint32(unsafe.Offsetof(secData.args)) + + uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch)) +} + +func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error { + _, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) + if err != 0 { + return err + } + return nil +} + +func newSockFprog(filter []sockFilter) *sockFprog { + return &sockFprog{ + len: uint16(len(filter)), + filt: filter, + } +} + +type sockFprog struct { + len uint16 + filt []sockFilter +} + +func (s *sockFprog) set() error { + _, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP), + uintptr(modeFilter), uintptr(unsafe.Pointer(s))) + if err != 0 { + return err + } + 
return nil +} diff --git a/vendor/src/github.com/docker/libcontainer/standard_init_linux.go b/vendor/src/github.com/docker/libcontainer/standard_init_linux.go index 251c09f696..445c1fa29c 100644 --- a/vendor/src/github.com/docker/libcontainer/standard_init_linux.go +++ b/vendor/src/github.com/docker/libcontainer/standard_init_linux.go @@ -99,5 +99,8 @@ func (l *linuxStandardInit) Init() error { if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } + if err := finalizeSeccomp(l.config); err != nil { + return err + } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) } diff --git a/vendor/src/github.com/docker/libcontainer/stats_freebsd.go b/vendor/src/github.com/docker/libcontainer/stats_freebsd.go new file mode 100644 index 0000000000..f8d1d689ce --- /dev/null +++ b/vendor/src/github.com/docker/libcontainer/stats_freebsd.go @@ -0,0 +1,5 @@ +package libcontainer + +type Stats struct { + Interfaces []*NetworkInterface +} diff --git a/vendor/src/github.com/docker/libcontainer/system/setns_linux.go b/vendor/src/github.com/docker/libcontainer/system/setns_linux.go index a3c4cbb273..615ff4c827 100644 --- a/vendor/src/github.com/docker/libcontainer/system/setns_linux.go +++ b/vendor/src/github.com/docker/libcontainer/system/setns_linux.go @@ -21,16 +21,20 @@ var setNsMap = map[string]uintptr{ "linux/s390x": 339, } +var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + +func SysSetns() uint32 { + return uint32(sysSetns) +} + func Setns(fd uintptr, flags uintptr) error { ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] if !exists { return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) } - _, _, err := syscall.RawSyscall(ns, fd, flags, 0) if err != 0 { return err } - return nil } diff --git a/vendor/src/github.com/docker/libcontainer/system/sysconfig.go b/vendor/src/github.com/docker/libcontainer/system/sysconfig.go index 
b8434f1050..b3a07cba3e 100644 --- a/vendor/src/github.com/docker/libcontainer/system/sysconfig.go +++ b/vendor/src/github.com/docker/libcontainer/system/sysconfig.go @@ -1,4 +1,4 @@ -// +build cgo,linux +// +build cgo,linux cgo,freebsd package system diff --git a/vendor/src/github.com/docker/libcontainer/user/user.go b/vendor/src/github.com/docker/libcontainer/user/user.go index d7439f12e3..13226dbfa7 100644 --- a/vendor/src/github.com/docker/libcontainer/user/user.go +++ b/vendor/src/github.com/docker/libcontainer/user/user.go @@ -348,3 +348,60 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( return user, nil } + +// GetAdditionalGroupsPath looks up a list of groups by name or group id +// against the group file. If a group name cannot be found, an error will be +// returned. If a group id cannot be found, it will be returned as-is. +func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { + groupReader, err := os.Open(groupPath) + if err != nil { + return nil, fmt.Errorf("Failed to open group file: %v", err) + } + defer groupReader.Close() + + groups, err := ParseGroupFilter(groupReader, func(g Group) bool { + for _, ag := range additionalGroups { + if g.Name == ag || strconv.Itoa(g.Gid) == ag { + return true + } + } + return false + }) + if err != nil { + return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) + } + + gidMap := make(map[int]struct{}) + for _, ag := range additionalGroups { + var found bool + for _, g := range groups { + // if we found a matched group either by name or gid, take the + // first matched as correct + if g.Name == ag || strconv.Itoa(g.Gid) == ag { + if _, ok := gidMap[g.Gid]; !ok { + gidMap[g.Gid] = struct{}{} + found = true + break + } + } + } + // we asked for a group but didn't find it. 
let's check to see + // if we wanted a numeric group + if !found { + gid, err := strconv.Atoi(ag) + if err != nil { + return nil, fmt.Errorf("Unable to find group %s", ag) + } + // Ensure gid is inside gid range. + if gid < minId || gid > maxId { + return nil, ErrRange + } + gidMap[gid] = struct{}{} + } + } + gids := []int{} + for gid := range gidMap { + gids = append(gids, gid) + } + return gids, nil +} diff --git a/vendor/src/github.com/docker/libcontainer/utils/utils.go b/vendor/src/github.com/docker/libcontainer/utils/utils.go index 094bce5300..26a0fb7d09 100644 --- a/vendor/src/github.com/docker/libcontainer/utils/utils.go +++ b/vendor/src/github.com/docker/libcontainer/utils/utils.go @@ -21,6 +21,9 @@ func GenerateRandomName(prefix string, size int) (string, error) { if _, err := io.ReadFull(rand.Reader, id); err != nil { return "", err } + if size > 64 { + size = 64 + } return prefix + hex.EncodeToString(id)[:size], nil } diff --git a/vendor/src/github.com/docker/libtrust/util.go b/vendor/src/github.com/docker/libtrust/util.go index 153f7b462c..4d5a6200a8 100644 --- a/vendor/src/github.com/docker/libtrust/util.go +++ b/vendor/src/github.com/docker/libtrust/util.go @@ -16,7 +16,7 @@ import ( ) // joseBase64UrlEncode encodes the given data using the standard base64 url -// encoding format but with all trailing '=' characters omitted in accordance +// encoding format but with all trailing '=' characters omitted in accordance // with the jose specification. // http://tools.ietf.org/html/draft-ietf-jose-json-web-signature-31#section-2 func joseBase64UrlEncode(b []byte) string { diff --git a/vendor/src/github.com/mistifyio/go-zfs/CONTRIBUTING.md b/vendor/src/github.com/mistifyio/go-zfs/CONTRIBUTING.md index 849323a5f1..f1880c19e5 100644 --- a/vendor/src/github.com/mistifyio/go-zfs/CONTRIBUTING.md +++ b/vendor/src/github.com/mistifyio/go-zfs/CONTRIBUTING.md @@ -4,7 +4,7 @@ We always welcome contributions to help make `go-zfs` better.
Please take a mome ### Reporting issues ### -We use [GitHub issues](https://github.com/mistifyio/go-zfs/issues) to track bug reports, feature requests, and submitting pull requests. +We use [GitHub issues](https://github.com/mistifyio/go-zfs/issues) to track bug reports, feature requests, and submitting pull requests. If you find a bug: diff --git a/vendor/src/github.com/mistifyio/go-zfs/README.md b/vendor/src/github.com/mistifyio/go-zfs/README.md index fef80d727b..2515e588e0 100644 --- a/vendor/src/github.com/mistifyio/go-zfs/README.md +++ b/vendor/src/github.com/mistifyio/go-zfs/README.md @@ -29,7 +29,7 @@ The tests have decent examples for most functions. ```go //assuming a zpool named test -//error handling omitted +//error handling omitted f, err := zfs.CreateFilesystem("test/snapshot-test", nil) diff --git a/vendor/src/github.com/vishvananda/netlink/README.md b/vendor/src/github.com/vishvananda/netlink/README.md index 734384cfcd..555f886523 100644 --- a/vendor/src/github.com/vishvananda/netlink/README.md +++ b/vendor/src/github.com/vishvananda/netlink/README.md @@ -8,7 +8,7 @@ the kernel. It can be used to add and remove interfaces, set ip addresses and routes, and configure ipsec. Netlink communication requires elevated privileges, so in most cases this code needs to be run as root. Since low-level netlink messages are inscrutable at best, the library attempts -to provide an api that is loosely modeled on the CLI provided by iproute2. +to provide an api that is loosely modeled on the CLI provided by iproute2. Actions like `ip link add` will be accomplished via a similarly named function like AddLink().
This library began its life as a fork of the netlink functionality in diff --git a/vendor/src/github.com/vishvananda/netns/README.md b/vendor/src/github.com/vishvananda/netns/README.md index 57d195f2c7..24a4003ae6 100644 --- a/vendor/src/github.com/vishvananda/netns/README.md +++ b/vendor/src/github.com/vishvananda/netns/README.md @@ -38,7 +38,7 @@ func main() { newns, _ := netns.New() defer newns.Close() - // Do something with the network namespace + // Do something with the network namespace ifaces, _ := net.Interfaces() fmt.Printf("Interfaces: %v\n", ifaces)