Merge pull request #9074 from rhatdan/shm

Allow IPC namespace to be shared between containers or with the host
This commit is contained in:
Michael Crosby 2014-11-14 10:34:00 -08:00
Родитель 7b7af6dbae 497fc8876e
Коммит 07996d82c7
9 изменённых файлов: 298 добавлений и 4 удалений

Просмотреть файл

@ -233,6 +233,18 @@ func populateCommand(c *Container, env []string) error {
return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode) return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode)
} }
ipc := &execdriver.Ipc{}
if c.hostConfig.IpcMode.IsContainer() {
ic, err := c.getIpcContainer()
if err != nil {
return err
}
ipc.ContainerID = ic.ID
} else {
ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
}
// Build lists of devices allowed and created within the container. // Build lists of devices allowed and created within the container.
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices)) userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
for i, deviceMapping := range c.hostConfig.Devices { for i, deviceMapping := range c.hostConfig.Devices {
@ -274,6 +286,7 @@ func populateCommand(c *Container, env []string) error {
InitPath: "/.dockerinit", InitPath: "/.dockerinit",
WorkingDir: c.Config.WorkingDir, WorkingDir: c.Config.WorkingDir,
Network: en, Network: en,
Ipc: ipc,
Resources: resources, Resources: resources,
AllowedDevices: allowedDevices, AllowedDevices: allowedDevices,
AutoCreatedDevices: autoCreatedDevices, AutoCreatedDevices: autoCreatedDevices,
@ -1250,10 +1263,25 @@ func (container *Container) GetMountLabel() string {
return container.MountLabel return container.MountLabel
} }
// getIpcContainer looks up the container referenced by this container's
// IpcMode and returns it. An error is returned when the daemon does not know
// the referenced container or when that container is not running.
func (container *Container) getIpcContainer() (*Container, error) {
	id := container.hostConfig.IpcMode.Container()
	target := container.daemon.Get(id)

	switch {
	case target == nil:
		return nil, fmt.Errorf("no such container to join IPC: %s", id)
	case !target.IsRunning():
		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", id)
	}
	return target, nil
}
func (container *Container) getNetworkedContainer() (*Container, error) { func (container *Container) getNetworkedContainer() (*Container, error) {
parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2) parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2)
switch parts[0] { switch parts[0] {
case "container": case "container":
if len(parts) != 2 {
return nil, fmt.Errorf("no container specified to join network")
}
nc := container.daemon.Get(parts[1]) nc := container.daemon.Get(parts[1])
if nc == nil { if nc == nil {
return nil, fmt.Errorf("no such container to join network: %s", parts[1]) return nil, fmt.Errorf("no such container to join network: %s", parts[1])

Просмотреть файл

@ -1,10 +1,13 @@
package daemon package daemon
import ( import (
"fmt"
"github.com/docker/docker/engine" "github.com/docker/docker/engine"
"github.com/docker/docker/graph" "github.com/docker/docker/graph"
"github.com/docker/docker/pkg/parsers" "github.com/docker/docker/pkg/parsers"
"github.com/docker/docker/runconfig" "github.com/docker/docker/runconfig"
"github.com/docker/libcontainer/label"
) )
func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status { func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status {
@ -80,6 +83,12 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil { if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil {
return nil, nil, err return nil, nil, err
} }
if hostConfig != nil && config.SecurityOpt == nil {
config.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
if err != nil {
return nil, nil, err
}
}
if container, err = daemon.newContainer(name, config, img); err != nil { if container, err = daemon.newContainer(name, config, img); err != nil {
return nil, nil, err return nil, nil, err
} }
@ -99,3 +108,20 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
} }
return container, warnings, nil return container, warnings, nil
} }
// GenerateSecurityOpt picks the security options for a container based on the
// requested IPC mode:
//   - host mode returns label.DisableSecOpt();
//   - a mode naming another container returns label.DupSecOpt() of that
//     container's ProcessLabel, failing if the container is unknown or not
//     running;
//   - any other mode returns nil options.
func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
	if ipcMode.IsHost() {
		return label.DisableSecOpt(), nil
	}

	target := ipcMode.Container()
	if target == "" {
		return nil, nil
	}

	peer := daemon.Get(target)
	if peer == nil {
		return nil, fmt.Errorf("no such container to join IPC: %s", target)
	}
	if !peer.IsRunning() {
		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", target)
	}
	return label.DupSecOpt(peer.ProcessLabel), nil
}

Просмотреть файл

@ -71,6 +71,12 @@ type Network struct {
HostNetworking bool `json:"host_networking"` HostNetworking bool `json:"host_networking"`
} }
// Ipc holds the IPC namespace settings of a container as passed to the exec
// driver. When HostIpc is false and ContainerID is empty, neither sharing
// option is requested.
type Ipc struct {
// ContainerID is the ID of the container whose IPC namespace should be joined.
ContainerID string `json:"container_id"` // empty when no container is joined
// HostIpc is true when the container should use the host's IPC namespace.
HostIpc bool `json:"host_ipc"`
}
type NetworkInterface struct { type NetworkInterface struct {
Gateway string `json:"gateway"` Gateway string `json:"gateway"`
IPAddress string `json:"ip"` IPAddress string `json:"ip"`
@ -115,6 +121,7 @@ type Command struct {
WorkingDir string `json:"working_dir"` WorkingDir string `json:"working_dir"`
ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
Network *Network `json:"network"` Network *Network `json:"network"`
Ipc *Ipc `json:"ipc"`
Resources *Resources `json:"resources"` Resources *Resources `json:"resources"`
Mounts []Mount `json:"mounts"` Mounts []Mount `json:"mounts"`
AllowedDevices []*devices.Device `json:"allowed_devices"` AllowedDevices []*devices.Device `json:"allowed_devices"`

Просмотреть файл

@ -36,6 +36,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.RestrictSys = true container.RestrictSys = true
if err := d.createIpc(container, c); err != nil {
return nil, err
}
if err := d.createNetwork(container, c); err != nil { if err := d.createNetwork(container, c); err != nil {
return nil, err return nil, err
} }
@ -124,6 +128,28 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
return nil return nil
} }
// createIpc applies the command's IPC settings to the libcontainer config.
// With HostIpc set, the NEWIPC namespace entry is switched off. With a
// ContainerID set, IpcNsPath is pointed at /proc/<pid>/ns/ipc of that active
// container's process; an error is returned if the container is not active or
// has no process. Otherwise the config is left untouched.
func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
	switch {
	case c.Ipc.HostIpc:
		container.Namespaces["NEWIPC"] = false

	case c.Ipc.ContainerID != "":
		// Only the map lookup is done under the driver lock.
		d.Lock()
		peer := d.activeContainers[c.Ipc.ContainerID]
		d.Unlock()

		if peer == nil || peer.cmd.Process == nil {
			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
		}

		pid := fmt.Sprint(peer.cmd.Process.Pid)
		container.IpcNsPath = filepath.Join("/proc", pid, "ns", "ipc")
	}
	return nil
}
func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
container.Capabilities = capabilities.GetAllCapabilities() container.Capabilities = capabilities.GetAllCapabilities()
container.Cgroups.AllowAllDevices = true container.Cgroups.AllowAllDevices = true

Просмотреть файл

@ -23,6 +23,7 @@ docker-run - Run a command in a new container
[**--expose**[=*[]*]] [**--expose**[=*[]*]]
[**-h**|**--hostname**[=*HOSTNAME*]] [**-h**|**--hostname**[=*HOSTNAME*]]
[**-i**|**--interactive**[=*false*]] [**-i**|**--interactive**[=*false*]]
[**--ipc**[=*[]*]]
[**--security-opt**[=*[]*]] [**--security-opt**[=*[]*]]
[**--link**[=*[]*]] [**--link**[=*[]*]]
[**--lxc-conf**[=*[]*]] [**--lxc-conf**[=*[]*]]
@ -142,6 +143,12 @@ ENTRYPOINT.
**-i**, **--interactive**=*true*|*false* **-i**, **--interactive**=*true*|*false*
When set to true, keep stdin open even if not attached. The default is false. When set to true, keep stdin open even if not attached. The default is false.
**--ipc**=[]
Set the IPC mode for the container
**container**:<*name*|*id*>: reuses another container's IPC stack
**host**: use the host's IPC stack inside the container.
Note: the host mode gives the container full access to local IPC and is therefore considered insecure.
**--security-opt**=*secdriver*:*name*:*value* **--security-opt**=*secdriver*:*name*:*value*
"label:user:USER" : Set the label user for the container "label:user:USER" : Set the label user for the container
"label:role:ROLE" : Set the label role for the container "label:role:ROLE" : Set the label role for the container
@ -183,10 +190,11 @@ and foreground Docker containers.
**--net**="bridge" **--net**="bridge"
Set the Network mode for the container Set the Network mode for the container
'bridge': creates a new network stack for the container on the docker bridge **bridge**: creates a new network stack for the container on the docker bridge
'none': no networking for this container **none**: no networking for this container
'container:<name|id>': reuses another container network stack **container**:<*name*|*id*>: reuses another container's network stack
'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure. **host**: use the host network stack inside the container.
Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
**--mac-address**=*macaddress* **--mac-address**=*macaddress*
Set the MAC address for the container's Ethernet device: Set the MAC address for the container's Ethernet device:
@ -310,6 +318,71 @@ youd like to connect instead, as in:
# docker run -a stdin -a stdout -i -t fedora /bin/bash # docker run -a stdin -a stdout -i -t fedora /bin/bash
## Sharing IPC between containers
Using shm_server.c available here: http://www.cs.cf.ac.uk/Dave/C/node27.html
Testing `--ipc=host` mode:
The host shows a shared memory segment with 7 pids attached, which happens to be from httpd:
```
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x01128e25 0 root 600 1000 7
```
Now run a regular container, and it correctly does NOT see the shared memory segment from the host:
```
$ sudo docker run -it shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
```
Run a container with the new `--ipc=host` option, and it now sees the shared memory segment from the host httpd:
```
$ sudo docker run -it --ipc=host shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x01128e25 0 root 600 1000 7
```
Testing `--ipc=container:CONTAINERID` mode:
Start a container with a program to create a shared memory segment:
```
$ sudo docker run -it shm bash
$ sudo shm/shm_server &
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0000162e 0 root 666 27 1
```
Create a 2nd container; it correctly shows no shared memory segment from the 1st container:
```
$ sudo docker run shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
```
Create a 3rd container using the new `--ipc=container:CONTAINERID` option; it now shows the shared memory segment from the first:
```
$ sudo docker run -it --ipc=container:ed735b2264ac shm ipcs -m
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0000162e 0 root 666 27 1
```
## Linking Containers ## Linking Containers
The link feature allows multiple containers to communicate with each other. For The link feature allows multiple containers to communicate with each other. For

Просмотреть файл

@ -50,6 +50,7 @@ following options.
- [Container Identification](#container-identification) - [Container Identification](#container-identification)
- [Name (--name)](#name-name) - [Name (--name)](#name-name)
- [PID Equivalent](#pid-equivalent) - [PID Equivalent](#pid-equivalent)
- [IPC Settings](#ipc-settings)
- [Network Settings](#network-settings) - [Network Settings](#network-settings)
- [Clean Up (--rm)](#clean-up-rm) - [Clean Up (--rm)](#clean-up-rm)
- [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory) - [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory)
@ -131,6 +132,22 @@ While not strictly a means of identifying a container, you can specify a version
image you'd like to run the container with by adding `image[:tag]` to the command. For image you'd like to run the container with by adding `image[:tag]` to the command. For
example, `docker run ubuntu:14.04`. example, `docker run ubuntu:14.04`.
## IPC Settings
--ipc="" : Set the IPC mode for the container,
'container:<name|id>': reuses another container's IPC namespace
'host': use the host's IPC namespace inside the container
By default, all containers have the IPC namespace enabled.
The IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores, and message queues.
Shared memory segments are used to accelerate inter-process communication at
memory speed, rather than through pipes or through the network stack. Shared
memory is commonly used by databases and custom-built (typically C/OpenMPI,
C++/using boost libraries) high performance applications for scientific
computing and financial services industries. If these types of applications
are broken into multiple containers, you might need to share the IPC mechanisms
of the containers.
## Network settings ## Network settings
--dns=[] : Set custom dns servers for the container --dns=[] : Set custom dns servers for the container

Просмотреть файл

@ -2568,3 +2568,73 @@ func TestRunUnknownCommand(t *testing.T) {
logDone("run - Unknown Command") logDone("run - Unknown Command")
} }
func TestRunModeIpcHost(t *testing.T) {
hostIpc, err := os.Readlink("/proc/1/ns/ipc")
if err != nil {
t.Fatal(err)
}
cmd := exec.Command(dockerBinary, "run", "--ipc=host", "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}
out2 = strings.Trim(out2, "\n")
if hostIpc != out2 {
t.Fatalf("IPC different with --ipc=host %s != %s\n", hostIpc, out2)
}
cmd = exec.Command(dockerBinary, "run", "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err = runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}
out2 = strings.Trim(out2, "\n")
if hostIpc == out2 {
t.Fatalf("IPC should be different without --ipc=host %s != %s\n", hostIpc, out2)
}
deleteAllContainers()
logDone("run - hostname and several network modes")
}
func TestRunModeIpcContainer(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
out, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out)
}
id := strings.TrimSpace(out)
state, err := inspectField(id, "State.Running")
if err != nil {
t.Fatal(err)
}
if state != "true" {
t.Fatal("Container state is 'not running'")
}
pid1, err := inspectField(id, "State.Pid")
if err != nil {
t.Fatal(err)
}
parentContainerIpc, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/ipc", pid1))
if err != nil {
t.Fatal(err)
}
cmd = exec.Command(dockerBinary, "run", fmt.Sprintf("--ipc=container:%s", id), "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}
out2 = strings.Trim(out2, "\n")
if parentContainerIpc != out2 {
t.Fatalf("IPC different with --ipc=container:%s %s != %s\n", id, parentContainerIpc, out2)
}
deleteAllContainers()
logDone("run - hostname and several network modes")
}

Просмотреть файл

@ -28,6 +28,44 @@ func (n NetworkMode) IsNone() bool {
return n == "none" return n == "none"
} }
type IpcMode string
// IsPrivate indicates whether container use it's private ipc stack
func (n IpcMode) IsPrivate() bool {
return !(n.IsHost() || n.IsContainer())
}
func (n IpcMode) IsHost() bool {
return n == "host"
}
func (n IpcMode) IsContainer() bool {
parts := strings.SplitN(string(n), ":", 2)
return len(parts) > 1 && parts[0] == "container"
}
func (n IpcMode) Valid() bool {
parts := strings.Split(string(n), ":")
switch mode := parts[0]; mode {
case "", "host":
case "container":
if len(parts) != 2 || parts[1] == "" {
return false
}
default:
return false
}
return true
}
func (n IpcMode) Container() string {
parts := strings.SplitN(string(n), ":", 2)
if len(parts) > 1 {
return parts[1]
}
return ""
}
type DeviceMapping struct { type DeviceMapping struct {
PathOnHost string PathOnHost string
PathInContainer string PathInContainer string
@ -53,6 +91,7 @@ type HostConfig struct {
VolumesFrom []string VolumesFrom []string
Devices []DeviceMapping Devices []DeviceMapping
NetworkMode NetworkMode NetworkMode NetworkMode
IpcMode IpcMode
CapAdd []string CapAdd []string
CapDrop []string CapDrop []string
RestartPolicy RestartPolicy RestartPolicy RestartPolicy
@ -84,6 +123,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
Privileged: job.GetenvBool("Privileged"), Privileged: job.GetenvBool("Privileged"),
PublishAllPorts: job.GetenvBool("PublishAllPorts"), PublishAllPorts: job.GetenvBool("PublishAllPorts"),
NetworkMode: NetworkMode(job.Getenv("NetworkMode")), NetworkMode: NetworkMode(job.Getenv("NetworkMode")),
IpcMode: IpcMode(job.Getenv("IpcMode")),
} }
job.GetenvJson("LxcConf", &hostConfig.LxcConf) job.GetenvJson("LxcConf", &hostConfig.LxcConf)

Просмотреть файл

@ -60,6 +60,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)") flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.") flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
flMacAddress = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)") flMacAddress = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)")
flIpcMode = cmd.String([]string{"-ipc"}, "", "Default is to create a private IPC namespace (POSIX SysV IPC) for the container\n'container:<name|id>': reuses another container shared memory, semaphores and message queues\n'host': use the host shared memory,semaphores and message queues inside the container. Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.")
flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure[:max-retry], always)") flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure[:max-retry], always)")
) )
@ -241,6 +242,11 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
// parse the '-e' and '--env' after, to allow override // parse the '-e' and '--env' after, to allow override
envVariables = append(envVariables, flEnv.GetAll()...) envVariables = append(envVariables, flEnv.GetAll()...)
ipcMode := IpcMode(*flIpcMode)
if !ipcMode.Valid() {
return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %v", err)
}
netMode, err := parseNetMode(*flNetMode) netMode, err := parseNetMode(*flNetMode)
if err != nil { if err != nil {
return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err) return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err)
@ -289,6 +295,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
ExtraHosts: flExtraHosts.GetAll(), ExtraHosts: flExtraHosts.GetAll(),
VolumesFrom: flVolumesFrom.GetAll(), VolumesFrom: flVolumesFrom.GetAll(),
NetworkMode: netMode, NetworkMode: netMode,
IpcMode: ipcMode,
Devices: deviceMappings, Devices: deviceMappings,
CapAdd: flCapAdd.GetAll(), CapAdd: flCapAdd.GetAll(),
CapDrop: flCapDrop.GetAll(), CapDrop: flCapDrop.GetAll(),