diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 89a4ec0c48..36553af5bc 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -45,12 +45,17 @@ Sample `container.json` file: "AUDIT_WRITE", "AUDIT_CONTROL", "MAC_OVERRIDE", - "MAC_ADMIN" + "MAC_ADMIN", + "NET_ADMIN" ], "network": { + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, "address": "172.17.0.100/16", "gateway": "172.17.42.1", - "bridge": "docker0", "mtu": 1500 }, "cgroups": { diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 3c1b62b65a..4a47977334 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -4,6 +4,10 @@ import ( "github.com/dotcloud/docker/pkg/cgroups" ) +// Context is a generic key value pair that allows +// arbitrary data to be sent +type Context map[string]string + // Container defines configuration options for how a // container is setup inside a directory and how a process should be executed type Container struct { @@ -24,8 +28,9 @@ type Container struct { // The network configuration can be omited from a container causing the // container to be setup with the host's networking stack type Network struct { - Address string `json:"address,omitempty"` - Gateway string `json:"gateway,omitempty"` - Bridge string `json:"bridge,omitempty"` - Mtu int `json:"mtu,omitempty"` + Type string `json:"type,omitempty"` // type of networking to setup i.e. 
veth, macvlan, etc + Context Context `json:"context,omitempty"` // generic context for type specific networking options + Address string `json:"address,omitempty"` + Gateway string `json:"gateway,omitempty"` + Mtu int `json:"mtu,omitempty"` } diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 07e52df428..c2b21f8609 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -28,12 +28,17 @@ "AUDIT_WRITE", "AUDIT_CONTROL", "MAC_OVERRIDE", - "MAC_ADMIN" + "MAC_ADMIN", + "NET_ADMIN" ], "network": { + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, "address": "172.17.0.100/16", "gateway": "172.17.42.1", - "bridge": "docker0", "mtu": 1500 }, "cgroups": { diff --git a/pkg/libcontainer/network/strategy.go b/pkg/libcontainer/network/strategy.go new file mode 100644 index 0000000000..8ecc11a24d --- /dev/null +++ b/pkg/libcontainer/network/strategy.go @@ -0,0 +1,32 @@ +package network + +import ( + "errors" + "github.com/dotcloud/docker/pkg/libcontainer" +) + +var ( + ErrNotValidStrategyType = errors.New("not a valid network strategy type") +) + +var strategies = map[string]NetworkStrategy{ + "veth": &Veth{}, +} + +// NetworkStrategy represents a specific network configuration for +// a container's networking stack +type NetworkStrategy interface { + Create(*libcontainer.Network, int) (libcontainer.Context, error) + Initialize(*libcontainer.Network, libcontainer.Context) error +} + +// GetStrategy returns the specific network strategy for the +// provided type. If no strategy is registered for the type an +// ErrNotValidStrategyType is returned. 
+func GetStrategy(tpe string) (NetworkStrategy, error) { + s, exists := strategies[tpe] + if !exists { + return nil, ErrNotValidStrategyType + } + return s, nil +} diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go new file mode 100644 index 0000000000..61fec5500c --- /dev/null +++ b/pkg/libcontainer/network/veth.go @@ -0,0 +1,103 @@ +package network + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "log" +) + +type Veth struct { +} + +func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, error) { + log.Printf("creating veth network") + var ( + bridge string + prefix string + exists bool + ) + if bridge, exists = n.Context["bridge"]; !exists { + return nil, fmt.Errorf("bridge does not exist in network context") + } + if prefix, exists = n.Context["prefix"]; !exists { + return nil, fmt.Errorf("veth prefix does not exist in network context") + } + name1, name2, err := createVethPair(prefix) + if err != nil { + return nil, err + } + context := libcontainer.Context{ + "vethHost": name1, + "vethChild": name2, + } + log.Printf("veth pair created %s <> %s", name1, name2) + if err := SetInterfaceMaster(name1, bridge); err != nil { + return context, err + } + if err := SetMtu(name1, n.Mtu); err != nil { + return context, err + } + if err := InterfaceUp(name1); err != nil { + return context, err + } + log.Printf("setting %s inside %d namespace", name2, nspid) + if err := SetInterfaceInNamespacePid(name2, nspid); err != nil { + return context, err + } + return context, nil +} + +func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error { + var ( + vethChild string + exists bool + ) + if vethChild, exists = context["vethChild"]; !exists { + return fmt.Errorf("vethChild does not exist in network context") + } + if err := InterfaceDown(vethChild); err != nil { + return fmt.Errorf("interface down %s %s", vethChild, 
err) + } + if err := ChangeInterfaceName(vethChild, "eth0"); err != nil { + return fmt.Errorf("change %s to eth0 %s", vethChild, err) + } + if err := SetInterfaceIp("eth0", config.Address); err != nil { + return fmt.Errorf("set eth0 ip %s", err) + } + if err := SetMtu("eth0", config.Mtu); err != nil { + return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("eth0"); err != nil { + return fmt.Errorf("eth0 up %s", err) + } + if err := SetMtu("lo", config.Mtu); err != nil { + return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("lo"); err != nil { + return fmt.Errorf("lo up %s", err) + } + if config.Gateway != "" { + if err := SetDefaultGateway(config.Gateway); err != nil { + return fmt.Errorf("set gateway to %s %s", config.Gateway, err) + } + } + return nil +} + +// createVethPair will automatically generate two random names for +// the veth pair and ensure that they have been created +func createVethPair(prefix string) (name1 string, name2 string, err error) { + name1, err = utils.GenerateRandomName(prefix, 4) + if err != nil { + return + } + name2, err = utils.GenerateRandomName(prefix, 4) + if err != nil { + return + } + if err = CreateVethPair(name1, name2); err != nil { + return + } + return +} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index b2eaa0bc65..6c4d7666a2 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,10 +3,10 @@ package nsinit import ( + "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" - "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/term" "io" @@ -19,11 +19,11 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. 
-func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, master *os.File, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, + master *os.File, logFile string, args []string) (int, error) { var ( - console string - err error - + console string + err error inPipe io.WriteCloser outPipe, errPipe io.ReadCloser ) @@ -46,7 +46,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command := CreateCommand(container, console, logFile, r.Fd(), args) if !container.Tty { - log.Printf("opening pipes on command") + log.Printf("opening std pipes") if inPipe, err = command.StdinPipe(); err != nil { return -1, err } @@ -78,15 +78,9 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. return -1, err } } - - if container.Network != nil { - log.Printf("creating veth pair") - vethPair, err := InitializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) - if err != nil { - return -1, err - } - log.Printf("sending %s as veth pair name", vethPair) - SendVethName(w, vethPair) + if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { + command.Process.Kill() + return -1, err } // Sync with child @@ -104,7 +98,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command.Process.Kill() return -1, err } - defer term.RestoreTerminal(uintptr(syscall.Stdin), state) + defer term.RestoreTerminal(os.Stdin.Fd(), state) } else { log.Printf("starting copy for std pipes") go func() { @@ -125,39 +119,34 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } -// SendVethName writes the veth pair name to the child's stdin then closes the -// pipe so that the child stops waiting for more data -func SendVethName(pipe io.Writer, name string) { - fmt.Fprint(pipe, name) +func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io.Writer) error { + if container.Network != nil { + log.Printf("creating host network configuration type %s", container.Network.Type) + strategy, err := network.GetStrategy(container.Network.Type) + if err != nil { + return err + } + networkContext, err := strategy.Create(container.Network, nspid) + if err != nil { + return err + } + log.Printf("sending %v as network context", networkContext) + if err := SendContext(pipe, networkContext); err != nil { + return err + } + } + return nil } -// initializeContainerVeth will create a veth pair and setup the host's -// side of the pair by setting the specified bridge as the master and bringing -// up the interface. 
-// -// Then will with set the other side of the veth pair into the container's namespaced -// using the pid and returns the veth's interface name to provide to the container to -// finish setting up the interface inside the namespace -func InitializeContainerVeth(bridge string, mtu, nspid int) (string, error) { - name1, name2, err := createVethPair() +// SendContext marshals the given context to JSON and writes it to the +// pipe; it does not close the pipe itself +func SendContext(pipe io.Writer, context libcontainer.Context) error { + data, err := json.Marshal(context) if err != nil { - return "", err + return err } - log.Printf("veth pair created %s <> %s", name1, name2) - if err := network.SetInterfaceMaster(name1, bridge); err != nil { - return "", err - } - if err := network.SetMtu(name1, mtu); err != nil { - return "", err - } - if err := network.InterfaceUp(name1); err != nil { - return "", err - } - log.Printf("setting %s inside %d namespace", name2, nspid) - if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { - return "", err - } - return name2, nil + pipe.Write(data) + return nil } // SetupWindow gets the parent window size and sets the master @@ -190,29 +179,13 @@ func CreateMasterAndConsole() (*os.File, string, error) { return master, console, nil } -// createVethPair will automatically generage two random names for -// the veth pair and ensure that they have been created -func createVethPair() (name1 string, name2 string, err error) { - name1, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - name2, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - if err = network.CreateVethPair(name1, name2); err != nil { - return - } - return -} - // writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container func writePidFile(command *exec.Cmd) error { return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) } func 
deletePidFile() error { + log.Printf("removing .nspid file") return os.Remove(".nspid") } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 04716ba645..f530d4a52a 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -3,6 +3,7 @@ package nsinit import ( + "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" @@ -27,13 +28,10 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe log.Printf("initializing namespace at %s", rootfs) // We always read this as it is a way to sync with the parent as well - tempVethName, err := getVethName(pipe) + context, err := GetContextFromParent(pipe) if err != nil { return err } - if tempVethName != "" { - log.Printf("received veth name %s", tempVethName) - } if console != "" { log.Printf("setting up console for %s", console) // close pipes so that we can replace it with the pty @@ -62,7 +60,7 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if err := setupVethNetwork(container.Network, tempVethName); err != nil { + if err := setupNetwork(container.Network, context); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { @@ -145,46 +143,29 @@ func openTerminal(name string, flag int) (*os.File, error) { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupVethNetwork(config *libcontainer.Network, tempVethName string) error { +func setupNetwork(config *libcontainer.Network, context libcontainer.Context) error { if config != nil { - if err := 
network.InterfaceDown(tempVethName); err != nil { - return fmt.Errorf("interface down %s %s", tempVethName, err) - } - if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil { - return fmt.Errorf("change %s to eth0 %s", tempVethName, err) - } - if err := network.SetInterfaceIp("eth0", config.Address); err != nil { - return fmt.Errorf("set eth0 ip %s", err) - } - if err := network.SetMtu("eth0", config.Mtu); err != nil { - return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("eth0"); err != nil { - return fmt.Errorf("eth0 up %s", err) - } - if err := network.SetMtu("lo", config.Mtu); err != nil { - return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("lo"); err != nil { - return fmt.Errorf("lo up %s", err) - } - if config.Gateway != "" { - if err := network.SetDefaultGateway(config.Gateway); err != nil { - return fmt.Errorf("set gateway to %s %s", config.Gateway, err) - } + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err } + return strategy.Initialize(config, context) } return nil } -// getVethName reads from Stdin the temp veth name -// sent by the parent processes after the veth pair -// has been created and setup -func getVethName(pipe io.ReadCloser) (string, error) { +func GetContextFromParent(pipe io.ReadCloser) (libcontainer.Context, error) { defer pipe.Close() data, err := ioutil.ReadAll(pipe) if err != nil { - return "", fmt.Errorf("error reading from stdin %s", err) + return nil, fmt.Errorf("error reading from stdin %s", err) } - return string(data), nil + var context libcontainer.Context + if len(data) > 0 { + if err := json.Unmarshal(data, &context); err != nil { + return nil, err + } + log.Printf("received context %v", context) + } + return context, nil }