2013-02-21 05:47:09 +04:00
package docker
import (
2013-02-22 06:33:23 +04:00
"encoding/binary"
2013-02-25 22:45:23 +04:00
"errors"
2013-02-21 06:20:18 +04:00
"fmt"
2013-10-05 06:25:15 +04:00
"github.com/dotcloud/docker/iptables"
2013-09-19 12:57:11 +04:00
"github.com/dotcloud/docker/netlink"
2013-10-19 01:15:24 +04:00
"github.com/dotcloud/docker/proxy"
2013-05-15 02:37:35 +04:00
"github.com/dotcloud/docker/utils"
2013-02-28 23:50:02 +04:00
"log"
2013-02-21 05:47:09 +04:00
"net"
2013-02-28 23:50:02 +04:00
"strconv"
2013-04-05 09:56:12 +04:00
"sync"
2013-12-02 21:03:21 +04:00
"syscall"
"unsafe"
2013-02-21 05:47:09 +04:00
)
// Exported defaults for the daemon's network configuration.
const (
	// DefaultNetworkBridge is the default bridge interface name.
	DefaultNetworkBridge = "docker0"
	// DisableNetworkBridge is the bridge name that makes newNetworkManager
	// return a disabled manager.
	DisableNetworkBridge = "none"
	// DefaultNetworkMtu is the default MTU.
	DefaultNetworkMtu = 1500
)

// Internal parameters.
const (
	// portRangeStart and portRangeEnd bound the ports cycled through by
	// PortAllocator.runFountain.
	portRangeStart = 49153
	portRangeEnd   = 65535

	// siocBRADDBR is the ioctl request createBridgeIface issues to create a
	// bridge device.
	siocBRADDBR = 0x89a0
)
2013-02-28 23:50:02 +04:00
// networkRange returns the lowest and highest IPv4 addresses covered by
// network: the address with all host bits cleared and the address with all
// host bits set.
func networkRange(network *net.IPNet) (net.IP, net.IP) {
	addr := network.IP.To4()
	lowest := addr.Mask(network.Mask)

	// Setting every bit the mask leaves free yields the top of the range.
	highest := net.IPv4(0, 0, 0, 0).To4()
	for i := range highest {
		highest[i] = addr[i] | ^network.Mask[i]
	}
	return lowest, highest
}
2013-04-04 01:53:54 +04:00
// networkOverlaps reports whether netX and netY share any address. Two
// networks overlap when the first address of either one lies inside the other.
func networkOverlaps(netX *net.IPNet, netY *net.IPNet) bool {
	if startX, _ := networkRange(netX); netY.Contains(startX) {
		return true
	}
	// Only look at netY's range once the first check has failed.
	startY, _ := networkRange(netY)
	return netX.Contains(startY)
}
2013-02-28 23:50:02 +04:00
// ipToInt packs an IPv4 address into a 32 bit integer, most significant
// octet first. The result is signed, so addresses from 128.0.0.0 upwards
// come out negative.
func ipToInt(ip net.IP) int32 {
	octets := ip.To4()
	return int32(binary.BigEndian.Uint32(octets))
}
2013-02-28 23:50:02 +04:00
// intToIP is the inverse of ipToInt: it unpacks a 32 bit integer into a
// 4 byte IP address, most significant octet first.
func intToIP(n int32) net.IP {
	ip := make(net.IP, net.IPv4len)
	binary.BigEndian.PutUint32(ip, uint32(n))
	return ip
}
2013-02-28 23:50:02 +04:00
// networkSize returns how many addresses a 4 byte netmask spans, i.e. the
// value of the inverted mask plus one (256 for a /24).
func networkSize(mask net.IPMask) int32 {
	hostBits := make([]byte, net.IPv4len)
	for i := range hostBits {
		hostBits[i] = ^mask[i]
	}
	return int32(binary.BigEndian.Uint32(hostBits)) + 1
}
2013-09-19 12:57:11 +04:00
func checkRouteOverlaps ( networks [ ] * net . IPNet , dockerNetwork * net . IPNet ) error {
for _ , network := range networks {
if networkOverlaps ( dockerNetwork , network ) {
return fmt . Errorf ( "Network %s is already routed: '%s'" , dockerNetwork , network )
2013-04-04 02:57:57 +04:00
}
}
return nil
}
2013-08-18 07:49:33 +04:00
// checkNameserverOverlaps parses every entry of nameservers as CIDR and
// returns an error for the first one whose network overlaps dockerNetwork.
// An entry that is not valid CIDR aborts the check with the parse error.
func checkNameserverOverlaps(nameservers []string, dockerNetwork *net.IPNet) error {
	for _, entry := range nameservers {
		_, nsNet, err := net.ParseCIDR(entry)
		if err != nil {
			return err
		}
		if !networkOverlaps(dockerNetwork, nsNet) {
			continue
		}
		return fmt.Errorf("%s overlaps nameserver %s", dockerNetwork, nsNet)
	}
	return nil
}
2013-07-22 23:06:24 +04:00
// CreateBridgeIface creates a network bridge interface on the host system with the name `ifaceName`,
// and attempts to configure it with an address which doesn't conflict with any other interface on the host.
// If it can't find an address which doesn't conflict, it will return an error.
2013-10-05 06:25:15 +04:00
func CreateBridgeIface ( config * DaemonConfig ) error {
2013-07-22 23:06:24 +04:00
addrs := [ ] string {
// Here we don't follow the convention of using the 1st IP of the range for the gateway.
// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
// on the internal addressing or other stupid things like that.
// The shouldn't, but hey, let's not break them unless we really have to.
2013-08-07 04:24:10 +04:00
"172.17.42.1/16" , // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
"10.0.42.1/16" , // Don't even try using the entire /8, that's too intrusive
2013-07-22 23:06:24 +04:00
"10.1.42.1/16" ,
"10.42.42.1/16" ,
"172.16.42.1/24" ,
"172.16.43.1/24" ,
"172.16.44.1/24" ,
"10.0.42.1/24" ,
"10.0.43.1/24" ,
"192.168.42.1/24" ,
"192.168.43.1/24" ,
"192.168.44.1/24" ,
}
2013-04-04 02:57:57 +04:00
2013-08-18 07:49:33 +04:00
nameservers := [ ] string { }
resolvConf , _ := utils . GetResolvConf ( )
// we don't check for an error here, because we don't really care
// if we can't read /etc/resolv.conf. So instead we skip the append
// if resolvConf is nil. It either doesn't exist, or we can't read it
// for some reason.
if resolvConf != nil {
nameservers = append ( nameservers , utils . GetNameserversAsCIDR ( resolvConf ) ... )
}
2013-04-04 02:57:57 +04:00
var ifaceAddr string
2013-12-13 19:47:19 +04:00
if len ( config . BridgeIp ) != 0 {
_ , _ , err := net . ParseCIDR ( config . BridgeIp )
2013-04-04 02:57:57 +04:00
if err != nil {
return err
}
2013-12-13 19:47:19 +04:00
ifaceAddr = config . BridgeIp
} else {
for _ , addr := range addrs {
_ , dockerNetwork , err := net . ParseCIDR ( addr )
if err != nil {
return err
}
routes , err := netlink . NetworkGetRoutes ( )
if err != nil {
return err
}
if err := checkRouteOverlaps ( routes , dockerNetwork ) ; err == nil {
if err := checkNameserverOverlaps ( nameservers , dockerNetwork ) ; err == nil {
ifaceAddr = addr
break
}
} else {
utils . Debugf ( "%s: %s" , addr , err )
2013-08-18 07:49:33 +04:00
}
2013-04-04 02:57:57 +04:00
}
}
if ifaceAddr == "" {
2013-10-05 06:25:15 +04:00
return fmt . Errorf ( "Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'" , config . BridgeIface , config . BridgeIface )
2013-04-04 02:57:57 +04:00
}
2013-10-05 06:25:15 +04:00
utils . Debugf ( "Creating bridge %s with network %s" , config . BridgeIface , ifaceAddr )
2013-04-04 02:57:57 +04:00
2013-12-02 21:03:21 +04:00
if err := createBridgeIface ( config . BridgeIface ) ; err != nil {
return err
2013-04-04 02:57:57 +04:00
}
2013-09-19 12:57:11 +04:00
iface , err := net . InterfaceByName ( config . BridgeIface )
if err != nil {
return err
}
ipAddr , ipNet , err := net . ParseCIDR ( ifaceAddr )
if err != nil {
return err
}
if netlink . NetworkLinkAddIp ( iface , ipAddr , ipNet ) ; err != nil {
return fmt . Errorf ( "Unable to add private network: %s" , err )
2013-04-04 02:57:57 +04:00
}
2013-09-19 12:57:11 +04:00
if err := netlink . NetworkLinkUp ( iface ) ; err != nil {
return fmt . Errorf ( "Unable to start network bridge: %s" , err )
2013-04-04 02:57:57 +04:00
}
2013-10-11 00:48:22 +04:00
2013-04-04 02:57:57 +04:00
return nil
}
2013-12-02 21:03:21 +04:00
// Create the actual bridge device. This is more backward-compatible than
// netlink.NetworkLinkAdd and works on RHEL 6.
func createBridgeIface ( name string ) error {
s , err := syscall . Socket ( syscall . AF_INET6 , syscall . SOCK_STREAM , syscall . IPPROTO_IP )
if err != nil {
2013-12-13 22:39:49 +04:00
utils . Debugf ( "Bridge socket creation failed IPv6 probably not enabled: %v" , err )
s , err = syscall . Socket ( syscall . AF_INET , syscall . SOCK_STREAM , syscall . IPPROTO_IP )
if err != nil {
return fmt . Errorf ( "Error creating bridge creation socket: %s" , err )
}
2013-12-02 21:03:21 +04:00
}
defer syscall . Close ( s )
nameBytePtr , err := syscall . BytePtrFromString ( name )
if err != nil {
return fmt . Errorf ( "Error converting bridge name %s to byte array: %s" , name , err )
}
if _ , _ , err := syscall . Syscall ( syscall . SYS_IOCTL , uintptr ( s ) , siocBRADDBR , uintptr ( unsafe . Pointer ( nameBytePtr ) ) ) ; err != 0 {
return fmt . Errorf ( "Error creating bridge: %s" , err )
}
return nil
}
2013-02-28 23:50:02 +04:00
// getIfaceAddr returns the first IPv4 address assigned to the interface
// called name. It fails when the interface cannot be found, its addresses
// cannot be listed, or it has no IPv4 address. When several IPv4 addresses
// are present a notice is printed to stdout and the first one is used.
func getIfaceAddr(name string) (net.Addr, error) {
	iface, err := net.InterfaceByName(name)
	if err != nil {
		return nil, err
	}
	all, err := iface.Addrs()
	if err != nil {
		return nil, err
	}

	// Keep only the addresses that have a 4 byte form.
	var v4Addrs []net.Addr
	for _, candidate := range all {
		if v4 := candidate.(*net.IPNet).IP.To4(); len(v4) == net.IPv4len {
			v4Addrs = append(v4Addrs, candidate)
		}
	}

	if len(v4Addrs) == 0 {
		return nil, fmt.Errorf("Interface %v has no IP addresses", name)
	}
	if len(v4Addrs) > 1 {
		fmt.Printf("Interface %v has more than 1 IPv4 address. Defaulting to using %v\n",
			name, v4Addrs[0].(*net.IPNet).IP)
	}
	return v4Addrs[0], nil
}
2013-02-28 23:50:02 +04:00
// Port mapper takes care of mapping external ports to containers by setting
// up iptables rules.
// It keeps track of all mappings and is able to unmap at will
type PortMapper struct {
2013-11-21 20:26:07 +04:00
tcpMapping map [ string ] * net . TCPAddr
tcpProxies map [ string ] proxy . Proxy
udpMapping map [ string ] * net . UDPAddr
udpProxies map [ string ] proxy . Proxy
2013-02-28 23:50:02 +04:00
2013-11-21 20:26:07 +04:00
iptables * iptables . Chain
defaultIp net . IP
proxyFactoryFunc func ( net . Addr , net . Addr ) ( proxy . Proxy , error )
2013-02-28 23:50:02 +04:00
}
2013-10-05 06:25:15 +04:00
func ( mapper * PortMapper ) Map ( ip net . IP , port int , backendAddr net . Addr ) error {
2013-11-21 20:26:07 +04:00
mapKey := ( & net . TCPAddr { Port : port , IP : ip } ) . String ( )
if _ , exists := mapper . tcpProxies [ mapKey ] ; exists {
return fmt . Errorf ( "Port %s is already in use" , mapKey )
}
2013-06-12 02:46:23 +04:00
if _ , isTCP := backendAddr . ( * net . TCPAddr ) ; isTCP {
backendPort := backendAddr . ( * net . TCPAddr ) . Port
backendIP := backendAddr . ( * net . TCPAddr ) . IP
2013-10-05 06:25:15 +04:00
if mapper . iptables != nil {
if err := mapper . iptables . Forward ( iptables . Add , ip , port , "tcp" , backendIP . String ( ) , backendPort ) ; err != nil {
return err
}
2013-06-12 02:46:23 +04:00
}
2013-11-21 20:26:07 +04:00
mapper . tcpMapping [ mapKey ] = backendAddr . ( * net . TCPAddr )
proxy , err := mapper . proxyFactoryFunc ( & net . TCPAddr { IP : ip , Port : port } , backendAddr )
2013-04-20 06:35:44 +04:00
if err != nil {
2013-10-05 06:25:15 +04:00
mapper . Unmap ( ip , port , "tcp" )
2013-04-20 06:35:44 +04:00
return err
}
2013-11-21 20:26:07 +04:00
mapper . tcpProxies [ mapKey ] = proxy
2013-06-12 02:46:23 +04:00
go proxy . Run ( )
} else {
backendPort := backendAddr . ( * net . UDPAddr ) . Port
backendIP := backendAddr . ( * net . UDPAddr ) . IP
2013-10-05 06:25:15 +04:00
if mapper . iptables != nil {
if err := mapper . iptables . Forward ( iptables . Add , ip , port , "udp" , backendIP . String ( ) , backendPort ) ; err != nil {
return err
}
2013-06-12 02:46:23 +04:00
}
2013-11-21 20:26:07 +04:00
mapper . udpMapping [ mapKey ] = backendAddr . ( * net . UDPAddr )
proxy , err := mapper . proxyFactoryFunc ( & net . UDPAddr { IP : ip , Port : port } , backendAddr )
2013-04-20 06:35:44 +04:00
if err != nil {
2013-10-05 06:25:15 +04:00
mapper . Unmap ( ip , port , "udp" )
2013-06-12 02:46:23 +04:00
return err
2013-04-20 06:35:44 +04:00
}
2013-11-21 20:26:07 +04:00
mapper . udpProxies [ mapKey ] = proxy
2013-06-12 02:46:23 +04:00
go proxy . Run ( )
2013-04-20 06:35:44 +04:00
}
2013-06-12 02:46:23 +04:00
return nil
2013-04-20 06:35:44 +04:00
}
2013-10-05 06:25:15 +04:00
func ( mapper * PortMapper ) Unmap ( ip net . IP , port int , proto string ) error {
2013-11-21 20:26:07 +04:00
mapKey := ( & net . TCPAddr { Port : port , IP : ip } ) . String ( )
2013-06-12 02:46:23 +04:00
if proto == "tcp" {
2013-11-21 20:26:07 +04:00
backendAddr , ok := mapper . tcpMapping [ mapKey ]
2013-06-12 02:46:23 +04:00
if ! ok {
2013-11-21 20:26:07 +04:00
return fmt . Errorf ( "Port tcp/%s is not mapped" , mapKey )
2013-06-12 02:46:23 +04:00
}
2013-11-21 20:26:07 +04:00
if proxy , exists := mapper . tcpProxies [ mapKey ] ; exists {
2013-06-12 02:46:23 +04:00
proxy . Close ( )
2013-11-21 20:26:07 +04:00
delete ( mapper . tcpProxies , mapKey )
2013-06-12 02:46:23 +04:00
}
2013-10-05 06:25:15 +04:00
if mapper . iptables != nil {
if err := mapper . iptables . Forward ( iptables . Delete , ip , port , proto , backendAddr . IP . String ( ) , backendAddr . Port ) ; err != nil {
return err
}
2013-06-12 02:46:23 +04:00
}
2013-11-21 20:26:07 +04:00
delete ( mapper . tcpMapping , mapKey )
2013-06-12 02:46:23 +04:00
} else {
2013-11-21 20:26:07 +04:00
backendAddr , ok := mapper . udpMapping [ mapKey ]
2013-06-12 02:46:23 +04:00
if ! ok {
2013-11-21 20:26:07 +04:00
return fmt . Errorf ( "Port udp/%s is not mapped" , mapKey )
2013-06-12 02:46:23 +04:00
}
2013-11-21 20:26:07 +04:00
if proxy , exists := mapper . udpProxies [ mapKey ] ; exists {
2013-06-12 02:46:23 +04:00
proxy . Close ( )
2013-11-21 20:26:07 +04:00
delete ( mapper . udpProxies , mapKey )
2013-06-12 02:46:23 +04:00
}
2013-10-05 06:25:15 +04:00
if mapper . iptables != nil {
if err := mapper . iptables . Forward ( iptables . Delete , ip , port , proto , backendAddr . IP . String ( ) , backendAddr . Port ) ; err != nil {
return err
}
2013-06-12 02:46:23 +04:00
}
2013-11-21 20:26:07 +04:00
delete ( mapper . udpMapping , mapKey )
2013-02-22 06:33:23 +04:00
}
2013-02-28 23:50:02 +04:00
return nil
}
2013-10-05 06:25:15 +04:00
func newPortMapper ( config * DaemonConfig ) ( * PortMapper , error ) {
// We can always try removing the iptables
if err := iptables . RemoveExistingChain ( "DOCKER" ) ; err != nil {
2013-02-26 02:06:22 +04:00
return nil , err
2013-02-21 05:47:09 +04:00
}
2013-10-05 06:25:15 +04:00
var chain * iptables . Chain
if config . EnableIptables {
var err error
chain , err = iptables . NewChain ( "DOCKER" , config . BridgeIface )
if err != nil {
return nil , fmt . Errorf ( "Failed to create DOCKER chain: %s" , err )
}
}
mapper := & PortMapper {
2013-11-21 20:26:07 +04:00
tcpMapping : make ( map [ string ] * net . TCPAddr ) ,
tcpProxies : make ( map [ string ] proxy . Proxy ) ,
udpMapping : make ( map [ string ] * net . UDPAddr ) ,
udpProxies : make ( map [ string ] proxy . Proxy ) ,
iptables : chain ,
defaultIp : config . DefaultIp ,
proxyFactoryFunc : proxy . NewProxy ,
2013-02-28 23:50:02 +04:00
}
return mapper , nil
2013-02-21 05:47:09 +04:00
}
2013-02-25 22:45:23 +04:00
2013-08-12 21:53:06 +04:00
// PortAllocator automatically allocates and releases networking ports.
type PortAllocator struct {
	// The embedded mutex guards inUse.
	sync.Mutex

	// inUse holds one "ip:port" key per acquired port.
	inUse map[string]struct{}
	// fountain delivers candidate port numbers produced by runFountain.
	fountain chan int
	// quit tells runFountain to stop when true is received on it.
	quit chan bool
}
2013-04-05 09:56:12 +04:00
func ( alloc * PortAllocator ) runFountain ( ) {
for {
for port := portRangeStart ; port < portRangeEnd ; port ++ {
2013-10-09 02:42:02 +04:00
select {
case alloc . fountain <- port :
case quit := <- alloc . quit :
if quit {
return
}
}
2013-04-05 09:56:12 +04:00
}
2013-02-25 22:45:23 +04:00
}
}
2013-04-05 09:56:12 +04:00
// FIXME: Release can no longer fail, change its prototype to reflect that.
2013-11-21 20:26:07 +04:00
func ( alloc * PortAllocator ) Release ( addr net . IP , port int ) error {
mapKey := ( & net . TCPAddr { Port : port , IP : addr } ) . String ( )
2013-05-15 02:37:35 +04:00
utils . Debugf ( "Releasing %d" , port )
2013-07-03 02:46:32 +04:00
alloc . Lock ( )
2013-11-21 20:26:07 +04:00
delete ( alloc . inUse , mapKey )
2013-07-03 02:46:32 +04:00
alloc . Unlock ( )
2013-04-05 09:56:12 +04:00
return nil
}
2013-11-21 20:26:07 +04:00
func ( alloc * PortAllocator ) Acquire ( addr net . IP , port int ) ( int , error ) {
mapKey := ( & net . TCPAddr { Port : port , IP : addr } ) . String ( )
utils . Debugf ( "Acquiring %s" , mapKey )
2013-04-05 09:56:12 +04:00
if port == 0 {
// Allocate a port from the fountain
for port := range alloc . fountain {
2013-11-21 20:26:07 +04:00
if _ , err := alloc . Acquire ( addr , port ) ; err == nil {
2013-04-05 09:56:12 +04:00
return port , nil
}
}
return - 1 , fmt . Errorf ( "Port generator ended unexpectedly" )
2013-02-25 22:45:23 +04:00
}
2013-07-03 02:46:32 +04:00
alloc . Lock ( )
defer alloc . Unlock ( )
2013-11-21 20:26:07 +04:00
if _ , inUse := alloc . inUse [ mapKey ] ; inUse {
2013-04-05 09:56:12 +04:00
return - 1 , fmt . Errorf ( "Port already in use: %d" , port )
}
2013-11-21 20:26:07 +04:00
alloc . inUse [ mapKey ] = struct { } { }
2013-04-05 09:56:12 +04:00
return port , nil
2013-02-25 22:45:23 +04:00
}
2013-10-09 02:42:02 +04:00
// Close signals runFountain to stop and then closes the allocator's
// channels. It always returns nil.
func (alloc *PortAllocator) Close() error {
	// Send the stop signal before closing anything.
	alloc.quit <- true

	close(alloc.quit)
	close(alloc.fountain)
	return nil
}
2013-04-05 09:56:12 +04:00
func newPortAllocator ( ) ( * PortAllocator , error ) {
allocator := & PortAllocator {
2013-11-21 20:26:07 +04:00
inUse : make ( map [ string ] struct { } ) ,
2013-04-06 00:03:04 +04:00
fountain : make ( chan int ) ,
2013-10-09 02:42:02 +04:00
quit : make ( chan bool ) ,
2013-04-05 09:56:12 +04:00
}
go allocator . runFountain ( )
2013-02-28 23:50:02 +04:00
return allocator , nil
}
2013-08-12 21:53:06 +04:00
// IP allocator: Automatically allocate and release networking ports
2013-02-28 23:50:02 +04:00
type IPAllocator struct {
2013-03-31 02:32:10 +04:00
network * net . IPNet
queueAlloc chan allocatedIP
queueReleased chan net . IP
inUse map [ int32 ] struct { }
2013-10-09 02:42:02 +04:00
quit chan bool
2013-03-31 02:32:10 +04:00
}
// allocatedIP is the result of one allocation attempt: the address offered
// and, when no free address was found, the error explaining why.
type allocatedIP struct {
	ip  net.IP
	err error
}
2013-03-31 02:32:10 +04:00
func ( alloc * IPAllocator ) run ( ) {
2013-02-28 23:50:02 +04:00
firstIP , _ := networkRange ( alloc . network )
2013-03-31 02:32:10 +04:00
ipNum := ipToInt ( firstIP )
ownIP := ipToInt ( alloc . network . IP )
size := networkSize ( alloc . network . Mask )
pos := int32 ( 1 )
max := size - 2 // -1 for the broadcast address, -1 for the gateway address
for {
var (
newNum int32
inUse bool
)
// Find first unused IP, give up after one whole round
for attempt := int32 ( 0 ) ; attempt < max ; attempt ++ {
newNum = ipNum + pos
pos = pos % max + 1
// The network's IP is never okay to use
if newNum == ownIP {
continue
}
if _ , inUse = alloc . inUse [ newNum ] ; ! inUse {
// We found an unused IP
break
}
2013-02-25 22:45:23 +04:00
}
2013-03-31 02:32:10 +04:00
2013-06-04 22:00:22 +04:00
ip := allocatedIP { ip : intToIP ( newNum ) }
2013-03-31 02:32:10 +04:00
if inUse {
ip . err = errors . New ( "No unallocated IP available" )
2013-02-25 22:45:23 +04:00
}
2013-03-31 02:32:10 +04:00
select {
2013-10-09 02:42:02 +04:00
case quit := <- alloc . quit :
if quit {
return
}
2013-03-31 02:32:10 +04:00
case alloc . queueAlloc <- ip :
alloc . inUse [ newNum ] = struct { } { }
case released := <- alloc . queueReleased :
r := ipToInt ( released )
delete ( alloc . inUse , r )
if inUse {
// If we couldn't allocate a new IP, the released one
// will be the only free one now, so instantly use it
// next time
pos = r - ipNum
} else {
// Use same IP as last time
if pos == 1 {
pos = max
} else {
pos --
}
}
2013-02-25 22:45:23 +04:00
}
2013-02-28 23:50:02 +04:00
}
}
func ( alloc * IPAllocator ) Acquire ( ) ( net . IP , error ) {
2013-03-31 02:32:10 +04:00
ip := <- alloc . queueAlloc
return ip . ip , ip . err
2013-02-28 23:50:02 +04:00
}
2013-03-31 02:32:10 +04:00
func ( alloc * IPAllocator ) Release ( ip net . IP ) {
alloc . queueReleased <- ip
2013-02-25 22:45:23 +04:00
}
2013-10-09 02:42:02 +04:00
// Close signals the run loop to stop and then closes the allocator's
// channels. It always returns nil.
func (alloc *IPAllocator) Close() error {
	// Send the stop signal before closing anything.
	alloc.quit <- true

	close(alloc.quit)
	close(alloc.queueAlloc)
	close(alloc.queueReleased)
	return nil
}
2013-03-31 02:32:10 +04:00
func newIPAllocator ( network * net . IPNet ) * IPAllocator {
2013-02-28 23:50:02 +04:00
alloc := & IPAllocator {
2013-03-31 02:32:10 +04:00
network : network ,
queueAlloc : make ( chan allocatedIP ) ,
queueReleased : make ( chan net . IP ) ,
inUse : make ( map [ int32 ] struct { } ) ,
2013-10-09 02:42:02 +04:00
quit : make ( chan bool ) ,
2013-02-28 23:50:02 +04:00
}
2013-03-31 02:32:10 +04:00
go alloc . run ( )
return alloc
2013-02-28 23:50:02 +04:00
}
// Network interface represents the networking stack of a container
type NetworkInterface struct {
IPNet net . IPNet
Gateway net . IP
manager * NetworkManager
2013-06-12 02:46:23 +04:00
extPorts [ ] * Nat
2013-07-22 04:49:09 +04:00
disabled bool
2013-02-28 23:50:02 +04:00
}
2013-10-05 06:25:15 +04:00
// Allocate an external port and map it to the interface
func ( iface * NetworkInterface ) AllocatePort ( port Port , binding PortBinding ) ( * Nat , error ) {
2013-07-22 04:49:09 +04:00
if iface . disabled {
return nil , fmt . Errorf ( "Trying to allocate port for interface %v, which is disabled" , iface ) // FIXME
}
2013-10-05 06:25:15 +04:00
ip := iface . manager . portMapper . defaultIp
if binding . HostIp != "" {
ip = net . ParseIP ( binding . HostIp )
} else {
binding . HostIp = ip . String ( )
}
nat := & Nat {
Port : port ,
Binding : binding ,
}
containerPort , err := parsePort ( port . Port ( ) )
2013-02-28 23:50:02 +04:00
if err != nil {
2013-04-05 09:58:01 +04:00
return nil , err
}
2013-06-12 02:46:23 +04:00
2013-10-05 06:25:15 +04:00
hostPort , _ := parsePort ( nat . Binding . HostPort )
if nat . Port . Proto ( ) == "tcp" {
2013-11-21 20:26:07 +04:00
extPort , err := iface . manager . tcpPortAllocator . Acquire ( ip , hostPort )
2013-06-12 02:46:23 +04:00
if err != nil {
return nil , err
}
2013-10-05 06:25:15 +04:00
backend := & net . TCPAddr { IP : iface . IPNet . IP , Port : containerPort }
if err := iface . manager . portMapper . Map ( ip , extPort , backend ) ; err != nil {
2013-11-21 20:26:07 +04:00
iface . manager . tcpPortAllocator . Release ( ip , extPort )
2013-06-12 02:46:23 +04:00
return nil , err
}
2013-10-05 06:25:15 +04:00
nat . Binding . HostPort = strconv . Itoa ( extPort )
2013-06-12 02:46:23 +04:00
} else {
2013-11-21 20:26:07 +04:00
extPort , err := iface . manager . udpPortAllocator . Acquire ( ip , hostPort )
2013-06-12 02:46:23 +04:00
if err != nil {
return nil , err
}
2013-10-05 06:25:15 +04:00
backend := & net . UDPAddr { IP : iface . IPNet . IP , Port : containerPort }
if err := iface . manager . portMapper . Map ( ip , extPort , backend ) ; err != nil {
2013-11-21 20:26:07 +04:00
iface . manager . udpPortAllocator . Release ( ip , extPort )
2013-06-12 02:46:23 +04:00
return nil , err
}
2013-10-05 06:25:15 +04:00
nat . Binding . HostPort = strconv . Itoa ( extPort )
2013-04-05 09:58:01 +04:00
}
2013-06-12 02:46:23 +04:00
iface . extPorts = append ( iface . extPorts , nat )
2013-04-05 09:58:01 +04:00
return nat , nil
}
type Nat struct {
2013-10-05 06:25:15 +04:00
Port Port
Binding PortBinding
2013-04-05 09:58:01 +04:00
}
2013-10-05 06:25:15 +04:00
func ( n * Nat ) String ( ) string {
2013-11-30 04:53:20 +04:00
return fmt . Sprintf ( "%s:%s:%s/%s" , n . Binding . HostIp , n . Binding . HostPort , n . Port . Port ( ) , n . Port . Proto ( ) )
2013-02-28 23:50:02 +04:00
}
// Release: Network cleanup - release all resources
2013-03-31 02:32:10 +04:00
func ( iface * NetworkInterface ) Release ( ) {
2013-07-22 04:49:09 +04:00
if iface . disabled {
return
}
2013-06-12 02:46:23 +04:00
for _ , nat := range iface . extPorts {
2013-10-05 06:25:15 +04:00
hostPort , err := parsePort ( nat . Binding . HostPort )
if err != nil {
log . Printf ( "Unable to get host port: %s" , err )
continue
}
ip := net . ParseIP ( nat . Binding . HostIp )
2013-11-21 20:26:07 +04:00
utils . Debugf ( "Unmaping %s/%s:%s" , nat . Port . Proto , ip . String ( ) , nat . Binding . HostPort )
2013-10-05 06:25:15 +04:00
if err := iface . manager . portMapper . Unmap ( ip , hostPort , nat . Port . Proto ( ) ) ; err != nil {
log . Printf ( "Unable to unmap port %s: %s" , nat , err )
2013-02-28 23:50:02 +04:00
}
2013-11-21 20:26:07 +04:00
2013-10-05 06:25:15 +04:00
if nat . Port . Proto ( ) == "tcp" {
2013-11-21 20:26:07 +04:00
if err := iface . manager . tcpPortAllocator . Release ( ip , hostPort ) ; err != nil {
2013-10-05 06:25:15 +04:00
log . Printf ( "Unable to release port %s" , nat )
2013-06-12 02:46:23 +04:00
}
2013-11-21 20:26:07 +04:00
} else if nat . Port . Proto ( ) == "udp" {
2013-12-21 05:30:21 +04:00
if err := iface . manager . udpPortAllocator . Release ( ip , hostPort ) ; err != nil {
2013-11-21 20:26:07 +04:00
log . Printf ( "Unable to release port %s: %s" , nat , err )
}
2013-02-28 23:50:02 +04:00
}
}
2013-03-31 02:32:10 +04:00
iface . manager . ipAllocator . Release ( iface . IPNet . IP )
2013-02-28 23:50:02 +04:00
}
// Network Manager manages a set of network interfaces
// Only *one* manager per host machine should be used
type NetworkManager struct {
bridgeIface string
bridgeNetwork * net . IPNet
2013-06-12 02:46:23 +04:00
ipAllocator * IPAllocator
tcpPortAllocator * PortAllocator
udpPortAllocator * PortAllocator
portMapper * PortMapper
2013-07-22 04:49:09 +04:00
disabled bool
2013-02-28 23:50:02 +04:00
}
// Allocate a network interface
func ( manager * NetworkManager ) Allocate ( ) ( * NetworkInterface , error ) {
2013-07-22 04:49:09 +04:00
if manager . disabled {
return & NetworkInterface { disabled : true } , nil
}
2013-08-21 18:37:58 +04:00
var ip net . IP
var err error
ip , err = manager . ipAllocator . Acquire ( )
2013-02-25 22:45:23 +04:00
if err != nil {
2013-02-26 02:06:22 +04:00
return nil , err
2013-02-25 22:45:23 +04:00
}
2013-08-30 04:49:11 +04:00
// avoid duplicate IP
2013-08-21 18:37:58 +04:00
ipNum := ipToInt ( ip )
firstIP := manager . ipAllocator . network . IP . To4 ( ) . Mask ( manager . ipAllocator . network . Mask )
firstIPNum := ipToInt ( firstIP ) + 1
if firstIPNum == ipNum {
ip , err = manager . ipAllocator . Acquire ( )
if err != nil {
return nil , err
}
}
2013-02-26 02:06:22 +04:00
iface := & NetworkInterface {
2013-03-20 17:02:25 +04:00
IPNet : net . IPNet { IP : ip , Mask : manager . bridgeNetwork . Mask } ,
2013-02-28 23:50:02 +04:00
Gateway : manager . bridgeNetwork . IP ,
manager : manager ,
2013-02-26 02:06:22 +04:00
}
return iface , nil
}
2013-10-09 02:42:02 +04:00
func ( manager * NetworkManager ) Close ( ) error {
2013-11-22 23:28:49 +04:00
if manager . disabled {
return nil
}
2013-10-09 02:42:02 +04:00
err1 := manager . tcpPortAllocator . Close ( )
err2 := manager . udpPortAllocator . Close ( )
err3 := manager . ipAllocator . Close ( )
if err1 != nil {
return err1
}
if err2 != nil {
return err2
}
return err3
}
2013-10-05 06:25:15 +04:00
func newNetworkManager ( config * DaemonConfig ) ( * NetworkManager , error ) {
if config . BridgeIface == DisableNetworkBridge {
2013-07-22 04:49:09 +04:00
manager := & NetworkManager {
disabled : true ,
}
return manager , nil
}
2013-10-05 06:25:15 +04:00
addr , err := getIfaceAddr ( config . BridgeIface )
2013-02-28 23:50:02 +04:00
if err != nil {
2013-04-04 02:57:57 +04:00
// If the iface is not found, try to create it
2013-10-05 06:25:15 +04:00
if err := CreateBridgeIface ( config ) ; err != nil {
2013-04-04 02:57:57 +04:00
return nil , err
}
2013-10-05 06:25:15 +04:00
addr , err = getIfaceAddr ( config . BridgeIface )
2013-04-04 02:57:57 +04:00
if err != nil {
return nil , err
}
2013-02-28 23:50:02 +04:00
}
network := addr . ( * net . IPNet )
2013-10-24 20:08:50 +04:00
// Configure iptables for link support
if config . EnableIptables {
2013-11-27 12:10:44 +04:00
// Enable NAT
natArgs := [ ] string { "POSTROUTING" , "-t" , "nat" , "-s" , addr . String ( ) , "!" , "-d" , addr . String ( ) , "-j" , "MASQUERADE" }
if ! iptables . Exists ( natArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-A" } , natArgs ... ) ... ) ; err != nil {
return nil , fmt . Errorf ( "Unable to enable network bridge NAT: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error iptables postrouting: %s" , output )
}
}
// Accept incoming packets for existing connections
existingArgs := [ ] string { "FORWARD" , "-o" , config . BridgeIface , "-m" , "conntrack" , "--ctstate" , "RELATED,ESTABLISHED" , "-j" , "ACCEPT" }
if ! iptables . Exists ( existingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , existingArgs ... ) ... ) ; err != nil {
return nil , fmt . Errorf ( "Unable to allow incoming packets: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error iptables allow incoming: %s" , output )
}
}
// Accept all non-intercontainer outgoing packets
outgoingArgs := [ ] string { "FORWARD" , "-i" , config . BridgeIface , "!" , "-o" , config . BridgeIface , "-j" , "ACCEPT" }
if ! iptables . Exists ( outgoingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , outgoingArgs ... ) ... ) ; err != nil {
return nil , fmt . Errorf ( "Unable to allow outgoing packets: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error iptables allow outgoing: %s" , output )
}
}
2013-11-02 03:29:25 +04:00
args := [ ] string { "FORWARD" , "-i" , config . BridgeIface , "-o" , config . BridgeIface , "-j" }
acceptArgs := append ( args , "ACCEPT" )
dropArgs := append ( args , "DROP" )
2013-10-24 20:08:50 +04:00
if ! config . InterContainerCommunication {
2013-11-02 03:29:25 +04:00
iptables . Raw ( append ( [ ] string { "-D" } , acceptArgs ... ) ... )
if ! iptables . Exists ( dropArgs ... ) {
2013-10-24 20:08:50 +04:00
utils . Debugf ( "Disable inter-container communication" )
2013-11-02 03:29:25 +04:00
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , dropArgs ... ) ... ) ; err != nil {
2013-10-24 20:08:50 +04:00
return nil , fmt . Errorf ( "Unable to prevent intercontainer communication: %s" , err )
2013-11-05 19:33:07 +04:00
} else if len ( output ) != 0 {
2013-11-02 03:29:25 +04:00
return nil , fmt . Errorf ( "Error disabling intercontainer communication: %s" , output )
2013-10-24 20:08:50 +04:00
}
}
} else {
2013-11-02 03:29:25 +04:00
iptables . Raw ( append ( [ ] string { "-D" } , dropArgs ... ) ... )
if ! iptables . Exists ( acceptArgs ... ) {
utils . Debugf ( "Enable inter-container communication" )
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , acceptArgs ... ) ... ) ; err != nil {
return nil , fmt . Errorf ( "Unable to allow intercontainer communication: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error enabling intercontainer communication: %s" , output )
}
}
2013-10-24 20:08:50 +04:00
}
}
2013-03-31 02:32:10 +04:00
ipAllocator := newIPAllocator ( network )
2013-02-28 23:50:02 +04:00
2013-06-12 02:46:23 +04:00
tcpPortAllocator , err := newPortAllocator ( )
if err != nil {
return nil , err
}
2013-11-21 20:26:07 +04:00
2013-06-12 02:46:23 +04:00
udpPortAllocator , err := newPortAllocator ( )
2013-02-28 23:50:02 +04:00
if err != nil {
return nil , err
}
2013-10-05 06:25:15 +04:00
portMapper , err := newPortMapper ( config )
2013-03-23 08:43:31 +04:00
if err != nil {
return nil , err
}
2013-02-28 23:50:02 +04:00
manager := & NetworkManager {
2013-10-05 06:25:15 +04:00
bridgeIface : config . BridgeIface ,
2013-06-12 02:46:23 +04:00
bridgeNetwork : network ,
ipAllocator : ipAllocator ,
tcpPortAllocator : tcpPortAllocator ,
udpPortAllocator : udpPortAllocator ,
portMapper : portMapper ,
2013-02-28 23:50:02 +04:00
}
2013-10-09 02:42:02 +04:00
2013-02-28 23:50:02 +04:00
return manager , nil
2013-02-25 22:45:23 +04:00
}