From b7f6742b7f42bdced5f142f245eb977d3e13de9e Mon Sep 17 00:00:00 2001
From: tamilmani1989
Date: Fri, 4 Jan 2019 16:19:36 -0800
Subject: [PATCH] CNI to support transparent mode (#279)

* added changes in azure cni to support transparent mode

* cni for calico policy controller

* removed unused parameter

* minor fix

* addressed review comments

* addressed review comments

* modified vethname generation and the hostbveth prefix

* removed setting arp for default gw

* minor fix
---
Review notes (only network/transparent_endpointclient_linux.go differs from
the original patch; the diffstat is updated by hand and the blob id on that
file's index line is left at its original value):
 - AddEndpointRules and DeleteEndpointRules submit each host route once
 - DeleteEndpointRules logs a deleteRoutes failure
 - doc comments added to the new file

 cni/network/network.go                      |   6 +-
 netlink/ip.go                               |  11 ++
 network/endpoint.go                         |   9 +-
 network/endpoint_linux.go                   |  26 ++-
 network/network.go                          |   7 +-
 network/network_linux.go                    |   3 +-
 network/transparent_endpointclient_linux.go | 179 ++++++++++++++++++++
 7 files changed, 227 insertions(+), 14 deletions(-)
 create mode 100644 network/transparent_endpointclient_linux.go

diff --git a/cni/network/network.go b/cni/network/network.go
index eb9c13472..0166acf22 100644
--- a/cni/network/network.go
+++ b/cni/network/network.go
@@ -167,6 +167,7 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
 		result           *cniTypesCurr.Result
 		azIpamResult     *cniTypesCurr.Result
 		err              error
+		vethName         string
 		nwCfg            *cni.NetworkConfig
 		epInfo           *network.EndpointInfo
 		iface            *cniTypesCurr.Interface
@@ -388,9 +389,9 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
 		// Network already exists.
 		subnetPrefix := nwInfo.Subnets[0].Prefix.String()
 		log.Printf("[cni-net] Found network %v with subnet %v.", networkId, subnetPrefix)
+		nwCfg.Ipam.Subnet = subnetPrefix
 
 		// Call into IPAM plugin to allocate an address for the endpoint.
-		nwCfg.Ipam.Subnet = subnetPrefix
 		result, err = plugin.DelegateAdd(nwCfg.Ipam.Type, nwCfg)
 		if err != nil {
 			err = plugin.Errorf("Failed to allocate address: %v", err)
@@ -398,7 +399,6 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
 		}
 
 		ipconfig := result.IPs[0]
-
 		iface := &cniTypesCurr.Interface{Name: args.IfName}
 		result.Interfaces = append(result.Interfaces, iface)
 
@@ -456,7 +456,7 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
 
 	// A runtime must not call ADD twice (without a corresponding DEL) for the same
 	// (network name, container id, name of the interface inside the container)
-	vethName := fmt.Sprintf("%s%s%s", networkId, k8sContainerID, k8sIfName)
+	vethName = fmt.Sprintf("%s%s%s", networkId, k8sContainerID, k8sIfName)
 	setEndpointOptions(cnsNetworkConfig, epInfo, vethName)
 
 	// Create the endpoint.
diff --git a/netlink/ip.go b/netlink/ip.go
index 2908dab87..1db1e58c4 100644
--- a/netlink/ip.go
+++ b/netlink/ip.go
@@ -11,6 +11,17 @@ import (
 	"golang.org/x/sys/unix"
 )
 
+const (
+	RT_SCOPE_UNIVERSE = 0
+	RT_SCOPE_SITE     = 200
+	RT_SCOPE_LINK     = 253
+	RT_SCOPE_HOST     = 254
+	RT_SCOPE_NOWHERE  = 255
+)
+const (
+	RTPROT_KERNEL = 2
+)
+
 // GetIpAddressFamily returns the address family of an IP address.
 func GetIpAddressFamily(ip net.IP) int {
 	if len(ip) <= net.IPv4len {
diff --git a/network/endpoint.go b/network/endpoint.go
index 763f27a65..504f18674 100644
--- a/network/endpoint.go
+++ b/network/endpoint.go
@@ -64,9 +64,12 @@ type EndpointInfo struct {
 
 // RouteInfo contains information about an IP route.
 type RouteInfo struct {
-	Dst     net.IPNet
-	Gw      net.IP
-	DevName string
+	Dst      net.IPNet
+	Src      net.IP
+	Gw       net.IP
+	Protocol int
+	DevName  string
+	Scope    int
 }
 
 // NewEndpoint creates a new endpoint in the network.
diff --git a/network/endpoint_linux.go b/network/endpoint_linux.go
index 0bfd5df96..a8e64e4da 100644
--- a/network/endpoint_linux.go
+++ b/network/endpoint_linux.go
@@ -81,14 +81,19 @@ func (nw *network) newEndpointImpl(epInfo *EndpointInfo) (*endpoint, error) {
 	}
 
 	if vlanid != 0 {
+		log.Printf("OVS client")
 		epClient = NewOVSEndpointClient(
 			nw.extIf,
 			epInfo,
 			hostIfName,
 			contIfName,
 			vlanid)
-	} else {
+	} else if nw.Mode != opModeTransparent {
+		log.Printf("Bridge client")
 		epClient = NewLinuxBridgeEndpointClient(nw.extIf, hostIfName, contIfName, nw.Mode)
+	} else {
+		log.Printf("Transparent client")
+		epClient = NewTransparentEndpointClient(nw.extIf, hostIfName, contIfName, nw.Mode)
 	}
 
 	// Cleanup on failure.
@@ -207,8 +212,10 @@ func (nw *network) deleteEndpointImpl(ep *endpoint) error {
 	if ep.VlanID != 0 {
 		epInfo := ep.getInfo()
 		epClient = NewOVSEndpointClient(nw.extIf, epInfo, ep.HostIfName, "", ep.VlanID)
-	} else {
+	} else if nw.Mode != opModeTransparent {
 		epClient = NewLinuxBridgeEndpointClient(nw.extIf, ep.HostIfName, "", nw.Mode)
+	} else {
+		epClient = NewTransparentEndpointClient(nw.extIf, ep.HostIfName, "", nw.Mode)
 	}
 
 	epClient.DeleteEndpointRules(ep)
@@ -246,7 +253,7 @@ func addRoutes(interfaceName string, routes []RouteInfo) error {
 			if !strings.Contains(strings.ToLower(err.Error()), "file exists") {
 				return err
 			} else {
-				log.Printf("route already exists")
+				log.Printf("[net] route already exists")
 			}
 		}
 	}
@@ -259,7 +266,7 @@ func deleteRoutes(interfaceName string, routes []RouteInfo) error {
 	interfaceIf, _ := net.InterfaceByName(interfaceName)
 
 	for _, route := range routes {
-		log.Printf("[ovs] Deleting IP route %+v from link %v.", route, interfaceName)
+		log.Printf("[net] Deleting IP route %+v from link %v.", route, interfaceName)
 
 		if route.DevName != "" {
 			devIf, _ := net.InterfaceByName(route.DevName)
@@ -416,3 +423,14 @@ func updateRoutes(existingEp *EndpointInfo, targetEp *EndpointInfo) error {
 
 	return nil
 }
+
+func getDefaultGateway(routes []RouteInfo) net.IP {
+	_, defDstIP, _ := net.ParseCIDR("0.0.0.0/0")
+	for _, route := range routes {
+		if route.Dst.String() == defDstIP.String() {
+			return route.Gw
+		}
+	}
+
+	return nil
+}
diff --git a/network/network.go b/network/network.go
index cbf177e47..e0a11fe56 100644
--- a/network/network.go
+++ b/network/network.go
@@ -14,9 +14,10 @@ import (
 
 const (
 	// Operational modes.
-	opModeBridge  = "bridge"
-	opModeTunnel  = "tunnel"
-	opModeDefault = opModeTunnel
+	opModeBridge      = "bridge"
+	opModeTunnel      = "tunnel"
+	opModeTransparent = "transparent"
+	opModeDefault     = opModeTunnel
 )
 
 // ExternalInterface is a host network interface that bridges containers to external networks.
diff --git a/network/network_linux.go b/network/network_linux.go
index 25217d5b9..6da8aaa70 100644
--- a/network/network_linux.go
+++ b/network/network_linux.go
@@ -52,7 +52,8 @@ func (nm *networkManager) newNetworkImpl(nwInfo *NetworkInfo, extIf *externalInt
 		if opt != nil && opt[VlanIDKey] != nil {
 			vlanid, _ = strconv.Atoi(opt[VlanIDKey].(string))
 		}
-
+	case opModeTransparent:
+		break
 	default:
 		return nil, errNetworkModeInvalid
 	}
diff --git a/network/transparent_endpointclient_linux.go b/network/transparent_endpointclient_linux.go
new file mode 100644
index 000000000..e41c51d5f
--- /dev/null
+++ b/network/transparent_endpointclient_linux.go
@@ -0,0 +1,179 @@
+package network
+
+import (
+	"fmt"
+	"net"
+
+	"github.com/Azure/azure-container-networking/log"
+	"github.com/Azure/azure-container-networking/netlink"
+	"github.com/Azure/azure-container-networking/network/epcommon"
+	"github.com/Azure/azure-container-networking/platform"
+)
+
+// CIDR literals for transparent mode; not referenced within this file.
+const (
+	FAKE_GW_IP = "169.254.1.1/32"
+	DEFAULT_GW = "0.0.0.0/0"
+)
+
+// TransparentEndpointClient is the endpoint client used for networks in
+// transparent mode: each endpoint gets a veth pair, a /32 host route per IP
+// address through the host-side veth, and proxy ARP on the host-side veth.
+type TransparentEndpointClient struct {
+	bridgeName        string
+	hostPrimaryIfName string
+	hostVethName      string
+	containerVethName string
+	hostPrimaryMac    net.HardwareAddr
+	containerMac      net.HardwareAddr
+	hostVethMac       net.HardwareAddr
+	mode              string
+}
+
+// NewTransparentEndpointClient returns a client for the given veth names.
+// The bridge name, interface name and MAC address are copied from extIf.
+func NewTransparentEndpointClient(
+	extIf *externalInterface,
+	hostVethName string,
+	containerVethName string,
+	mode string,
+) *TransparentEndpointClient {
+
+	client := &TransparentEndpointClient{
+		bridgeName:        extIf.BridgeName,
+		hostPrimaryIfName: extIf.Name,
+		hostVethName:      hostVethName,
+		containerVethName: containerVethName,
+		hostPrimaryMac:    extIf.MacAddress,
+		mode:              mode,
+	}
+
+	return client
+}
+
+// setArpProxy enables proxy ARP on ifName by writing 1 to its proxy_arp entry
+// under /proc/sys/net/ipv4/conf through platform.ExecuteCommand.
+func setArpProxy(ifName string) error {
+	cmd := fmt.Sprintf("echo 1 > /proc/sys/net/ipv4/conf/%v/proxy_arp", ifName)
+	_, err := platform.ExecuteCommand(cmd)
+	return err
+}
+
+// AddEndpoints calls epcommon.CreateEndpoint for the host and container veth
+// names and records the hardware address of each interface on the client.
+func (client *TransparentEndpointClient) AddEndpoints(epInfo *EndpointInfo) error {
+	if err := epcommon.CreateEndpoint(client.hostVethName, client.containerVethName); err != nil {
+		return err
+	}
+
+	containerIf, err := net.InterfaceByName(client.containerVethName)
+	if err != nil {
+		return err
+	}
+
+	client.containerMac = containerIf.HardwareAddr
+
+	hostVethIf, err := net.InterfaceByName(client.hostVethName)
+	if err != nil {
+		return err
+	}
+
+	client.hostVethMac = hostVethIf.HardwareAddr
+
+	return nil
+}
+
+// AddEndpointRules adds a /32 route through the host veth for every IP address
+// of the endpoint and then enables proxy ARP on the host veth.
+func (client *TransparentEndpointClient) AddEndpointRules(epInfo *EndpointInfo) error {
+	routeInfoList := make([]RouteInfo, 0, len(epInfo.IPAddresses))
+
+	// ip route add [pod ip]/32 dev [host veth]
+	// This route is needed for incoming packets to pod to route via hostveth
+	for _, ipAddr := range epInfo.IPAddresses {
+		ipNet := net.IPNet{IP: ipAddr.IP, Mask: net.CIDRMask(32, 32)}
+		log.Printf("[net] Adding route for the ip %v", ipNet.String())
+		routeInfoList = append(routeInfoList, RouteInfo{Dst: ipNet})
+	}
+
+	// Install the routes in one call once the list is complete, so that no
+	// route is submitted more than once.
+	if err := addRoutes(client.hostVethName, routeInfoList); err != nil {
+		return err
+	}
+
+	log.Printf("calling setArpProxy for %v", client.hostVethName)
+	if err := setArpProxy(client.hostVethName); err != nil {
+		log.Printf("setArpProxy failed with: %v", err)
+		return err
+	}
+
+	return nil
+}
+
+// DeleteEndpointRules removes the /32 routes through the host veth for every
+// IP address of the endpoint. A failure is logged; nothing is returned.
+func (client *TransparentEndpointClient) DeleteEndpointRules(ep *endpoint) {
+	routeInfoList := make([]RouteInfo, 0, len(ep.IPAddresses))
+
+	// ip route del [pod ip]/32 dev [host veth]
+	// Deleting the route set up for routing the incoming packets to pod
+	for _, ipAddr := range ep.IPAddresses {
+		ipNet := net.IPNet{IP: ipAddr.IP, Mask: net.CIDRMask(32, 32)}
+		log.Printf("[net] Deleting route for the ip %v", ipNet.String())
+		routeInfoList = append(routeInfoList, RouteInfo{Dst: ipNet})
+	}
+
+	// Remove the routes in one call so that each route is deleted exactly once.
+	if err := deleteRoutes(client.hostVethName, routeInfoList); err != nil {
+		log.Printf("[net] Failed to delete routes on %v: %v.", client.hostVethName, err)
+	}
+}
+
+// MoveEndpointsToContainerNS moves the container veth into the network
+// namespace identified by nsID.
+func (client *TransparentEndpointClient) MoveEndpointsToContainerNS(epInfo *EndpointInfo, nsID uintptr) error {
+	// Move the container interface to container's network namespace.
+	log.Printf("[net] Setting link %v netns %v.", client.containerVethName, epInfo.NetNsPath)
+	if err := netlink.SetLinkNetNs(client.containerVethName, nsID); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// SetupContainerInterfaces passes the container veth and epInfo.IfName to
+// epcommon.SetupContainerInterface and, on success, tracks the interface
+// under epInfo.IfName.
+func (client *TransparentEndpointClient) SetupContainerInterfaces(epInfo *EndpointInfo) error {
+	if err := epcommon.SetupContainerInterface(client.containerVethName, epInfo.IfName); err != nil {
+		return err
+	}
+
+	client.containerVethName = epInfo.IfName
+
+	return nil
+}
+
+// ConfigureContainerInterfacesAndRoutes assigns the endpoint's IP addresses to
+// the container interface and adds the endpoint's routes on it.
+func (client *TransparentEndpointClient) ConfigureContainerInterfacesAndRoutes(epInfo *EndpointInfo) error {
+	if err := epcommon.AssignIPToInterface(client.containerVethName, epInfo.IPAddresses); err != nil {
+		return err
+	}
+
+	return addRoutes(client.containerVethName, epInfo.Routes)
+}
+
+// DeleteEndpoints deletes the host-side link ep.HostIfName and returns any
+// error from netlink.DeleteLink.
+func (client *TransparentEndpointClient) DeleteEndpoints(ep *endpoint) error {
+	log.Printf("[net] Deleting veth pair %v %v.", ep.HostIfName, ep.IfName)
+	err := netlink.DeleteLink(ep.HostIfName)
+	if err != nil {
+		log.Printf("[net] Failed to delete veth pair %v: %v.", ep.HostIfName, err)
+		return err
+	}
+
+	return nil
+}