534 строки
17 KiB
Go
534 строки
17 KiB
Go
// Copyright 2017 Microsoft. All rights reserved.
|
|
// MIT License
|
|
|
|
package network
|
|
|
|
import (
|
|
"crypto/sha1"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"net"
|
|
"strings"
|
|
|
|
"github.com/Azure/azure-container-networking/cns"
|
|
"github.com/Azure/azure-container-networking/netio"
|
|
"github.com/Azure/azure-container-networking/netlink"
|
|
"github.com/Azure/azure-container-networking/network/networkutils"
|
|
"github.com/Azure/azure-container-networking/ovsctl"
|
|
"github.com/Azure/azure-container-networking/platform"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
const (
	// commonInterfacePrefix is the leading string shared by the names of all
	// kinds of host network interfaces.
	commonInterfacePrefix = "az"

	// hostVEthInterfacePrefix is the leading string of host virtual network
	// interface names: the common prefix followed by "v".
	hostVEthInterfacePrefix = commonInterfacePrefix + "v"
)
|
|
|
|
// AzureHNSEndpointClient is declared here as an empty interface; nothing in
// this file calls a method on it.
// NOTE(review): presumably a placeholder so that code referring to an HNS
// endpoint client still compiles with this file — confirm against its users.
type AzureHNSEndpointClient interface{}
|
|
|
|
// generateVethName derives a short, deterministic name fragment from key:
// the first 11 hexadecimal characters of the SHA-1 digest of key.
func generateVethName(key string) string {
	digest := sha1.Sum([]byte(key))
	encoded := hex.EncodeToString(digest[:])

	return encoded[:11]
}
|
|
|
|
func ConstructEndpointID(containerID string, _ string, ifName string) (string, string) {
|
|
if len(containerID) > 8 {
|
|
containerID = containerID[:8]
|
|
} else {
|
|
logger.Info("Container ID is not greater than 8 ID", zap.String("containerID", containerID))
|
|
return "", ""
|
|
}
|
|
|
|
infraEpName := containerID + "-" + ifName
|
|
|
|
return infraEpName, ""
|
|
}
|
|
|
|
// newEndpointImpl creates a new endpoint in the network.
|
|
func (nw *network) newEndpointImpl(
|
|
_ apipaClient,
|
|
nl netlink.NetlinkInterface,
|
|
plc platform.ExecClient,
|
|
netioCli netio.NetIOInterface,
|
|
testEpClient EndpointClient,
|
|
nsc NamespaceClientInterface,
|
|
iptc ipTablesClient,
|
|
epInfo []*EndpointInfo,
|
|
) (*endpoint, error) {
|
|
var (
|
|
err error
|
|
hostIfName string
|
|
contIfName string
|
|
localIP string
|
|
vlanid = 0
|
|
defaultEpInfo = epInfo[0]
|
|
containerIf *net.Interface
|
|
)
|
|
|
|
if nw.Endpoints[defaultEpInfo.Id] != nil {
|
|
logger.Info("[net] Endpoint already exists.")
|
|
err = errEndpointExists
|
|
return nil, err
|
|
}
|
|
|
|
if defaultEpInfo.Data != nil {
|
|
if _, ok := defaultEpInfo.Data[VlanIDKey]; ok {
|
|
vlanid = defaultEpInfo.Data[VlanIDKey].(int)
|
|
}
|
|
|
|
if _, ok := defaultEpInfo.Data[LocalIPKey]; ok {
|
|
localIP = defaultEpInfo.Data[LocalIPKey].(string)
|
|
}
|
|
}
|
|
|
|
if _, ok := defaultEpInfo.Data[OptVethName]; ok {
|
|
key := defaultEpInfo.Data[OptVethName].(string)
|
|
logger.Info("Generate veth name based on the key provided", zap.String("key", key))
|
|
vethname := generateVethName(key)
|
|
hostIfName = fmt.Sprintf("%s%s", hostVEthInterfacePrefix, vethname)
|
|
contIfName = fmt.Sprintf("%s%s2", hostVEthInterfacePrefix, vethname)
|
|
} else {
|
|
// Create a veth pair.
|
|
logger.Info("Generate veth name based on endpoint id")
|
|
hostIfName = fmt.Sprintf("%s%s", hostVEthInterfacePrefix, defaultEpInfo.Id[:7])
|
|
contIfName = fmt.Sprintf("%s%s-2", hostVEthInterfacePrefix, defaultEpInfo.Id[:7])
|
|
}
|
|
|
|
ep := &endpoint{
|
|
Id: defaultEpInfo.Id,
|
|
IfName: contIfName, // container veth pair name. In cnm, we won't rename this and docker expects veth name.
|
|
HostIfName: hostIfName,
|
|
InfraVnetIP: defaultEpInfo.InfraVnetIP,
|
|
LocalIP: localIP,
|
|
IPAddresses: defaultEpInfo.IPAddresses,
|
|
DNS: defaultEpInfo.DNS,
|
|
VlanID: vlanid,
|
|
EnableSnatOnHost: defaultEpInfo.EnableSnatOnHost,
|
|
EnableInfraVnet: defaultEpInfo.EnableInfraVnet,
|
|
EnableMultitenancy: defaultEpInfo.EnableMultiTenancy,
|
|
AllowInboundFromHostToNC: defaultEpInfo.AllowInboundFromHostToNC,
|
|
AllowInboundFromNCToHost: defaultEpInfo.AllowInboundFromNCToHost,
|
|
NetworkNameSpace: defaultEpInfo.NetNsPath,
|
|
ContainerID: defaultEpInfo.ContainerID,
|
|
PODName: defaultEpInfo.PODName,
|
|
PODNameSpace: defaultEpInfo.PODNameSpace,
|
|
Routes: defaultEpInfo.Routes,
|
|
SecondaryInterfaces: make(map[string]*InterfaceInfo),
|
|
}
|
|
if nw.extIf != nil {
|
|
ep.Gateways = []net.IP{nw.extIf.IPv4Gateway}
|
|
}
|
|
|
|
for _, epInfo := range epInfo {
|
|
// testEpClient is non-nil only when the endpoint is created for the unit test
|
|
// resetting epClient to testEpClient in loop to use the test endpoint client if specified
|
|
epClient := testEpClient
|
|
if epClient == nil {
|
|
//nolint:gocritic
|
|
if vlanid != 0 {
|
|
if nw.Mode == opModeTransparentVlan {
|
|
logger.Info("Transparent vlan client")
|
|
if _, ok := epInfo.Data[SnatBridgeIPKey]; ok {
|
|
nw.SnatBridgeIP = epInfo.Data[SnatBridgeIPKey].(string)
|
|
}
|
|
epClient = NewTransparentVlanEndpointClient(nw, epInfo, hostIfName, contIfName, vlanid, localIP, nl, plc, nsc, iptc)
|
|
} else {
|
|
logger.Info("OVS client")
|
|
if _, ok := epInfo.Data[SnatBridgeIPKey]; ok {
|
|
nw.SnatBridgeIP = epInfo.Data[SnatBridgeIPKey].(string)
|
|
}
|
|
|
|
epClient = NewOVSEndpointClient(
|
|
nw,
|
|
epInfo,
|
|
hostIfName,
|
|
contIfName,
|
|
vlanid,
|
|
localIP,
|
|
nl,
|
|
ovsctl.NewOvsctl(),
|
|
plc,
|
|
iptc)
|
|
}
|
|
} else if nw.Mode != opModeTransparent {
|
|
logger.Info("Bridge client")
|
|
epClient = NewLinuxBridgeEndpointClient(nw.extIf, hostIfName, contIfName, nw.Mode, nl, plc)
|
|
} else if epInfo.NICType == cns.DelegatedVMNIC {
|
|
logger.Info("Secondary client")
|
|
epClient = NewSecondaryEndpointClient(nl, netioCli, plc, nsc, ep)
|
|
} else {
|
|
logger.Info("Transparent client")
|
|
epClient = NewTransparentEndpointClient(nw.extIf, hostIfName, contIfName, nw.Mode, nl, netioCli, plc)
|
|
}
|
|
}
|
|
|
|
//nolint:gocritic
|
|
defer func(client EndpointClient, contIfName string) {
|
|
// Cleanup on failure.
|
|
if err != nil {
|
|
logger.Error("CNI error. Delete Endpoint and rules that are created", zap.Error(err), zap.String("contIfName", contIfName))
|
|
if containerIf != nil {
|
|
client.DeleteEndpointRules(ep)
|
|
}
|
|
// set deleteHostVeth to true to cleanup host veth interface if created
|
|
//nolint:errcheck // ignore error
|
|
client.DeleteEndpoints(ep)
|
|
}
|
|
}(epClient, contIfName)
|
|
|
|
// wrapping endpoint client commands in anonymous func so that namespace can be exit and closed before the next loop
|
|
//nolint:wrapcheck // ignore wrap check
|
|
err = func() error {
|
|
if epErr := epClient.AddEndpoints(epInfo); epErr != nil {
|
|
return epErr
|
|
}
|
|
|
|
if epInfo.NICType == cns.InfraNIC {
|
|
var epErr error
|
|
containerIf, epErr = netioCli.GetNetworkInterfaceByName(contIfName)
|
|
if epErr != nil {
|
|
return epErr
|
|
}
|
|
ep.MacAddress = containerIf.HardwareAddr
|
|
}
|
|
|
|
// Setup rules for IP addresses on the container interface.
|
|
if epErr := epClient.AddEndpointRules(epInfo); epErr != nil {
|
|
return epErr
|
|
}
|
|
|
|
// If a network namespace for the container interface is specified...
|
|
if epInfo.NetNsPath != "" {
|
|
// Open the network namespace.
|
|
logger.Info("Opening netns", zap.Any("NetNsPath", epInfo.NetNsPath))
|
|
ns, epErr := nsc.OpenNamespace(epInfo.NetNsPath)
|
|
if epErr != nil {
|
|
return epErr
|
|
}
|
|
defer ns.Close()
|
|
|
|
if epErr := epClient.MoveEndpointsToContainerNS(epInfo, ns.GetFd()); epErr != nil {
|
|
return epErr
|
|
}
|
|
|
|
// Enter the container network namespace.
|
|
logger.Info("Entering netns", zap.Any("NetNsPath", epInfo.NetNsPath))
|
|
if epErr := ns.Enter(); epErr != nil {
|
|
return epErr
|
|
}
|
|
|
|
// Return to host network namespace.
|
|
defer func() {
|
|
logger.Info("Exiting netns", zap.Any("NetNsPath", epInfo.NetNsPath))
|
|
if epErr := ns.Exit(); epErr != nil {
|
|
logger.Error("Failed to exit netns with", zap.Error(epErr))
|
|
}
|
|
}()
|
|
}
|
|
|
|
if epInfo.IPV6Mode != "" {
|
|
// Enable ipv6 setting in container
|
|
logger.Info("Enable ipv6 setting in container.")
|
|
nuc := networkutils.NewNetworkUtils(nl, plc)
|
|
if epErr := nuc.UpdateIPV6Setting(0); epErr != nil {
|
|
return fmt.Errorf("Enable ipv6 in container failed:%w", epErr)
|
|
}
|
|
}
|
|
|
|
// If a name for the container interface is specified...
|
|
if epInfo.IfName != "" {
|
|
if epErr := epClient.SetupContainerInterfaces(epInfo); epErr != nil {
|
|
return epErr
|
|
}
|
|
}
|
|
|
|
return epClient.ConfigureContainerInterfacesAndRoutes(epInfo)
|
|
}()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return ep, nil
|
|
}
|
|
|
|
// deleteEndpointImpl deletes an existing endpoint from the network.
|
|
func (nw *network) deleteEndpointImpl(nl netlink.NetlinkInterface, plc platform.ExecClient, epClient EndpointClient, nioc netio.NetIOInterface, nsc NamespaceClientInterface,
|
|
iptc ipTablesClient, ep *endpoint,
|
|
) error {
|
|
// Delete the veth pair by deleting one of the peer interfaces.
|
|
// Deleting the host interface is more convenient since it does not require
|
|
// entering the container netns and hence works both for CNI and CNM.
|
|
|
|
// epClient is nil only for unit test.
|
|
if epClient == nil {
|
|
//nolint:gocritic
|
|
if ep.VlanID != 0 {
|
|
epInfo := ep.getInfo()
|
|
if nw.Mode == opModeTransparentVlan {
|
|
logger.Info("Transparent vlan client")
|
|
epClient = NewTransparentVlanEndpointClient(nw, epInfo, ep.HostIfName, "", ep.VlanID, ep.LocalIP, nl, plc, nsc, iptc)
|
|
|
|
} else {
|
|
epClient = NewOVSEndpointClient(nw, epInfo, ep.HostIfName, "", ep.VlanID, ep.LocalIP, nl, ovsctl.NewOvsctl(), plc, iptc)
|
|
}
|
|
} else if nw.Mode != opModeTransparent {
|
|
epClient = NewLinuxBridgeEndpointClient(nw.extIf, ep.HostIfName, "", nw.Mode, nl, plc)
|
|
} else {
|
|
if len(ep.SecondaryInterfaces) > 0 {
|
|
epClient = NewSecondaryEndpointClient(nl, nioc, plc, nsc, ep)
|
|
epClient.DeleteEndpointRules(ep)
|
|
//nolint:errcheck // ignore error
|
|
epClient.DeleteEndpoints(ep)
|
|
}
|
|
|
|
epClient = NewTransparentEndpointClient(nw.extIf, ep.HostIfName, "", nw.Mode, nl, nioc, plc)
|
|
}
|
|
}
|
|
|
|
epClient.DeleteEndpointRules(ep)
|
|
// deleteHostVeth set to false not to delete veth as CRI will remove network namespace and
|
|
// veth will get removed as part of that.
|
|
//nolint:errcheck // ignore error
|
|
epClient.DeleteEndpoints(ep)
|
|
|
|
return nil
|
|
}
|
|
|
|
// getInfoImpl returns information about the endpoint.
|
|
func (ep *endpoint) getInfoImpl(epInfo *EndpointInfo) {
|
|
}
|
|
|
|
func addRoutes(nl netlink.NetlinkInterface, netioshim netio.NetIOInterface, interfaceName string, routes []RouteInfo) error {
|
|
ifIndex := 0
|
|
|
|
for _, route := range routes {
|
|
if route.DevName != "" {
|
|
devIf, _ := netioshim.GetNetworkInterfaceByName(route.DevName)
|
|
ifIndex = devIf.Index
|
|
} else {
|
|
interfaceIf, err := netioshim.GetNetworkInterfaceByName(interfaceName)
|
|
if err != nil {
|
|
logger.Error("Interface not found with", zap.Error(err))
|
|
return fmt.Errorf("addRoutes failed: %w", err)
|
|
}
|
|
ifIndex = interfaceIf.Index
|
|
}
|
|
|
|
family := netlink.GetIPAddressFamily(route.Gw)
|
|
if route.Gw == nil {
|
|
family = netlink.GetIPAddressFamily(route.Dst.IP)
|
|
}
|
|
|
|
nlRoute := &netlink.Route{
|
|
Family: family,
|
|
Dst: &route.Dst,
|
|
Gw: route.Gw,
|
|
LinkIndex: ifIndex,
|
|
Priority: route.Priority,
|
|
Protocol: route.Protocol,
|
|
Scope: route.Scope,
|
|
Table: route.Table,
|
|
}
|
|
|
|
logger.Info("Adding IP route to link", zap.Any("route", route), zap.String("interfaceName", interfaceName))
|
|
if err := nl.AddIPRoute(nlRoute); err != nil {
|
|
if !strings.Contains(strings.ToLower(err.Error()), "file exists") {
|
|
return err
|
|
} else {
|
|
logger.Info("route already exists")
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func deleteRoutes(nl netlink.NetlinkInterface, netioshim netio.NetIOInterface, interfaceName string, routes []RouteInfo) error {
|
|
ifIndex := 0
|
|
|
|
for _, route := range routes {
|
|
if route.DevName != "" {
|
|
devIf, _ := netioshim.GetNetworkInterfaceByName(route.DevName)
|
|
if devIf == nil {
|
|
logger.Info("Not deleting route. Interface doesn't exist", zap.String("interfaceName", interfaceName))
|
|
continue
|
|
}
|
|
|
|
ifIndex = devIf.Index
|
|
} else if interfaceName != "" {
|
|
interfaceIf, _ := netioshim.GetNetworkInterfaceByName(interfaceName)
|
|
if interfaceIf == nil {
|
|
logger.Info("Not deleting route. Interface doesn't exist", zap.String("interfaceName", interfaceName))
|
|
continue
|
|
}
|
|
ifIndex = interfaceIf.Index
|
|
}
|
|
|
|
family := netlink.GetIPAddressFamily(route.Gw)
|
|
if route.Gw == nil {
|
|
family = netlink.GetIPAddressFamily(route.Dst.IP)
|
|
}
|
|
|
|
nlRoute := &netlink.Route{
|
|
Family: family,
|
|
Dst: &route.Dst,
|
|
LinkIndex: ifIndex,
|
|
Gw: route.Gw,
|
|
Protocol: route.Protocol,
|
|
Scope: route.Scope,
|
|
}
|
|
|
|
logger.Info("Deleting IP route from link", zap.Any("route", route), zap.String("interfaceName", interfaceName))
|
|
if err := nl.DeleteIPRoute(nlRoute); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// updateEndpointImpl updates an existing endpoint in the network.
|
|
func (nm *networkManager) updateEndpointImpl(nw *network, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) (*endpoint, error) {
|
|
var ep *endpoint
|
|
|
|
existingEpFromRepository := nw.Endpoints[existingEpInfo.Id]
|
|
logger.Info("[updateEndpointImpl] Going to retrieve endpoint with Id to update", zap.String("id", existingEpInfo.Id))
|
|
if existingEpFromRepository == nil {
|
|
logger.Info("[updateEndpointImpl] Endpoint cannot be updated as it does not exist")
|
|
return nil, errEndpointNotFound
|
|
}
|
|
|
|
netns := existingEpFromRepository.NetworkNameSpace
|
|
// Network namespace for the container interface has to be specified
|
|
if netns != "" {
|
|
// Open the network namespace.
|
|
logger.Info("[updateEndpointImpl] Opening netns", zap.Any("netns", netns))
|
|
ns, err := nm.nsClient.OpenNamespace(netns)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer ns.Close()
|
|
|
|
// Enter the container network namespace.
|
|
logger.Info("[updateEndpointImpl] Entering netns", zap.Any("netns", netns))
|
|
if err = ns.Enter(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Return to host network namespace.
|
|
defer func() {
|
|
logger.Info("[updateEndpointImpl] Exiting netns", zap.Any("netns", netns))
|
|
if err := ns.Exit(); err != nil {
|
|
logger.Error("[updateEndpointImpl] Failed to exit netns with", zap.Error(err))
|
|
}
|
|
}()
|
|
} else {
|
|
logger.Info("[updateEndpointImpl] Endpoint cannot be updated as the network namespace does not exist: Epid", zap.String("id", existingEpInfo.Id),
|
|
zap.String("component", "updateEndpointImpl"))
|
|
return nil, errNamespaceNotFound
|
|
}
|
|
|
|
logger.Info("[updateEndpointImpl] Going to update routes in netns", zap.Any("netns", netns))
|
|
if err := nm.updateRoutes(existingEpInfo, targetEpInfo); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Create the endpoint object.
|
|
ep = &endpoint{
|
|
Id: existingEpInfo.Id,
|
|
}
|
|
|
|
// Update existing endpoint state with the new routes to persist
|
|
ep.Routes = append(ep.Routes, targetEpInfo.Routes...)
|
|
|
|
return ep, nil
|
|
}
|
|
|
|
func (nm *networkManager) updateRoutes(existingEp *EndpointInfo, targetEp *EndpointInfo) error {
|
|
logger.Info("Updating routes for the endpoint", zap.Any("existingEp", existingEp))
|
|
logger.Info("Target endpoint is", zap.Any("targetEp", targetEp))
|
|
|
|
existingRoutes := make(map[string]RouteInfo)
|
|
targetRoutes := make(map[string]RouteInfo)
|
|
var tobeDeletedRoutes []RouteInfo
|
|
var tobeAddedRoutes []RouteInfo
|
|
|
|
// we should not remove default route from container if it exists
|
|
// we do not support enable/disable snat for now
|
|
defaultDst := net.ParseIP("0.0.0.0")
|
|
|
|
logger.Info("Going to collect routes and skip default and infravnet routes if applicable.")
|
|
logger.Info("Key for default route", zap.String("route", defaultDst.String()))
|
|
|
|
infraVnetKey := ""
|
|
if targetEp.EnableInfraVnet {
|
|
infraVnetSubnet := targetEp.InfraVnetAddressSpace
|
|
if infraVnetSubnet != "" {
|
|
infraVnetKey = strings.Split(infraVnetSubnet, "/")[0]
|
|
}
|
|
}
|
|
|
|
logger.Info("Key for route to infra vnet", zap.String("infraVnetKey", infraVnetKey))
|
|
for _, route := range existingEp.Routes {
|
|
destination := route.Dst.IP.String()
|
|
logger.Info("Checking destination as to skip or not", zap.String("destination", destination))
|
|
isDefaultRoute := destination == defaultDst.String()
|
|
isInfraVnetRoute := targetEp.EnableInfraVnet && (destination == infraVnetKey)
|
|
if !isDefaultRoute && !isInfraVnetRoute {
|
|
existingRoutes[route.Dst.String()] = route
|
|
logger.Info("was skipped", zap.String("destination", destination))
|
|
}
|
|
}
|
|
|
|
for _, route := range targetEp.Routes {
|
|
targetRoutes[route.Dst.String()] = route
|
|
}
|
|
|
|
for _, existingRoute := range existingRoutes {
|
|
dst := existingRoute.Dst.String()
|
|
if _, ok := targetRoutes[dst]; !ok {
|
|
tobeDeletedRoutes = append(tobeDeletedRoutes, existingRoute)
|
|
logger.Info("Adding following route to the tobeDeleted list", zap.Any("existingRoute", existingRoute))
|
|
}
|
|
}
|
|
|
|
for _, targetRoute := range targetRoutes {
|
|
dst := targetRoute.Dst.String()
|
|
if _, ok := existingRoutes[dst]; !ok {
|
|
tobeAddedRoutes = append(tobeAddedRoutes, targetRoute)
|
|
logger.Info("Adding following route to the tobeAdded list", zap.Any("targetRoute", targetRoute))
|
|
}
|
|
|
|
}
|
|
|
|
err := deleteRoutes(nm.netlink, &netio.NetIO{}, existingEp.IfName, tobeDeletedRoutes)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = addRoutes(nm.netlink, &netio.NetIO{}, existingEp.IfName, tobeAddedRoutes)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
logger.Info("Successfully updated routes for the endpoint using target", zap.Any("existingEp", existingEp), zap.Any("targetEp", targetEp))
|
|
|
|
return nil
|
|
}
|
|
|
|
func getDefaultGateway(routes []RouteInfo) net.IP {
|
|
_, defDstIP, _ := net.ParseCIDR("0.0.0.0/0")
|
|
for _, route := range routes {
|
|
if route.Dst.String() == defDstIP.String() {
|
|
return route.Gw
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|