azure-container-networking/network/network_linux.go

748 строки
21 KiB
Go
Исходник Обычный вид История

// Copyright 2017 Microsoft. All rights reserved.
// MIT License
package network
import (
"fmt"
"net"
"strconv"
"strings"
"github.com/Azure/azure-container-networking/iptables"
"github.com/Azure/azure-container-networking/netio"
"github.com/Azure/azure-container-networking/netlink"
"github.com/Azure/azure-container-networking/network/networkutils"
"github.com/Azure/azure-container-networking/ovsctl"
"github.com/Azure/azure-container-networking/platform"
"github.com/pkg/errors"
"go.uber.org/zap"
"golang.org/x/sys/unix"
)
// Constants used by the Linux network implementation.
const (
	// Prefix for bridge names.
	bridgePrefix = "azure"
	// Virtual MAC address used by Azure VNET.
	virtualMacAddress = "12:34:56:78:9a:bc"
	// NOTE(review): versionID and distroID look like keys looked up in an
	// OS release description, and ubuntuStr like the distro value they are
	// compared against — confirm against the code that reads them.
	versionID = "VERSION_ID"
	distroID  = "ID"
	ubuntuStr = "ubuntu"
	// NOTE(review): presumably labels searched for in DNS status output —
	// confirm against the parsing code.
	dnsServersStr = "DNS Servers"
	dnsDomainStr  = "DNS Domain"
	// NOTE(review): presumably an Ubuntu major-version threshold — confirm
	// against the caller.
	ubuntuVersion17 = 17
	// OptVethName key for veth name option
	OptVethName = "vethname"
	// SnatBridgeIPKey key for the SNAT bridge
	SnatBridgeIPKey = "snatBridgeIP"
	// LocalIPKey key for local IP
	LocalIPKey = "localIP"
	// InfraVnetIPKey key for infra vnet
	InfraVnetIPKey = "infraVnetIP"
	// Ubuntu Release Version for checking which command to use.
	Ubuntu22 = "22.04"
)
// Single-character separator strings.
const (
	// lineDelimiter is the newline character.
	lineDelimiter = "\n"
	// colonDelimiter is a single colon.
	colonDelimiter = ":"
	// dotDelimiter is a single period.
	dotDelimiter = "."
)
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
// errorNetworkManager is the sentinel error wrapped by every error that
// newErrorNetworkManager builds.
var errorNetworkManager = errors.New("Network_linux pkg error")

// newErrorNetworkManager returns an error that wraps errorNetworkManager and
// carries errStr as additional detail. The resulting message is the sentinel's
// text, the separator " : ", then errStr.
func newErrorNetworkManager(errStr string) error {
	wrapped := fmt.Errorf("%w : %s", errorNetworkManager, errStr)
	return wrapped
}
// route is the Linux implementation of a route. It is a defined type whose
// underlying type is netlink.Route.
type route netlink.Route
// newNetworkImpl creates a new container network.
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
func (nm *networkManager) newNetworkImpl(nwInfo *EndpointInfo, extIf *externalInterface) (*network, error) {
// Connect the external interface.
var (
vlanid int
ifName string
)
opt, _ := nwInfo.Options[genericData].(map[string]interface{})
logger.Info("opt options", zap.Any("opt", opt), zap.Any("options", nwInfo.Options))
switch nwInfo.Mode {
2017-03-07 03:24:20 +03:00
case opModeTunnel:
fallthrough
case opModeBridge:
logger.Info("create bridge")
ifName = extIf.BridgeName
if err := nm.connectExternalInterface(extIf, nwInfo); err != nil {
return nil, err
}
if opt != nil && opt[VlanIDKey] != nil {
vlanid, _ = strconv.Atoi(opt[VlanIDKey].(string))
}
case opModeTransparent:
logger.Info("Transparent mode")
ifName = extIf.Name
if nwInfo.IPV6Mode != "" {
nu := networkutils.NewNetworkUtils(nm.netlink, nm.plClient)
if err := nu.EnableIPV6Forwarding(); err != nil {
return nil, fmt.Errorf("Ipv6 forwarding failed: %w", err)
}
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
}
feat: Add SNAT bridge to Native, decouple SNAT bridge (#1506) * Native Endpoint Client Add Endpoints * AddEndpointRules, ConfigureContainerInterfacesAndRoutes * Changed interface names, log statements nw.extIf.Name > eth0 (eth0) eth0.vlanid > eth0.X (eth0.1) %s%s hostIfName > vnet (A1veth0) %s%s-2 contIfName > container (B1veth0) * Renaming, using lib to set ns * Namespace "path" is /var/run/netns/<NS> * Loopback set up, Remove auto kernel subnet route * Cannot set link to up if it's in another NS * Multiple containers on same VNET NS * Delete Endpoint routes on Delete * Minimizing netns usage * Moving NS Exec Code * Further minimized netns.Set usage * Moved helper methods down, drafted tests * Removed DevName from Route Info, more tests * Test existing vnet ns, delete endpoint * NetNS interface for testing * Separated tests by namespace * Endpoints delete if they cannot be moved into NS * Namespace netns tests * Added Native Client to deleteEndpointImpl * Deletion of Endpoints Impl and Tests * Cleaned code (Tests ok) * Moved mock/netns to package (Tests ok) * Fixing Netns (wip) Moved netnsinterface to consumer package (network). Removed "Netns" from "NewNetns" and "NewMockNetns" as it is unambiguous. Changed uintptr to int and casted the int to uintptr when needed later. 
* Using errors.Wrap for error context (wip) * Removed sentence case (wip) * Removing variable predeclaration * Removed NewNativeEndpointClient Directly instantiating struct because nothing special happens in NewNativeEndpointClient * Removed generics from ExecuteInNS * Removed uintptr from mocknetns, tests compile Forgot to remove uintptr from mocknetns * Fix tests, lint * Fixes from linter Works on VMSS * Replacing references to ethX with vlan veth * Removed unnecessary log * Removed unnecessary mac, fix tests * Mockns method name enum * Unable to use GetNetworkInterfaceByName due to NS If I use GetNetworkInterface, I need to be in the vnet NS, but that means I will need to call ExecuteInNS, which causes tests to fail. * Fixes from linter * Assume if NS exists, vlan veth exists Tests ok * Fixes for Linter * Snat refactor * Fix delete tests * Fix delete tests bug * More snat refactor * Breaking, prepping for Native Snat Delete native endpoint snat route linux to remove errors and in theory, ovs should work fine again. * Go mod tidy for linting Hopefully this fixes the windows lint error * Add fields to native endpoint client for snat * Using New() func to create Native Client Creation of the native endpoint client is too complicated to directly instantiate. * Snat defaults * Insert SNAT entry points * Native Snat error handling * Breaking, decouple ovsctl from snat Proposed Solution implementation Moved ovsctlClient.AddPortOnOVSBridge to ovs_endpoint_snatroute_linux.go. Removed ovsctlclient from NewSnatClient. Removed ovsctlClient from testing file. * Delete unecessary ovssnat files * No lint on vishvananda netns Maybe this will fix the windows linter? * Build linux only for netns package Maybe this fixes the linter error? 
* Remove nolint to see if linter fails * Breaking, removed bridgeName bridgeName refers to the OVS Switch I believe * If native uses snat bridge, should also get IP * Breaking, Decouple or Wrap snat route * Check to see if snat triggered * Snat behaviors specific to ovs/native * Pass the pointer Add/Delete ok * Renaming to make consts public * Breaking, moving ovs specific parts of snat to ovs * Remove enable infra vnet (Tests ok) Tested: Allow Host to NC only Allow NC to Host only Allow both Wget Ping between containers Warning: Enable snat is still hard coded to true!!! * Move add port to after exists() check * Moved netns interface to caller, generalized tests Tests ok, Native ok * Typos * Reordered if statement, unwrapped arp Tests ok, ping ok, wget ok * Linted, wrapping errors * Go fumpt entire network package * Code markers removed, clean (Tests ok) OVS & Native: - Ping between two containers same VM, no packets on bridge - Ping between two containers diff VM, no packets on bridge - Ping other container not in vnet, no packets on bridge - Ping snat to container, packets on bridge - Ping container to snat, packets on bridge - Tcpdump confirmed on azSnatBr - Deletion of containers deletes appropriate interfaces * Renamed veth, fixed logs * Made deleteEndpoints logic clearer, renamed error * Renamed eth0 to primaryHostIfName, vlanEth to vlanIf * Deleted debug log * Corrected merge (hardware addr) (Tests ok) * Renamed vlan veth to hostExtIf_vlanID, Disabled RA eth0.2 makes disable RA look for a folder eth0 and then another sub folder "2". ("eth0/2") However, it should look for a folder named "eth0.2" literally. To solve this, we change the naming scheme to use an underscore instead. 
(Tests ok) * Renamed Native to TransparentVlan Confirmed basic functionality on VM with correct mode * Make file updated * Create azure-windows-multitenancy-transparent-vlan.conflist * Unified snat err format * Rename to transparent-vlan * Route table support added to local netlink * Moved SNAT to end of function * Defer deleting vlan interface on failure
2022-08-10 23:50:26 +03:00
case opModeTransparentVlan:
logger.Info("Transparent vlan mode")
feat: Add native linux endpoint client to prep removing OVS (#1471) * Native Endpoint Client Add Endpoints * AddEndpointRules, ConfigureContainerInterfacesAndRoutes * Changed interface names, log statements nw.extIf.Name > eth0 (eth0) eth0.vlanid > eth0.X (eth0.1) %s%s hostIfName > vnet (A1veth0) %s%s-2 contIfName > container (B1veth0) * Renaming, using lib to set ns * Namespace "path" is /var/run/netns/<NS> * Loopback set up, Remove auto kernel subnet route * Cannot set link to up if it's in another NS * Multiple containers on same VNET NS * Delete Endpoint routes on Delete * Minimizing netns usage * Moving NS Exec Code * Further minimized netns.Set usage * Moved helper methods down, drafted tests * Removed DevName from Route Info, more tests * Test existing vnet ns, delete endpoint * NetNS interface for testing * Separated tests by namespace * Endpoints delete if they cannot be moved into NS * Namespace netns tests * Added Native Client to deleteEndpointImpl * Deletion of Endpoints Impl and Tests * Cleaned code (Tests ok) * Moved mock/netns to package (Tests ok) * Fixing Netns (wip) Moved netnsinterface to consumer package (network). Removed "Netns" from "NewNetns" and "NewMockNetns" as it is unambiguous. Changed uintptr to int and casted the int to uintptr when needed later. 
* Using errors.Wrap for error context (wip) * Removed sentence case (wip) * Removing variable predeclaration * Removed NewNativeEndpointClient Directly instantiating struct because nothing special happens in NewNativeEndpointClient * Removed generics from ExecuteInNS * Removed uintptr from mocknetns, tests compile Forgot to remove uintptr from mocknetns * Fix tests, lint * Fixes from linter Works on VMSS * Replacing references to ethX with vlan veth * Removed unnecessary log * Removed unnecessary mac, fix tests * Mockns method name enum * Unable to use GetNetworkInterfaceByName due to NS If I use GetNetworkInterface, I need to be in the vnet NS, but that means I will need to call ExecuteInNS, which causes tests to fail. * Fixes from linter * Assume if NS exists, vlan veth exists Tests ok * Fixes for Linter * Fix delete tests * Fix delete tests bug * Go mod tidy for linting Hopefully this fixes the windows lint error * No lint on vishvananda netns Maybe this will fix the windows linter? * Build linux only for netns package Maybe this fixes the linter error? * Remove nolint to see if linter fails * Moved netns interface to caller, generalized tests Tests ok, Native ok * Typos * Reordered if statement, unwrapped arp Tests ok, ping ok, wget ok * Renamed veth, fixed logs * Made deleteEndpoints logic clearer, renamed error * Renamed eth0 to primaryHostIfName, vlanEth to vlanIf
2022-08-03 00:54:10 +03:00
ifName = extIf.Name
nu := networkutils.NewNetworkUtils(nm.netlink, nm.plClient)
if err := nu.EnableIPV4Forwarding(); err != nil {
return nil, errors.Wrap(err, "ipv4 forwarding failed")
}
if err := nu.UpdateIPV6Setting(1); err != nil {
return nil, errors.Wrap(err, "failed to disable ipv6 on vm")
}
// Blocks wireserver traffic from apipa nic
if err := nu.BlockEgressTrafficFromContainer(nm.iptablesClient, iptables.V4, networkutils.AzureDNS, iptables.TCP, iptables.HTTPPort); err != nil {
return nil, errors.Wrap(err, "unable to insert vm iptables rule drop wireserver packets")
}
default:
2017-03-07 03:24:20 +03:00
return nil, errNetworkModeInvalid
}
err := nm.handleCommonOptions(ifName, nwInfo)
if err != nil {
logger.Error("handleCommonOptions failed with", zap.Error(err))
return nil, err
}
// Create the network object.
nw := &network{
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
Id: nwInfo.NetworkID,
Mode: nwInfo.Mode,
Endpoints: make(map[string]*endpoint),
extIf: extIf,
VlanId: vlanid,
EnableSnatOnHost: nwInfo.EnableSnatOnHost,
}
return nw, nil
}
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
func (nm *networkManager) handleCommonOptions(ifName string, nwInfo *EndpointInfo) error {
var err error
if routes, exists := nwInfo.Options[RoutesKey]; exists {
err = addRoutes(nm.netlink, nm.netio, ifName, routes.([]RouteInfo))
if err != nil {
return err
}
}
if iptcmds, exists := nwInfo.Options[IPTablesKey]; exists {
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
err = nm.addToIptables(iptcmds.([]iptables.IPTableEntry))
if err != nil {
return err
}
}
return nil
}
// deleteNetworkImpl deletes an existing container network.
func (nm *networkManager) deleteNetworkImpl(nw *network) error {
var networkClient NetworkClient
if nw.VlanId != 0 {
networkClient = NewOVSClient(nw.extIf.BridgeName, nw.extIf.Name, ovsctl.NewOvsctl(), nm.netlink, nm.plClient)
} else {
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
networkClient = NewLinuxBridgeClient(nw.extIf.BridgeName, nw.extIf.Name, EndpointInfo{}, nm.netlink, nm.plClient)
}
// Disconnect the interface if this was the last network using it.
2017-02-28 12:27:20 +03:00
if len(nw.extIf.Networks) == 1 {
nm.disconnectExternalInterface(nw.extIf, networkClient)
}
return nil
}
// SaveIPConfig saves the IP configuration of an interface.
func (nm *networkManager) saveIPConfig(hostIf *net.Interface, extIf *externalInterface) error {
// Save the default routes on the interface.
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
routes, err := nm.netlink.GetIPRoute(&netlink.Route{Dst: &net.IPNet{}, LinkIndex: hostIf.Index})
if err != nil {
logger.Error("Failed to query routes", zap.Error(err))
return err
}
for _, r := range routes {
if r.Dst == nil {
if r.Family == unix.AF_INET {
extIf.IPv4Gateway = r.Gw
} else if r.Family == unix.AF_INET6 {
extIf.IPv6Gateway = r.Gw
}
}
extIf.Routes = append(extIf.Routes, (*route)(r))
}
// Save global unicast IP addresses on the interface.
addrs, err := hostIf.Addrs()
for _, addr := range addrs {
ipAddr, ipNet, err := net.ParseCIDR(addr.String())
ipNet.IP = ipAddr
if err != nil {
continue
}
if !ipAddr.IsGlobalUnicast() {
continue
}
extIf.IPAddresses = append(extIf.IPAddresses, ipNet)
logger.Info("Deleting IP address from interface", zap.Any("ipNet", ipNet), zap.String("hostInfName", hostIf.Name))
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
err = nm.netlink.DeleteIPAddress(hostIf.Name, ipAddr, ipNet)
if err != nil {
break
}
}
logger.Info("Saved interface IP configuration", zap.Any("extIf", extIf))
return err
}
// getMajorVersion returns the leading dot-separated component of version
// parsed as a base-10 integer (for example "22.04" yields 22).
//
// An error is returned when that component is empty or is not a valid
// integer. The underlying strconv error is wrapped with %w so callers can
// still inspect it.
func getMajorVersion(version string) (int, error) {
	// SplitN with a non-empty separator always yields at least one element, so
	// indexing [0] is safe and no "no components" fallback is required.
	majorStr := strings.SplitN(version, dotDelimiter, 2)[0]

	major, err := strconv.Atoi(majorStr)
	if err != nil {
		return 0, fmt.Errorf("parsing major version from %q: %w", version, err)
	}

	return major, nil
}
// isGreaterOrEqaulUbuntuVersion reports whether the OS details returned by
// platform.GetOSDetails describe an Ubuntu distribution (case-insensitive
// match on the distro ID) whose major version is at least versionToMatch.
//
// It returns false when the OS details cannot be read, when the distro is not
// Ubuntu, or when the major version cannot be parsed.
func isGreaterOrEqaulUbuntuVersion(versionToMatch int) bool {
	details, err := platform.GetOSDetails()
	if err != nil {
		logger.Error("Unable to get OS Details", zap.Error(err))
		return false
	}

	logger.Info("OSInfo", zap.Any("osInfo", details))

	if !strings.EqualFold(details[distroID], ubuntuStr) {
		return false
	}

	// The version value may be wrapped in double quotes; strip them before parsing.
	major, err := getMajorVersion(strings.Trim(details[versionID], "\""))
	if err != nil {
		logger.Error("Not setting dns. Unable to retrieve major version", zap.Error(err))
		return false
	}

	return major >= versionToMatch
}
// systemVersion runs "lsb_release -rs" through the platform client and returns
// its output unmodified. On failure the output is still returned, together
// with the wrapped error.
func (nm *networkManager) systemVersion() (string, error) {
	release, execErr := nm.plClient.ExecuteRawCommand("lsb_release -rs")

	// errors.Wrap returns nil when given a nil error, so the success path
	// yields (release, nil).
	return release, errors.Wrap(execErr, "error retrieving the system distribution version")
}
// addDomain builds (but does not run) the shell command that sets the DNS
// search domain on ifName. When the system version starts with Ubuntu22 the
// resolvectl form is produced, otherwise the systemd-resolve form.
//
// If the system version cannot be determined, the error from systemVersion is
// returned along with whatever string systemVersion produced.
func (nm *networkManager) addDomain(ifName, domain string) (string, error) {
	release, err := nm.systemVersion()
	if err != nil {
		return release, err
	}

	if strings.HasPrefix(release, Ubuntu22) {
		return fmt.Sprintf("resolvectl domain %s %s", ifName, domain), nil
	}

	return fmt.Sprintf("systemd-resolve --interface %s --set-domain %s", ifName, domain), nil
}
// addDNSServers builds (but does not run) the shell command that sets the DNS
// servers on ifName. When the system version starts with Ubuntu22 the
// resolvectl form is produced, otherwise the systemd-resolve form with one
// --set-dns flag per server.
//
// An empty command and a nil error are returned when dnsServers is empty. If
// the system version cannot be determined, the error from systemVersion is
// returned along with whatever string systemVersion produced.
func (nm *networkManager) addDNSServers(ifName string, dnsServers []string) (string, error) {
	release, err := nm.systemVersion()
	if err != nil {
		return release, err
	}

	if len(dnsServers) == 0 {
		logger.Warn("No dns servers to add")
		return "", nil
	}

	if strings.HasPrefix(release, Ubuntu22) {
		return fmt.Sprintf("resolvectl dns %s %s", ifName, strings.Join(dnsServers, " ")), nil
	}

	// Every server is preceded by " --set-dns "; dnsServers is non-empty here,
	// so prefixing the joined list yields the same text as per-item appends.
	serverFlags := " --set-dns " + strings.Join(dnsServers, " --set-dns ")

	return fmt.Sprintf("systemd-resolve --interface %s %s", ifName, serverFlags), nil
}
// ifNameStatus builds (but does not run) the shell command that prints the
// resolver status of ifName. When the system version starts with Ubuntu22 the
// resolvectl form is produced, otherwise the systemd-resolve form.
//
// If the system version cannot be determined, the error from systemVersion is
// returned along with whatever string systemVersion produced.
func (nm *networkManager) ifNameStatus(ifName string) (string, error) {
	release, err := nm.systemVersion()
	if err != nil {
		return release, err
	}

	if strings.HasPrefix(release, Ubuntu22) {
		return fmt.Sprintf("resolvectl status %s", ifName), nil
	}

	return fmt.Sprintf("systemd-resolve --status %s", ifName), nil
}
// readDNSInfo runs the resolver status command for ifName and extracts the DNS
// servers and DNS domain from its output.
//
// Parsing rules:
//   - A line containing dnsServersStr starts the server list; everything after
//     the first colon is split on whitespace and each field becomes a server.
//     Splitting only at the first colon keeps IPv6 addresses intact, and
//     splitting on whitespace handles several servers printed on one line.
//   - Following lines that contain no colon are treated as continuation lines
//     of the server list. Blank lines contribute no entries.
//   - Any other line ends the server list.
//   - For a line containing dnsDomainStr, the trimmed text after the first
//     colon becomes the suffix; the last such line wins.
//
// An error is returned when the status command cannot be generated or fails
// to execute.
func (nm *networkManager) readDNSInfo(ifName string) (DNSInfo, error) {
	var dnsInfo DNSInfo

	cmd, err := nm.ifNameStatus(ifName)
	if err != nil {
		return dnsInfo, errors.Wrap(err, "Error generating interface name status cmd")
	}

	out, err := nm.plClient.ExecuteRawCommand(cmd)
	if err != nil {
		return dnsInfo, errors.Wrapf(err, "Error executing interface status with cmd %s", cmd)
	}

	logger.Info("console output for above cmd", zap.Any("out", out))

	// strings.Split with a non-empty separator always returns at least one
	// element, so no emptiness check is needed on the result.
	inServerList := false
	for _, line := range strings.Split(out, lineDelimiter) {
		switch {
		case strings.Contains(line, dnsServersStr):
			if parts := strings.SplitN(line, colonDelimiter, 2); len(parts) == 2 {
				inServerList = true
				dnsInfo.Servers = append(dnsInfo.Servers, strings.Fields(parts[1])...)
			}
		case inServerList && !strings.Contains(line, colonDelimiter):
			// Continuation line; Fields yields nothing for blank lines, so no
			// empty entries are recorded.
			dnsInfo.Servers = append(dnsInfo.Servers, strings.Fields(line)...)
		default:
			inServerList = false
		}

		if strings.Contains(line, dnsDomainStr) {
			if parts := strings.SplitN(line, colonDelimiter, 2); len(parts) == 2 {
				dnsInfo.Suffix = strings.TrimSpace(parts[1])
			}
		}
	}

	return dnsInfo, nil
}
// saveDNSConfig reads the DNS settings of extIf.Name and stores them on
// extIf.DNSInfo.
//
// The settings are stored only when reading succeeded and produced at least
// one server and a non-empty suffix. Otherwise extIf.DNSInfo is left untouched
// and the read error is returned, which is nil when the information was merely
// incomplete.
func (nm *networkManager) saveDNSConfig(extIf *externalInterface) error {
	info, err := nm.readDNSInfo(extIf.Name)

	complete := err == nil && len(info.Servers) > 0 && info.Suffix != ""
	if !complete {
		logger.Info("Failed to read dns info from interface", zap.Any("dnsInfo", info), zap.String("extIfName", extIf.Name),
			zap.Error(err))
		return err
	}

	extIf.DNSInfo = info
	logger.Info("Saved DNS Info", zap.Any("DNSInfo", extIf.DNSInfo), zap.String("extIfName", extIf.Name))

	return nil
}
// ApplyIPConfig applies a previously saved IP configuration to an interface.
func (nm *networkManager) applyIPConfig(extIf *externalInterface, targetIf *net.Interface) error {
// Add IP addresses.
for _, addr := range extIf.IPAddresses {
logger.Info("Adding IP address to interface", zap.Any("addr", addr), zap.String("Name", targetIf.Name))
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
err := nm.netlink.AddIPAddress(targetIf.Name, addr.IP, addr)
if err != nil && !strings.Contains(strings.ToLower(err.Error()), "file exists") {
logger.Info("Failed to add IP address", zap.Any("addr", addr), zap.Error(err))
return err
}
}
// Add IP routes.
for _, route := range extIf.Routes {
route.LinkIndex = targetIf.Index
logger.Info("Adding IP route", zap.Any("route", route))
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
err := nm.netlink.AddIPRoute((*netlink.Route)(route))
if err != nil {
logger.Error("Failed to add IP route", zap.Any("route", route), zap.Error(err))
return err
}
}
return nil
}
// applyDNSConfig applies the DNS servers and search domain saved on extIf to
// the interface named ifName by generating and executing the matching resolver
// commands.
//
// Servers that do not parse as IPv4 addresses are logged and skipped. Nothing
// is done when extIf is nil. The first failure to generate or execute a
// command is returned wrapped with context.
func (nm *networkManager) applyDNSConfig(extIf *externalInterface, ifName string) error {
	if extIf == nil {
		return nil
	}

	var ipv4Servers []string
	for _, server := range extIf.DNSInfo.Servers {
		if net.ParseIP(server).To4() != nil {
			ipv4Servers = append(ipv4Servers, server)
			continue
		}
		logger.Error("Invalid dns ip", zap.String("server", server))
	}

	if len(ipv4Servers) > 0 {
		cmd, err := nm.addDNSServers(ifName, ipv4Servers)
		if err != nil {
			return errors.Wrap(err, "Error generating add DNS Servers cmd")
		}

		// An empty command means there is nothing to execute.
		if cmd != "" {
			if _, err = nm.plClient.ExecuteRawCommand(cmd); err != nil {
				return errors.Wrapf(err, "Error executing add DNS Servers with cmd %s", cmd)
			}
		}
	}

	if suffix := extIf.DNSInfo.Suffix; suffix != "" {
		cmd, err := nm.addDomain(ifName, suffix)
		if err != nil {
			return errors.Wrap(err, "Error generating add domain cmd")
		}

		if _, err = nm.plClient.ExecuteRawCommand(cmd); err != nil {
			return errors.Wrapf(err, "Error executing add Domain with cmd %s", cmd)
		}
	}

	return nil
}
// ConnectExternalInterface connects the given host interface to a bridge.
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
func (nm *networkManager) connectExternalInterface(extIf *externalInterface, nwInfo *EndpointInfo) error {
var (
err error
networkClient NetworkClient
)
defer func() {
logger.Info("Connecting interface completed", zap.String("Name", extIf.Name), zap.Error(err))
}()
// Check whether this interface is already connected.
if extIf.BridgeName != "" {
logger.Info("Interface is already connected to bridge", zap.String("BridgeName", extIf.BridgeName))
return nil
}
// Find the external interface.
hostIf, err := net.InterfaceByName(extIf.Name)
if err != nil {
return err
}
// If a bridge name is not specified, generate one based on the external interface index.
2017-03-07 03:24:20 +03:00
bridgeName := nwInfo.BridgeName
if bridgeName == "" {
bridgeName = fmt.Sprintf("%s%d", bridgePrefix, hostIf.Index)
}
opt, _ := nwInfo.Options[genericData].(map[string]interface{})
if opt != nil && opt[VlanIDKey] != nil {
networkClient = NewOVSClient(bridgeName, extIf.Name, ovsctl.NewOvsctl(), nm.netlink, nm.plClient)
} else {
networkClient = NewLinuxBridgeClient(bridgeName, extIf.Name, *nwInfo, nm.netlink, nm.plClient)
}
// Check if the bridge already exists.
bridge, err := net.InterfaceByName(bridgeName)
if err != nil {
// Create the bridge.
if err = networkClient.CreateBridge(); err != nil {
logger.Error("Error while creating bridge", zap.Error(err))
return err
}
bridge, err = net.InterfaceByName(bridgeName)
if err != nil {
return err
}
} else {
// Use the existing bridge.
logger.Info("Found existing bridge", zap.String("bridgeName", bridgeName))
}
defer func() {
if err != nil {
logger.Info("cleanup network")
nm.disconnectExternalInterface(extIf, networkClient)
}
}()
// Save host IP configuration.
err = nm.saveIPConfig(hostIf, extIf)
if err != nil {
logger.Error("Failed to save IP configuration for interface",
zap.String("Name", hostIf.Name), zap.Error(err))
}
/*
If custom dns server is updated, VM needs reboot for the change to take effect.
*/
isGreaterOrEqualUbuntu17 := isGreaterOrEqaulUbuntuVersion(ubuntuVersion17)
isSystemdResolvedActive := false
if isGreaterOrEqualUbuntu17 {
// Don't copy dns servers if systemd-resolved isn't available
if _, cmderr := nm.plClient.ExecuteRawCommand("systemctl status systemd-resolved"); cmderr == nil {
isSystemdResolvedActive = true
logger.Info("Saving dns config from", zap.String("Name", hostIf.Name))
if err = nm.saveDNSConfig(extIf); err != nil {
logger.Error("Failed to save dns config", zap.Error(err))
return err
}
}
}
logger.Info("Modifying interfaces", zap.String("Name", hostIf.Name))
// External interface down.
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
err = nm.netlink.SetLinkState(hostIf.Name, false)
if err != nil {
return errors.Wrap(err, "failed to set external interface down")
}
// Connect the external interface to the bridge.
if err = networkClient.SetBridgeMasterToHostInterface(); err != nil {
return errors.Wrap(err, "failed to connect external interface to bridge")
}
// External interface up.
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
err = nm.netlink.SetLinkState(hostIf.Name, true)
if err != nil {
return errors.Wrap(err, "failed to set external interface up")
}
// Bridge up.
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
err = nm.netlink.SetLinkState(bridgeName, true)
if err != nil {
return errors.Wrap(err, "failed to set bridge link state up")
}
// Add the bridge rules.
err = networkClient.AddL2Rules(extIf)
if err != nil {
return errors.Wrap(err, "failed to add bridge rules")
}
// External interface hairpin on.
if !nwInfo.DisableHairpinOnHostInterface {
logger.Info("Setting link hairpin on", zap.String("Name", hostIf.Name))
if err = networkClient.SetHairpinOnHostInterface(true); err != nil {
return err
}
}
// Apply IP configuration to the bridge for host traffic.
err = nm.applyIPConfig(extIf, bridge)
if err != nil {
logger.Error("Failed to apply interface IP configuration", zap.Error(err))
return err
}
if isGreaterOrEqualUbuntu17 && isSystemdResolvedActive {
if err = nm.applyDNSConfig(extIf, bridgeName); err != nil {
logger.Error("Failed to apply DNS configuration with", zap.Error(err))
return err
}
logger.Info("Applied dns config on", zap.Any("DNSInfo", extIf.DNSInfo), zap.String("bridgeName", bridgeName))
}
if nwInfo.IPV6Mode == IPV6Nat {
// adds pod cidr gateway ip to bridge
if err = nm.addIpv6NatGateway(nwInfo); err != nil {
logger.Error("Adding IPv6 Nat Gateway failed with", zap.Error(err))
return err
}
if err = nm.addIpv6SnatRule(extIf, nwInfo); err != nil {
logger.Error("Adding IPv6 Snat Rule failed with", zap.Error(err))
return err
}
// unmark packet if set by kube-proxy to skip kube-postrouting rule and processed
// by cni snat rule
if err = nm.iptablesClient.InsertIptableRule(iptables.V6, iptables.Mangle, iptables.Postrouting, "", "MARK --set-mark 0x0"); err != nil {
logger.Error("Adding Iptable mangle rule failed", zap.Error(err))
return err
}
}
extIf.BridgeName = bridgeName
logger.Info("Connected interface to bridge", zap.String("Name", extIf.Name), zap.String("BridgeName", extIf.BridgeName))
return nil
}
// DisconnectExternalInterface disconnects a host interface from its bridge.
func (nm *networkManager) disconnectExternalInterface(extIf *externalInterface, networkClient NetworkClient) {
logger.Info("Disconnecting interface and deleting bridge rules", zap.String("Name", extIf.Name))
2017-03-07 03:24:20 +03:00
// Delete bridge rules set on the external interface.
networkClient.DeleteL2Rules(extIf)
logger.Info("Deleting bridge")
// Delete Bridge
networkClient.DeleteBridge()
extIf.BridgeName = ""
logger.Info("Restoring ipconfig with primary interface", zap.String("Name", extIf.Name))
// Restore IP configuration.
hostIf, _ := net.InterfaceByName(extIf.Name)
err := nm.applyIPConfig(extIf, hostIf)
if err != nil {
logger.Error("Failed to apply IP configuration", zap.Error(err))
}
extIf.IPAddresses = nil
extIf.Routes = nil
logger.Info("Disconnected interface", zap.String("Name", extIf.Name))
}
// addToIptables runs the given iptables entries through the iptables client in
// slice order. It stops at the first entry that fails and returns that error;
// it returns nil when every entry ran successfully.
func (nm *networkManager) addToIptables(cmds []iptables.IPTableEntry) error {
	logger.Info("Adding additional iptable rules...")

	for i := range cmds {
		entry := cmds[i]

		if err := nm.iptablesClient.RunCmd(entry.Version, entry.Params); err != nil {
			return err
		}

		logger.Info("Successfully run iptables rule", zap.Any("cmd", entry))
	}

	return nil
}
// Add ipv6 nat gateway IP on bridge
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
func (nm *networkManager) addIpv6NatGateway(nwInfo *EndpointInfo) error {
logger.Info("Adding ipv6 nat gateway on azure bridge")
for _, subnetInfo := range nwInfo.Subnets {
if subnetInfo.Family == platform.AfINET6 {
ipAddr := []net.IPNet{{
IP: subnetInfo.Gateway,
Mask: subnetInfo.Prefix.Mask,
}}
nuc := networkutils.NewNetworkUtils(nm.netlink, nm.plClient)
err := nuc.AssignIPToInterface(nwInfo.BridgeName, ipAddr)
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolcing all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
if err != nil {
return newErrorNetworkManager(err.Error())
}
}
}
return nil
}
// snat ipv6 traffic to secondary ipv6 ip before leaving VM
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
// addIpv6SnatRule adds one iptables SNAT rule per non-IPv4 address found on
// extIf, matching traffic whose source is the prefix of the first IPv6 subnet
// listed in nwInfo.
//
// It returns errSubnetV6NotFound when nwInfo carries no IPv6 subnet prefix,
// errV6SnatRuleNotSet when no address on extIf resulted in a rule, and a
// wrapped error as soon as adding a rule fails.
func (nm *networkManager) addIpv6SnatRule(extIf *externalInterface, nwInfo *EndpointInfo) error {
	// Pick the prefix of the first IPv6 subnet; it stays zero-valued if none exists.
	var v6Prefix net.IPNet
	for _, s := range nwInfo.Subnets {
		if s.Family != platform.AfINET6 {
			continue
		}
		v6Prefix = s.Prefix
		break
	}
	if len(v6Prefix.IP) == 0 {
		return errSubnetV6NotFound
	}

	// Source match argument handed to AddSnatRule for every rule.
	srcMatch := fmt.Sprintf("-s %s", v6Prefix.String())

	rulesAdded := 0
	for _, addr := range extIf.IPAddresses {
		// To4 returns non-nil only for IPv4 addresses, which get no rule here.
		if addr.IP.To4() != nil {
			continue
		}

		logger.Info("Adding ipv6 snat rule")
		nu := networkutils.NewNetworkUtils(nm.netlink, nm.plClient)
		if err := nu.AddSnatRule(nm.iptablesClient, srcMatch, addr.IP); err != nil {
			return fmt.Errorf("adding iptable snat rule failed:%w", err)
		}
		rulesAdded++
	}

	if rulesAdded == 0 {
		return errV6SnatRuleNotSet
	}
	return nil
}
refactor: code changes for stateless cni and swift v2 (#2688) * ci: changes up to endpointInternal * ci: remove defaultInterface from invoker * ci: change up to CreateEndpoint * ci: changes up to CreateEndpoint() * ci: invoker cns and UT fixes * ci: add fixes to UT(s), capture non populated defaultInterface failures * ci: multitenancy changes * ci: invoker azure changes & remove all defaultInterfaceInfo possible * ci add NICType to baremetal flow * chore: address comments * merge nw info fields to ep info and draft new createEndpoint function * restruct ipamAddResult struct * reorder code to create epinfo first, and then create network and ep based on epinfo * add getNwInfo and generate ipamAddResult * fix network windows.go * create nw info first and create nw and ep dns info * fix testIpamAddFail ut referencing wrong redeclared err variable, fix error message * UT fix part one * fix the getNetworkID and getNetworkInfo * move create endpoint to network package, remove ifIndex as needed * use function to get network id * unify creation of nw and endpoint info by removing switch * change functions to consume ep info instead of nw info * remove unused variable accidentally added earlier * update old azure ipam invoker to use ep info and change ids to network ids when appropriate previously we renamed the NetworkInfo symbol to EndpointInfo in lots of places, but the Id in Network Info is NOT the same as Endpoint Info, so while the code compiles, code that previously used the id field of the network info struct would now be using the id field of the endpoint info struct. It should use the NetworkId field of the endpoint info struct instead. * rename endpoint info id field to EndpointID to remove ambiguity * change nw info to ep info in windows * adjust comments * move all methods in create ep info dependent on nw info to use ep info instead (windows and linux) addSubnetToNetworkInfo, setNetworkOptions, and getEndpointPolicies. 
getEndpointPolicies will now take just the subnets needed as a parameter rather than the whole nw or ep info. * make cnm compile (not necessarily correct) * make all tests compile except endpoint test secondary client (windows and linux) (not necessarily correct) * comment out endpoint test secondary endpoint client case to make tests compile * address todos and comments from meeting * remove duplicated code for populating address in ep info generation * update EndpointCreate to support multiple infra nic * save all endpoints to state, regardless of type, use either stateless or cni statefile undos some changes in "move create endpoint to network package, remove ifIndex as needed" deletion flow needs to figure out how to tell if the nic type is delegated 1 interface info : 1 endpoint info : 1 endpoint struct mapping * fix dual nic support conditional and finding master interface ip the master interface ip must be in a particular form where the last few bits are zeroed out based on the mask or we won't find the ip for example, while the host subnet perfix is 10.224.0.113/16, the ip that should be passed into find master interface (subnet) should be 10.224.0.0/16 which matches one of the interfaces' ipnet (10.224.0.0/16) * fix empty network name when we need to create a network, we collect the network information, but if we do not find the network, we return an empty nw info and an error when we create the endpoint we need to use endpoint info's network id, not the (possibly) empty network info struct's network id * make network_test.go compile (linux and windows compile) unit tests are not necessarily correct at this point * add NICType to endpoint struct and populate it important: when getting the endpoint state, the NIC Type field is not populated, leading to deletes not having a NIC Type; this should be changed so that getting the state populates that field including the nic type allows us to simplify the secondary endpoints delete flow (just check if the nic 
type is delegated instead of checking if the secondary interfaces map is populated) smoke tested: linux aks podsubnet (same vm, multi vm, internet, cni statefile consistent) linux standalone transparent vlan multitenancy (same vm, multi vm, internet, multi vnet, no connection between coke pepsi, cni statefile consistent) windows standalone bridge multitenancy single customer (same vm connections, internet, dns only, cni statefile consistent, 2 pods deleting and recreating) * ci: InterfaceInfo Map * fix multitenancy_test ut by changing key * add endpoint id to secondary ep info test since we populate the id in the actual flow * fix cni network_test linux and ensure secondary create ep info does not break in network_test we pass in sample delegated (secondary) data to Add which we then create endpoint info from even with most fields empty, in linux, the ep info is created without erroring * make invoker_cns_test linux pass running all linux package tests for network and cni package pass (or also fail on master, like createBridge) windows unit tests mostly all fail for the same ones on master and this branch summary: - network_windows_test.go ○ TestFailToAddIPv6DefaultRoute already fails on master - network_test.go ○ 9 tests fail on master, 9 tests fail on my branch - manager_test.go ○ 9 tests fail on master, 9 tests fail on my branch - endpoint_windows_test.go ○ TestNewAndDeleteEndpointImplHnsV2 already timeouts on master - endpoint_test.go ○ 9 tests fail on master, 9 tests fail on my branch - network_windows_test.go ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_already_hot_attached ○ FAIL: TestPluginSecondAddSamePodWindows/CNI_consecutive_add_not_hot_attached ○ We don't handle consecutive add anymore - network_test.go ○ TestPluginMultitenancyAdd/Add_Happy_path fails on master and my branch (received multiple NC results [] from CNS while dualnic feature is not supported)-- we still get two items on our list/map though which is expected - 
invoker_cns_test passes - invoker_azure_test passes - multitenancy_test passes The consecutive add tests fail but that is expected since we no longer support it. * modify delete flow to handle multiple epinfos to delete delete ALL endpoints related to the endpoint infos list in the event cni fails half-way through an add (one failed endpoint create and we delete all would-be-create endpoints and the state) replace looping over deletion code "n" number of times with getting a slice of endpoint infos to delete modify stateless cni code to retrieve a slice of network endpoint infos from a single response based on the container id (container id can be used in stateless cni for retrieval) incorporate stateless cni changes from other branch (cns client/ipam/restserver changes) modify get endpoint state to return slice of endpoint infos, and getting an endpoint will return an endpoint from that slice with nic type infra move edge case where endpoint is not created in the state but ips are already allocated to immediately after retrieving all ep infos fix mock behavior for getting all endpoints by container id move getting network id and network info out of the loop because their values do not seem to change between iterations move deletion of endpoint logic into a dedicated loop, and then create a dedicate loop for calling ipam delete to prevent inconsistent state all expected unit tests on linux pass * address feedback * Make change to UpdateEndpointState API to support SwiftV2 for Stateless CNI * change save state to only call update endpoint state once with a slice of endpoints, uts pass * fix using nonexistent key by passing in current interface info directly * fix azure ipam invoker not getting a populated network info for legacy cni * add L1VH windows support * add nic type to windows endpoints * move adding an external interface code to run only when creating a new network this change reflects prior behavior, where we would only add an external interface to the 
statefile if the network (after searching through all external interfaces) was not found currently, if there are multiple interfaces that could be selected as the master, we would add each external interface to the statefile, even if the *network* is associated with one of the existing interfaces while we would still always find the same network (thanks to having a constant NetworkId, regardless of the external interface), you could get an extra empty external interface in your statefile this commit should remove that possibility (the extra external interface shouldn't really matter in the first place though because we always select the external interface that has a matching network created on it) this should be os agnostic * update comments, first todo check pass * address some linter issues * rename networkId to networkID in endpoint info ran package tests in windows and linux for cni and network packages ran package tests in linux for cns restserver all have expected outputs (either pass, or also fails on master branch) * address linter issues * preserve more logs and reduce timeout for restart for debugging * clean comments and rename for clarity if we use the endpoint info for the network info fields, we name it nwInfo as a hint * address more linter issues linux network, restserver, and cni package tests pass * Revert "preserve more logs and reduce timeout for restart for debugging" This reverts commit 0f004925cfb37a4594df63a4259242c37cf07785. 
* ignore error on delete flow network query if we are in stateful cni and do not find the network, we will not error, but when we search for the endpoint it will not be found, leading to us calling ipam invoker delete which is assumed idempotent before returning previously we would error in stateful cni and return before calling ipam invoker delete * delete network on endpoint delete if stateless and delegated vmnic (win + linux) * add nic name, set nicname in linux to master interface name stateless will key into interface map with the nicname field in windows, the nicname field is based on the args ifname (usually eth0) in linux, the nicname field is based on the master interface found (usually eth0) note: hostifname/hostvethname = linux veth pair peer in the host ns ifname/contifname = linux veth pair peer in the container ns, in windows it's just the args ifname nicname is something else ifname isn't used during deletion in linux, hns id is used for deletion in windows * return secondary interface as cni result if no infra nic found, include mac address in cni result * address linter issue * fix critical error where failing to add in windows stateless would lead to hns components not being deleted and add netns for hnsv2 tested by triggering a failure to save the stateless state and seeing that the hns endpoint and network are cleaned up we use the endpoint info to clean up on "add" error, but previously, we didn't populate it with the hns ids to do so adds netns to stateless as the presence of a valid guid in netns determines if hnsv2 is used * set nicname used in stateless cni according to feedback * add dummy guid to stateless delete since we assume stateless is always hnsv2 we assume that the netns value isn't used in stateless deletion * clean up createEpInfo, declare endpoint info once * address feedback from vipul * change comments only * revert change to cns package * fix stateless cni migration flow not having nictype on migrate * keep nwInfo variables 
named the same as before pr (noop) * separate endpoint and network policies in endpoint info behavior should not change except in hnsv1, where network policies passed into network create call will NOT include endpoint policies endpoint policies always include network policies * address feedback from reviewers * address feedback and account for case where cns provides info without nic type if nic type is empty from cns in invoker cns, we assume it is infra nic type and populate it with infra nic type * address feedback to declare endpoint info once and populate all fields at once moved add subnets to after endpoint info created moved retrieval of all endpoint policies (from getEndpointPolicies and getPoliciesFromRuntimeCfg)until after endpoint info created network policies are just passed in from the args unaltered * use ifname instead of nicname field in endpoint struct as key in stateless * convert macaddress only nictype is delegatedvmnic * address feedback by removing network dns settings * address linter issues (noop) * address feedback and linter (noop) * remove unused consecutive add funcs (noop) * fix release ips when create a container without nictype using older cni and then upgrade cni and delete if we create a pod with an older cni version, it won't have a nictype if we upgrade cni and then delete, we should treat an empty nictype as an infra nictype and call the invoker delete * prevent eps with delegated nic type present on ep from also calling transparent endpoint client on delete tested on swift v2 linux single pod add, change cni to this version, delete (ok) then add using this cni version and delete, no extraneous transparent endpoint client calls logged * mock get interface method for ut searched for "NetPlugin" in all files and determined all prod use of NetPlugin goes through NewNetPlugin where we set the get interface method to the real interface get method adds ut where the master interface (by mac) is not found * address feedback (noop) * add 
ut for handling empty nictype on cns add (noop) * add multitenancy delete net not found ut (noop) * add uts for multi interface infos single add call, verify endpoint id, cns to cni data conversion and vice versa, get endpoint info from container id (noop) verifies partial success will delete all endpoints, even successfully created ones in the same cni add call * add ut for all pods associated with container id delete in one del call, new secondary delete flow (noop) * add two UTs * fix a linter issue * add ut to check endpoint ifname on new endpoint creation based on nictype (noop) * add ut for fail to find interface by subnet (noop) * Adding support for Stateless CNI Delete Edge case when there in no HNS ID * fix uts * fix linter issues * fix ut --------- Co-authored-by: jpayne3506 <payne.3506@gmail.com> Co-authored-by: paulyufan2 <paulyu01@outlook.com> Co-authored-by: AzureAhai <behzadm@microsoft.com>
2024-06-01 04:36:32 +03:00
// getNetworkInfoImpl publishes the network's VLAN ID through nwInfo.Options.
//
// When nw.VlanId is non-zero, a single-entry map holding the decimal string
// form of the VLAN ID under VlanIDKey is stored in nwInfo.Options under the
// genericData key. A zero VlanId leaves nwInfo untouched.
func getNetworkInfoImpl(nwInfo *EndpointInfo, nw *network) {
	if nw.VlanId == 0 {
		return
	}

	nwInfo.Options[genericData] = map[string]interface{}{
		VlanIDKey: strconv.Itoa(nw.VlanId),
	}
}
Netlink package interfacing and adding a fake (#996) (#1025) * Initial pass at Netlink interface * changing some netlink and epc * Resolving all dependencies on netlink package * first pass at adding a netlinkinterface * windows working now * feat: update cns client (#992) * fix debug commands Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix: update cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to debug calls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * repackage cns client Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * add ctx to all methods and preinit all route urls Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * down-scope cns client interface and move to consumer packages Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * no unkeyed struct literals Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * trace updated client method signatures out through windows paths * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * fix windows build Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * delint Signed-off-by: Evan Baker <rbtr@users.noreply.github.com> * windows working now * Some golints checks * commenting a flaky NPM UT and adding some golint checks * renaming fakenetlink to mocknetlink * removing a mock netlink usage * fixing more golints and a test fix * fixing more go lints * Adding in netlink from higher level as input * adding netlinkinterface to windows endpoint impl * removing netlink name confusion Co-authored-by: Evan Baker <rbtr@users.noreply.github.com> Co-authored-by: Vamsi Kalapala <vakr@microsoft.com> Co-authored-by: Evan Baker <rbtr@users.noreply.github.com>
2021-09-20 21:57:12 +03:00
// AddStaticRoute adds a static route to the interface.
//
// ip is the route destination and must be in CIDR notation (as accepted by
// net.ParseCIDR); the route is built with that network as Dst and 0.0.0.0 as
// the gateway, then handed to addRoutes for interfaceName.
//
// Returns an error when ip cannot be parsed as a CIDR, or when addRoutes
// fails. An addRoutes error whose message contains "file exists"
// (case-insensitive) is swallowed and nil is returned — presumably because
// the route is already present.
func AddStaticRoute(nl netlink.NetlinkInterface, netioshim netio.NetIOInterface, ip, interfaceName string) error {
	logger.Info("Adding static route", zap.String("ip", ip))

	// A malformed CIDR yields a nil *net.IPNet; bail out here rather than
	// dereferencing it below and panicking.
	_, ipNet, err := net.ParseCIDR(ip)
	if err != nil {
		return fmt.Errorf("parsing static route destination %q: %w", ip, err)
	}

	routes := []RouteInfo{{Dst: *ipNet, Gw: net.ParseIP("0.0.0.0")}}

	err = addRoutes(nl, netioshim, interfaceName, routes)
	if err != nil && !strings.Contains(strings.ToLower(err.Error()), "file exists") {
		logger.Error("addroutes failed with error", zap.Error(err))
		return err
	}

	return nil
}