2020-07-16 03:34:41 +03:00
// Copyright 2017 Microsoft. All rights reserved.
// MIT License
package restserver
2020-07-16 12:51:11 +03:00
import (
2020-07-23 23:03:10 +03:00
"bytes"
2020-12-12 00:54:17 +03:00
"context"
2020-07-23 23:03:10 +03:00
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
2020-07-23 03:11:41 +03:00
"reflect"
2020-12-12 00:54:17 +03:00
"strconv"
2022-03-12 04:11:12 +03:00
"time"
2020-07-23 03:11:41 +03:00
2020-07-16 12:51:11 +03:00
"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/cns/logger"
2021-10-21 21:14:03 +03:00
"github.com/Azure/azure-container-networking/cns/nmagent"
2021-08-07 00:19:21 +03:00
"github.com/Azure/azure-container-networking/cns/types"
2020-07-23 23:03:10 +03:00
"github.com/Azure/azure-container-networking/common"
2021-08-25 19:57:14 +03:00
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
2021-08-07 00:19:21 +03:00
"github.com/pkg/errors"
2020-07-16 12:51:11 +03:00
)
2020-07-16 03:34:41 +03:00
// This file contains the internal functions called by either HTTP APIs (api.go) or
// internal APIs (defined in internalapi.go).
// This will be used internally (say by RequestController in case of AKS)
// GetPartitionKey returns the dnc/service partition key stored on the service.
// The field is read between RLock and RUnlock on the service.
func (service *HTTPRestService) GetPartitionKey() (dncPartitionKey string) {
	service.RLock()
	defer service.RUnlock()
	return service.dncPartitionKey
}
2020-07-16 12:51:11 +03:00
2020-07-23 23:03:10 +03:00
// SetNodeOrchestrator sets the node orchestrator after registering with mDNC.
// The request is JSON-encoded and passed to the setOrchestratorType HTTP handler
// together with an in-memory response recorder; the recorded response is discarded.
func (service *HTTPRestService) SetNodeOrchestrator(r *cns.SetOrchestratorTypeRequest) {
	// Errors from marshalling and from building the request are ignored here.
	payload, _ := json.Marshal(r)
	httpReq, _ := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(payload))
	httpReq.Header.Set(common.ContentType, common.JsonContent)

	recorder := httptest.NewRecorder()
	service.setOrchestratorType(recorder, httpReq)
}
// SyncNodeStatus :- Retrieve the latest node state from DNC & returns the first occurence of returnCode and error with respect to contextFromCNI
2022-05-16 20:01:15 +03:00
func ( service * HTTPRestService ) SyncNodeStatus ( dncEP , infraVnet , nodeID string , contextFromCNI json . RawMessage ) ( returnCode types . ResponseCode , errStr string ) {
2020-07-23 23:03:10 +03:00
logger . Printf ( "[Azure CNS] SyncNodeStatus" )
var (
2021-08-07 00:19:21 +03:00
resp * http . Response
2020-07-23 23:03:10 +03:00
nodeInfoResponse cns . NodeInfoResponse
body [ ] byte
httpc = common . GetHttpClient ( )
)
// try to retrieve NodeInfoResponse from mDNC
2021-08-07 00:19:21 +03:00
url := fmt . Sprintf ( common . SyncNodeNetworkContainersURLFmt , dncEP , infraVnet , nodeID , dncApiVersion )
req , _ := http . NewRequestWithContext ( context . TODO ( ) , http . MethodGet , url , nil )
resp , err := httpc . Do ( req )
2020-07-23 23:03:10 +03:00
if err == nil {
2021-08-07 00:19:21 +03:00
if resp . StatusCode == http . StatusOK {
err = json . NewDecoder ( resp . Body ) . Decode ( & nodeInfoResponse )
2020-07-23 23:03:10 +03:00
} else {
2021-08-07 00:19:21 +03:00
err = errors . Errorf ( "http err: %d" , resp . StatusCode )
2020-07-23 23:03:10 +03:00
}
2021-08-07 00:19:21 +03:00
resp . Body . Close ( )
2020-07-23 23:03:10 +03:00
}
if err != nil {
2021-08-07 00:19:21 +03:00
returnCode = types . UnexpectedError
2020-07-23 23:03:10 +03:00
errStr = fmt . Sprintf ( "[Azure-CNS] Failed to sync node with error: %+v" , err )
logger . Errorf ( errStr )
return
}
var (
ncsToBeAdded = make ( map [ string ] cns . CreateNetworkContainerRequest )
ncsToBeDeleted = make ( map [ string ] bool )
)
// determine new NCs and NCs to be deleted
service . RLock ( )
for ncid := range service . state . ContainerStatus {
ncsToBeDeleted [ ncid ] = true
}
for _ , nc := range nodeInfoResponse . NetworkContainers {
2020-07-28 02:27:13 +03:00
ncid := nc . NetworkContainerid
2020-07-23 23:03:10 +03:00
delete ( ncsToBeDeleted , ncid )
if savedNc , exists := service . state . ContainerStatus [ ncid ] ; ! exists || savedNc . CreateNetworkContainerRequest . Version < nc . Version {
ncsToBeAdded [ ncid ] = nc
}
}
service . RUnlock ( )
// check if the version is valid and save it to service state
for ncid , nc := range ncsToBeAdded {
var (
2021-10-21 21:14:03 +03:00
versionURL = fmt . Sprintf ( nmagent . GetNetworkContainerVersionURLFmt ,
nmagent . WireserverIP ,
2020-07-23 23:03:10 +03:00
nc . PrimaryInterfaceIdentifier ,
nc . NetworkContainerid ,
nc . AuthorizationToken )
w = httptest . NewRecorder ( )
)
ncVersionURLs . Store ( nc . NetworkContainerid , versionURL )
2020-11-03 08:56:08 +03:00
waitingForUpdate , _ , _ := service . isNCWaitingForUpdate ( nc . Version , nc . NetworkContainerid )
2020-07-23 23:03:10 +03:00
body , _ = json . Marshal ( nc )
req , _ = http . NewRequest ( http . MethodPost , "" , bytes . NewBuffer ( body ) )
req . Header . Set ( common . ContentType , common . JsonContent )
service . createOrUpdateNetworkContainer ( w , req )
if w . Result ( ) . StatusCode == http . StatusOK {
var resp cns . CreateNetworkContainerResponse
2021-08-07 00:19:21 +03:00
if err = json . Unmarshal ( w . Body . Bytes ( ) , & resp ) ; err == nil && resp . Response . ReturnCode == types . Success {
2020-07-23 23:03:10 +03:00
service . Lock ( )
2021-09-02 02:28:17 +03:00
ncstatus := service . state . ContainerStatus [ ncid ]
2020-11-03 08:56:08 +03:00
ncstatus . VfpUpdateComplete = ! waitingForUpdate
2020-07-23 23:03:10 +03:00
service . state . ContainerStatus [ ncid ] = ncstatus
service . Unlock ( )
}
}
}
service . Lock ( )
service . saveState ( )
service . Unlock ( )
// delete dangling NCs
for nc := range ncsToBeDeleted {
var body bytes . Buffer
json . NewEncoder ( & body ) . Encode ( & cns . DeleteNetworkContainerRequest { NetworkContainerid : nc } )
req , err = http . NewRequest ( http . MethodPost , "" , & body )
if err == nil {
req . Header . Set ( common . JsonContent , common . JsonContent )
service . deleteNetworkContainer ( httptest . NewRecorder ( ) , req )
} else {
logger . Errorf ( "[Azure-CNS] Failed to delete NC request to sync state: %s" , err . Error ( ) )
}
ncVersionURLs . Delete ( nc )
}
return
}
2020-12-12 00:54:17 +03:00
// SyncHostNCVersion will check NC version from NMAgent and save it as host NC version in container status.
// If NMAgent NC version got updated, CNS will refresh the pending programming IP status.
2021-10-21 21:14:03 +03:00
func ( service * HTTPRestService ) SyncHostNCVersion ( ctx context . Context , channelMode string ) {
service . Lock ( )
defer service . Unlock ( )
2022-03-12 04:11:12 +03:00
start := time . Now ( )
err := service . syncHostNCVersion ( ctx , channelMode )
if err != nil {
logger . Errorf ( "sync host error %v" , err )
}
2022-10-13 04:00:21 +03:00
syncHostNCVersionCount . WithLabelValues ( strconv . FormatBool ( err == nil ) ) . Inc ( )
syncHostNCVersionLatency . WithLabelValues ( strconv . FormatBool ( err == nil ) ) . Observe ( time . Since ( start ) . Seconds ( ) )
2022-03-12 04:11:12 +03:00
}
// errNonExistentContainerStatus is the sentinel error wrapped by syncHostNCVersion when an NC
// that was selected for a host version update is not found in service.state.ContainerStatus.
var errNonExistentContainerStatus = errors . New ( "nonExistantContainerstatus" )
func ( service * HTTPRestService ) syncHostNCVersion ( ctx context . Context , channelMode string ) error {
2022-10-13 04:00:21 +03:00
outdatedNCs := map [ string ] struct { } { }
2021-10-21 21:14:03 +03:00
for idx := range service . state . ContainerStatus {
2020-12-12 00:54:17 +03:00
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
2022-10-13 04:00:21 +03:00
localNCVersion , err := strconv . Atoi ( service . state . ContainerStatus [ idx ] . HostVersion )
2020-12-12 00:54:17 +03:00
if err != nil {
2021-10-21 21:14:03 +03:00
logger . Errorf ( "Received err when change containerstatus.HostVersion %s to int, err msg %v" , service . state . ContainerStatus [ idx ] . HostVersion , err )
2020-12-12 00:54:17 +03:00
continue
}
2022-10-13 04:00:21 +03:00
dncNCVersion , err := strconv . Atoi ( service . state . ContainerStatus [ idx ] . CreateNetworkContainerRequest . Version )
2020-12-12 00:54:17 +03:00
if err != nil {
2021-10-21 21:14:03 +03:00
logger . Errorf ( "Received err when change nc version %s in containerstatus to int, err msg %v" , service . state . ContainerStatus [ idx ] . CreateNetworkContainerRequest . Version , err )
2020-12-12 00:54:17 +03:00
continue
}
// host NC version is the NC version from NMAgent, if it's smaller than NC version from DNC, then append it to indicate it needs update.
2022-10-13 04:00:21 +03:00
if localNCVersion < dncNCVersion {
outdatedNCs [ service . state . ContainerStatus [ idx ] . ID ] = struct { } { }
} else if localNCVersion > dncNCVersion {
logger . Errorf ( "NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d" , localNCVersion , dncNCVersion )
2020-12-12 00:54:17 +03:00
}
}
2022-10-13 04:00:21 +03:00
if len ( outdatedNCs ) == 0 {
2022-03-12 04:11:12 +03:00
return nil
2021-10-21 21:14:03 +03:00
}
2022-10-13 04:00:21 +03:00
ncVersionListResp , err := service . nmagentClient . GetNCVersionList ( ctx )
2021-10-21 21:14:03 +03:00
if err != nil {
2022-03-12 04:11:12 +03:00
return errors . Wrap ( err , "failed to get nc version list from nmagent" )
2021-10-21 21:14:03 +03:00
}
2022-10-13 04:00:21 +03:00
nmaNCs := map [ string ] string { }
for _ , nc := range ncVersionListResp . Containers {
nmaNCs [ nc . NetworkContainerID ] = nc . Version
2021-10-21 21:14:03 +03:00
}
2022-10-13 04:00:21 +03:00
for ncID := range outdatedNCs {
nmaNCVersionStr , ok := nmaNCs [ ncID ]
2021-10-21 21:14:03 +03:00
if ! ok {
2022-10-13 04:00:21 +03:00
// NMA doesn't have this NC that we need programmed yet, bail out
2021-10-21 21:14:03 +03:00
continue
}
2022-10-13 04:00:21 +03:00
nmaNCVersion , err := strconv . Atoi ( nmaNCVersionStr )
2021-10-21 21:14:03 +03:00
if err != nil {
2022-10-13 04:00:21 +03:00
logger . Errorf ( "failed to parse container version of %s: %s" , ncID , err )
continue
2021-10-21 21:14:03 +03:00
}
// Check whether it exist in service state and get the related nc info
ncInfo , exist := service . state . ContainerStatus [ ncID ]
if ! exist {
2022-10-13 04:00:21 +03:00
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
// this point, our internal state has changed unexpectedly and we should bail out and try again.
2022-03-12 04:11:12 +03:00
return errors . Wrapf ( errNonExistentContainerStatus , "can't find NC with ID %s in service state, stop updating this host NC version" , ncID )
2021-10-21 21:14:03 +03:00
}
2022-10-13 04:00:21 +03:00
localNCVersion , err := strconv . Atoi ( ncInfo . HostVersion )
if err != nil {
logger . Errorf ( "failed to parse host nc version string %s: %s" , ncInfo . HostVersion , err )
continue
}
if localNCVersion > nmaNCVersion {
logger . Errorf ( "NC version from NMA is decreasing: have %d, got %d" , localNCVersion , nmaNCVersion )
continue
}
2021-10-21 21:14:03 +03:00
if channelMode == cns . CRD {
2022-10-13 04:00:21 +03:00
service . MarkIpsAsAvailableUntransacted ( ncInfo . ID , nmaNCVersion )
2020-12-12 00:54:17 +03:00
}
2022-10-13 04:00:21 +03:00
logger . Printf ( "Updating NC %s host version from %s to %s" , ncID , ncInfo . HostVersion , nmaNCVersionStr )
ncInfo . HostVersion = nmaNCVersionStr
logger . Printf ( "Updated NC %s host version to %s" , ncID , ncInfo . HostVersion )
2021-10-21 21:14:03 +03:00
service . state . ContainerStatus [ ncID ] = ncInfo
2022-10-13 04:00:21 +03:00
// if we successfully updated the NC, pop it from the needs update set.
delete ( outdatedNCs , ncID )
}
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
// need to return an error indicating that
if len ( outdatedNCs ) > 0 {
return errors . Errorf ( "unabled to update some NCs: %v, missing or bad response from NMA" , outdatedNCs )
2020-12-12 00:54:17 +03:00
}
2022-03-12 04:11:12 +03:00
return nil
2020-12-12 00:54:17 +03:00
}
2020-07-16 13:05:28 +03:00
// This API will be called by CNS RequestController on CRD update.
2022-05-16 20:01:15 +03:00
func ( service * HTTPRestService ) ReconcileNCState ( ncRequest * cns . CreateNetworkContainerRequest , podInfoByIP map [ string ] cns . PodInfo , nnc * v1alpha . NodeNetworkConfig ) types . ResponseCode {
2021-08-07 00:19:21 +03:00
logger . Printf ( "Reconciling NC state with podInfo %+v" , podInfoByIP )
2020-07-28 06:53:49 +03:00
// check if ncRequest is null, then return as there is no CRD state yet
if ncRequest == nil {
2021-08-07 00:19:21 +03:00
logger . Printf ( "CNS starting with no NC state, podInfoMap count %d" , len ( podInfoByIP ) )
return types . Success
2020-07-28 06:53:49 +03:00
}
2021-12-02 03:02:27 +03:00
// If the NC was created successfully, then reconcile the assigned pod state
2021-09-22 03:02:03 +03:00
returnCode := service . CreateOrUpdateNetworkContainerInternal ( ncRequest )
2021-08-07 00:19:21 +03:00
if returnCode != types . Success {
2021-06-04 06:49:00 +03:00
return returnCode
}
2020-07-28 06:53:49 +03:00
2021-12-02 03:02:27 +03:00
// now parse the secondaryIP list, if it exists in PodInfo list, then assign that ip.
2020-07-28 06:53:49 +03:00
for _ , secIpConfig := range ncRequest . SecondaryIPConfigs {
2021-08-07 00:19:21 +03:00
if podInfo , exists := podInfoByIP [ secIpConfig . IPAddress ] ; exists {
2021-12-02 03:02:27 +03:00
logger . Printf ( "SecondaryIP %+v is assigned to Pod. %+v, ncId: %s" , secIpConfig , podInfo , ncRequest . NetworkContainerid )
2020-07-28 06:53:49 +03:00
2021-06-30 01:14:11 +03:00
jsonContext , err := podInfo . OrchestratorContext ( )
if err != nil {
logger . Errorf ( "Failed to marshal KubernetesPodInfo, error: %v" , err )
2021-08-07 00:19:21 +03:00
return types . UnexpectedError
2020-07-28 06:53:49 +03:00
}
2020-11-02 20:53:35 +03:00
ipconfigRequest := cns . IPConfigRequest {
2020-08-19 07:53:16 +03:00
DesiredIPAddress : secIpConfig . IPAddress ,
2020-07-28 06:53:49 +03:00
OrchestratorContext : jsonContext ,
2021-06-30 01:14:11 +03:00
InfraContainerID : podInfo . InfraContainerID ( ) ,
2021-10-08 00:14:05 +03:00
PodInterfaceID : podInfo . InterfaceID ( ) ,
2020-07-28 06:53:49 +03:00
}
if _ , err := requestIPConfigHelper ( service , ipconfigRequest ) ; err != nil {
2021-02-27 01:59:03 +03:00
logger . Errorf ( "AllocateIPConfig failed for SecondaryIP %+v, podInfo %+v, ncId %s, error: %v" , secIpConfig , podInfo , ncRequest . NetworkContainerid , err )
2021-08-07 00:19:21 +03:00
return types . FailedToAllocateIPConfig
2020-07-28 06:53:49 +03:00
}
} else {
2021-12-02 03:02:27 +03:00
logger . Printf ( "SecondaryIP %+v is not assigned. ncId: %s" , secIpConfig , ncRequest . NetworkContainerid )
2020-07-28 06:53:49 +03:00
}
}
2022-02-16 03:50:30 +03:00
err := service . MarkExistingIPsAsPendingRelease ( nnc . Spec . IPsNotInUse )
2020-09-29 01:37:36 +03:00
if err != nil {
2021-11-30 00:48:31 +03:00
logger . Errorf ( "[Azure CNS] Error. Failed to mark IPs as pending %v" , nnc . Spec . IPsNotInUse )
2021-08-07 00:19:21 +03:00
return types . UnexpectedError
2020-09-29 01:37:36 +03:00
}
2020-07-28 06:53:49 +03:00
return 0
}
2021-06-04 06:49:00 +03:00
// GetNetworkContainerInternal gets network container details.
2021-08-07 00:19:21 +03:00
func ( service * HTTPRestService ) GetNetworkContainerInternal (
req cns . GetNetworkContainerRequest ,
) ( cns . GetNetworkContainerResponse , types . ResponseCode ) {
2021-06-04 06:49:00 +03:00
getNetworkContainerResponse := service . getNetworkContainerResponse ( req )
returnCode := getNetworkContainerResponse . Response . ReturnCode
return getNetworkContainerResponse , returnCode
}
// DeleteNetworkContainerInternal deletes a network container.
2021-08-07 00:19:21 +03:00
func ( service * HTTPRestService ) DeleteNetworkContainerInternal (
req cns . DeleteNetworkContainerRequest ,
) types . ResponseCode {
2021-06-04 06:49:00 +03:00
_ , exist := service . getNetworkContainerDetails ( req . NetworkContainerid )
if ! exist {
logger . Printf ( "network container for id %v doesn't exist" , req . NetworkContainerid )
2021-08-07 00:19:21 +03:00
return types . Success
2021-06-04 06:49:00 +03:00
}
service . Lock ( )
defer service . Unlock ( )
if service . state . ContainerStatus != nil {
delete ( service . state . ContainerStatus , req . NetworkContainerid )
}
if service . state . ContainerIDByOrchestratorContext != nil {
for orchestratorContext , networkContainerID := range service . state . ContainerIDByOrchestratorContext {
if networkContainerID == req . NetworkContainerid {
delete ( service . state . ContainerIDByOrchestratorContext , orchestratorContext )
break
}
}
}
service . saveState ( )
2021-08-07 00:19:21 +03:00
return types . Success
2021-06-04 06:49:00 +03:00
}
2020-07-28 06:53:49 +03:00
// This API will be called by CNS RequestController on CRD update.
2021-10-07 19:55:09 +03:00
func ( service * HTTPRestService ) CreateOrUpdateNetworkContainerInternal ( req * cns . CreateNetworkContainerRequest ) types . ResponseCode {
2020-07-16 12:51:11 +03:00
if req . NetworkContainerid == "" {
logger . Errorf ( "[Azure CNS] Error. NetworkContainerid is empty" )
2021-08-07 00:19:21 +03:00
return types . NetworkContainerNotSpecified
2020-07-16 12:51:11 +03:00
}
// For now only RequestController uses this API which will be initialized only for AKS scenario.
// Validate ContainerType is set as Docker
2021-07-22 12:00:59 +03:00
if service . state . OrchestratorType != cns . KubernetesCRD && service . state . OrchestratorType != cns . Kubernetes {
2020-07-16 12:51:11 +03:00
logger . Errorf ( "[Azure CNS] Error. Unsupported OrchestratorType: %s" , service . state . OrchestratorType )
2021-08-07 00:19:21 +03:00
return types . UnsupportedOrchestratorType
2020-07-16 12:51:11 +03:00
}
// Validate PrimaryCA must never be empty
2020-07-23 03:11:41 +03:00
err := validateIPSubnet ( req . IPConfiguration . IPSubnet )
2020-07-17 08:22:10 +03:00
if err != nil {
logger . Errorf ( "[Azure CNS] Error. PrimaryCA is invalid, NC Req: %v" , req )
2021-08-07 00:19:21 +03:00
return types . InvalidPrimaryIPConfig
2020-07-17 08:22:10 +03:00
}
// Validate SecondaryIPConfig
2020-08-14 00:06:37 +03:00
for _ , secIpconfig := range req . SecondaryIPConfigs {
2020-07-17 08:22:10 +03:00
// Validate Ipconfig
2020-08-14 00:06:37 +03:00
if secIpconfig . IPAddress == "" {
logger . Errorf ( "Failed to add IPConfig to state: %+v, empty IPSubnet.IPAddress" , secIpconfig )
2021-08-07 00:19:21 +03:00
return types . InvalidSecondaryIPConfig
2020-07-17 08:22:10 +03:00
}
2020-07-16 12:51:11 +03:00
}
// Validate if state exists already
2020-07-23 03:11:41 +03:00
existingNCInfo , ok := service . getNetworkContainerDetails ( req . NetworkContainerid )
2020-07-16 12:51:11 +03:00
if ok {
2020-07-23 03:11:41 +03:00
existingReq := existingNCInfo . CreateNetworkContainerRequest
2021-09-02 02:28:17 +03:00
if ! reflect . DeepEqual ( existingReq . IPConfiguration , req . IPConfiguration ) {
2020-07-16 12:51:11 +03:00
logger . Errorf ( "[Azure CNS] Error. PrimaryCA is not same, NCId %s, old CA %s, new CA %s" , req . NetworkContainerid , existingReq . PrimaryInterfaceIdentifier , req . PrimaryInterfaceIdentifier )
2021-08-07 00:19:21 +03:00
return types . PrimaryCANotSame
2020-07-16 12:51:11 +03:00
}
}
// This will Create Or Update the NC state.
2021-09-22 03:02:03 +03:00
returnCode , returnMessage := service . saveNetworkContainerGoalState ( * req )
2020-07-16 12:51:11 +03:00
// If the NC was created successfully, log NC snapshot.
if returnCode == 0 {
2021-09-22 03:02:03 +03:00
logNCSnapshot ( * req )
2020-07-16 12:51:11 +03:00
} else {
logger . Errorf ( returnMessage )
}
2022-07-22 02:34:10 +03:00
if service . Options [ common . OptProgramSNATIPTables ] == true {
2022-08-15 23:00:32 +03:00
returnCode , returnMessage = service . programSNATRules ( req )
2022-07-22 02:34:10 +03:00
if returnCode != 0 {
logger . Errorf ( returnMessage )
}
}
2021-06-04 06:49:00 +03:00
return returnCode
}