azure-container-networking/cns/restserver/internalapi.go

392 строки
14 KiB
Go
Исходник Обычный вид История

2020-07-16 03:34:41 +03:00
// Copyright 2017 Microsoft. All rights reserved.
// MIT License
package restserver
2020-07-16 12:51:11 +03:00
import (
"bytes"
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
2020-07-23 03:11:41 +03:00
"reflect"
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
"strconv"
"time"
2020-07-23 03:11:41 +03:00
2020-07-16 12:51:11 +03:00
"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/cns/logger"
"github.com/Azure/azure-container-networking/cns/nmagent"
"github.com/Azure/azure-container-networking/cns/types"
"github.com/Azure/azure-container-networking/common"
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
"github.com/pkg/errors"
2020-07-16 12:51:11 +03:00
)
2020-07-16 03:34:41 +03:00
// This file contains the internal functions called by either HTTP APIs (api.go) or
// internal APIs (definde in internalapi.go).
// This will be used internally (say by RequestController in case of AKS)
// GetPartitionKey - Get dnc/service partition key
func (service *HTTPRestService) GetPartitionKey() (dncPartitionKey string) {
service.RLock()
dncPartitionKey = service.dncPartitionKey
service.RUnlock()
return
}
2020-07-16 12:51:11 +03:00
// SetNodeOrchestrator :- Set node orchestrator after registering with mDNC
func (service *HTTPRestService) SetNodeOrchestrator(r *cns.SetOrchestratorTypeRequest) {
body, _ := json.Marshal(r)
req, _ := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(body))
req.Header.Set(common.ContentType, common.JsonContent)
service.setOrchestratorType(httptest.NewRecorder(), req)
}
// SyncNodeStatus :- Retrieve the latest node state from DNC & returns the first occurence of returnCode and error with respect to contextFromCNI
func (service *HTTPRestService) SyncNodeStatus(dncEP, infraVnet, nodeID string, contextFromCNI json.RawMessage) (returnCode types.ResponseCode, errStr string) {
logger.Printf("[Azure CNS] SyncNodeStatus")
var (
resp *http.Response
nodeInfoResponse cns.NodeInfoResponse
body []byte
httpc = common.GetHttpClient()
)
// try to retrieve NodeInfoResponse from mDNC
url := fmt.Sprintf(common.SyncNodeNetworkContainersURLFmt, dncEP, infraVnet, nodeID, dncApiVersion)
req, _ := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, nil)
resp, err := httpc.Do(req)
if err == nil {
if resp.StatusCode == http.StatusOK {
err = json.NewDecoder(resp.Body).Decode(&nodeInfoResponse)
} else {
err = errors.Errorf("http err: %d", resp.StatusCode)
}
resp.Body.Close()
}
if err != nil {
returnCode = types.UnexpectedError
errStr = fmt.Sprintf("[Azure-CNS] Failed to sync node with error: %+v", err)
logger.Errorf(errStr)
return
}
var (
ncsToBeAdded = make(map[string]cns.CreateNetworkContainerRequest)
ncsToBeDeleted = make(map[string]bool)
)
// determine new NCs and NCs to be deleted
service.RLock()
for ncid := range service.state.ContainerStatus {
ncsToBeDeleted[ncid] = true
}
for _, nc := range nodeInfoResponse.NetworkContainers {
ncid := nc.NetworkContainerid
delete(ncsToBeDeleted, ncid)
if savedNc, exists := service.state.ContainerStatus[ncid]; !exists || savedNc.CreateNetworkContainerRequest.Version < nc.Version {
ncsToBeAdded[ncid] = nc
}
}
service.RUnlock()
// check if the version is valid and save it to service state
for ncid, nc := range ncsToBeAdded {
var (
versionURL = fmt.Sprintf(nmagent.GetNetworkContainerVersionURLFmt,
nmagent.WireserverIP,
nc.PrimaryInterfaceIdentifier,
nc.NetworkContainerid,
nc.AuthorizationToken)
w = httptest.NewRecorder()
)
ncVersionURLs.Store(nc.NetworkContainerid, versionURL)
waitingForUpdate, _, _ := service.isNCWaitingForUpdate(nc.Version, nc.NetworkContainerid)
body, _ = json.Marshal(nc)
req, _ = http.NewRequest(http.MethodPost, "", bytes.NewBuffer(body))
req.Header.Set(common.ContentType, common.JsonContent)
service.createOrUpdateNetworkContainer(w, req)
if w.Result().StatusCode == http.StatusOK {
var resp cns.CreateNetworkContainerResponse
if err = json.Unmarshal(w.Body.Bytes(), &resp); err == nil && resp.Response.ReturnCode == types.Success {
service.Lock()
ncstatus := service.state.ContainerStatus[ncid]
ncstatus.VfpUpdateComplete = !waitingForUpdate
service.state.ContainerStatus[ncid] = ncstatus
service.Unlock()
}
}
}
service.Lock()
service.saveState()
service.Unlock()
// delete dangling NCs
for nc := range ncsToBeDeleted {
var body bytes.Buffer
json.NewEncoder(&body).Encode(&cns.DeleteNetworkContainerRequest{NetworkContainerid: nc})
req, err = http.NewRequest(http.MethodPost, "", &body)
if err == nil {
req.Header.Set(common.JsonContent, common.JsonContent)
service.deleteNetworkContainer(httptest.NewRecorder(), req)
} else {
logger.Errorf("[Azure-CNS] Failed to delete NC request to sync state: %s", err.Error())
}
ncVersionURLs.Delete(nc)
}
return
}
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
// SyncHostNCVersion will check NC version from NMAgent and save it as host NC version in container status.
// If NMAgent NC version got updated, CNS will refresh the pending programming IP status.
func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMode string) {
service.Lock()
defer service.Unlock()
start := time.Now()
err := service.syncHostNCVersion(ctx, channelMode)
if err != nil {
logger.Errorf("sync host error %v", err)
}
syncHostNCVersionCount.WithLabelValues(strconv.FormatBool(err == nil)).Inc()
syncHostNCVersionLatency.WithLabelValues(strconv.FormatBool(err == nil)).Observe(time.Since(start).Seconds())
}
var errNonExistentContainerStatus = errors.New("nonExistantContainerstatus")
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) error {
outdatedNCs := map[string]struct{}{}
for idx := range service.state.ContainerStatus {
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
localNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
if err != nil {
logger.Errorf("Received err when change containerstatus.HostVersion %s to int, err msg %v", service.state.ContainerStatus[idx].HostVersion, err)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
continue
}
dncNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
if err != nil {
logger.Errorf("Received err when change nc version %s in containerstatus to int, err msg %v", service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version, err)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
continue
}
// host NC version is the NC version from NMAgent, if it's smaller than NC version from DNC, then append it to indicate it needs update.
if localNCVersion < dncNCVersion {
outdatedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
} else if localNCVersion > dncNCVersion {
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", localNCVersion, dncNCVersion)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
}
if len(outdatedNCs) == 0 {
return nil
}
ncVersionListResp, err := service.nmagentClient.GetNCVersionList(ctx)
if err != nil {
return errors.Wrap(err, "failed to get nc version list from nmagent")
}
nmaNCs := map[string]string{}
for _, nc := range ncVersionListResp.Containers {
nmaNCs[nc.NetworkContainerID] = nc.Version
}
for ncID := range outdatedNCs {
nmaNCVersionStr, ok := nmaNCs[ncID]
if !ok {
// NMA doesn't have this NC that we need programmed yet, bail out
continue
}
nmaNCVersion, err := strconv.Atoi(nmaNCVersionStr)
if err != nil {
logger.Errorf("failed to parse container version of %s: %s", ncID, err)
continue
}
// Check whether it exist in service state and get the related nc info
ncInfo, exist := service.state.ContainerStatus[ncID]
if !exist {
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
// this point, our internal state has changed unexpectedly and we should bail out and try again.
return errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
}
localNCVersion, err := strconv.Atoi(ncInfo.HostVersion)
if err != nil {
logger.Errorf("failed to parse host nc version string %s: %s", ncInfo.HostVersion, err)
continue
}
if localNCVersion > nmaNCVersion {
logger.Errorf("NC version from NMA is decreasing: have %d, got %d", localNCVersion, nmaNCVersion)
continue
}
if channelMode == cns.CRD {
service.MarkIpsAsAvailableUntransacted(ncInfo.ID, nmaNCVersion)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
logger.Printf("Updating NC %s host version from %s to %s", ncID, ncInfo.HostVersion, nmaNCVersionStr)
ncInfo.HostVersion = nmaNCVersionStr
logger.Printf("Updated NC %s host version to %s", ncID, ncInfo.HostVersion)
service.state.ContainerStatus[ncID] = ncInfo
// if we successfully updated the NC, pop it from the needs update set.
delete(outdatedNCs, ncID)
}
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
// need to return an error indicating that
if len(outdatedNCs) > 0 {
return errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
return nil
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
2020-07-16 13:05:28 +03:00
// This API will be called by CNS RequestController on CRD update.
func (service *HTTPRestService) ReconcileNCState(ncRequest *cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, nnc *v1alpha.NodeNetworkConfig) types.ResponseCode {
logger.Printf("Reconciling NC state with podInfo %+v", podInfoByIP)
// check if ncRequest is null, then return as there is no CRD state yet
if ncRequest == nil {
logger.Printf("CNS starting with no NC state, podInfoMap count %d", len(podInfoByIP))
return types.Success
}
// If the NC was created successfully, then reconcile the assigned pod state
returnCode := service.CreateOrUpdateNetworkContainerInternal(ncRequest)
if returnCode != types.Success {
return returnCode
}
// now parse the secondaryIP list, if it exists in PodInfo list, then assign that ip.
for _, secIpConfig := range ncRequest.SecondaryIPConfigs {
if podInfo, exists := podInfoByIP[secIpConfig.IPAddress]; exists {
logger.Printf("SecondaryIP %+v is assigned to Pod. %+v, ncId: %s", secIpConfig, podInfo, ncRequest.NetworkContainerid)
jsonContext, err := podInfo.OrchestratorContext()
if err != nil {
logger.Errorf("Failed to marshal KubernetesPodInfo, error: %v", err)
return types.UnexpectedError
}
ipconfigRequest := cns.IPConfigRequest{
DesiredIPAddress: secIpConfig.IPAddress,
OrchestratorContext: jsonContext,
InfraContainerID: podInfo.InfraContainerID(),
PodInterfaceID: podInfo.InterfaceID(),
}
if _, err := requestIPConfigHelper(service, ipconfigRequest); err != nil {
logger.Errorf("AllocateIPConfig failed for SecondaryIP %+v, podInfo %+v, ncId %s, error: %v", secIpConfig, podInfo, ncRequest.NetworkContainerid, err)
return types.FailedToAllocateIPConfig
}
} else {
logger.Printf("SecondaryIP %+v is not assigned. ncId: %s", secIpConfig, ncRequest.NetworkContainerid)
}
}
err := service.MarkExistingIPsAsPendingRelease(nnc.Spec.IPsNotInUse)
if err != nil {
logger.Errorf("[Azure CNS] Error. Failed to mark IPs as pending %v", nnc.Spec.IPsNotInUse)
return types.UnexpectedError
}
return 0
}
// GetNetworkContainerInternal gets network container details.
func (service *HTTPRestService) GetNetworkContainerInternal(
req cns.GetNetworkContainerRequest,
) (cns.GetNetworkContainerResponse, types.ResponseCode) {
getNetworkContainerResponse := service.getNetworkContainerResponse(req)
returnCode := getNetworkContainerResponse.Response.ReturnCode
return getNetworkContainerResponse, returnCode
}
// DeleteNetworkContainerInternal deletes a network container.
func (service *HTTPRestService) DeleteNetworkContainerInternal(
req cns.DeleteNetworkContainerRequest,
) types.ResponseCode {
_, exist := service.getNetworkContainerDetails(req.NetworkContainerid)
if !exist {
logger.Printf("network container for id %v doesn't exist", req.NetworkContainerid)
return types.Success
}
service.Lock()
defer service.Unlock()
if service.state.ContainerStatus != nil {
delete(service.state.ContainerStatus, req.NetworkContainerid)
}
if service.state.ContainerIDByOrchestratorContext != nil {
for orchestratorContext, networkContainerID := range service.state.ContainerIDByOrchestratorContext {
if networkContainerID == req.NetworkContainerid {
delete(service.state.ContainerIDByOrchestratorContext, orchestratorContext)
break
}
}
}
service.saveState()
return types.Success
}
// This API will be called by CNS RequestController on CRD update.
func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns.CreateNetworkContainerRequest) types.ResponseCode {
2020-07-16 12:51:11 +03:00
if req.NetworkContainerid == "" {
logger.Errorf("[Azure CNS] Error. NetworkContainerid is empty")
return types.NetworkContainerNotSpecified
2020-07-16 12:51:11 +03:00
}
// For now only RequestController uses this API which will be initialized only for AKS scenario.
// Validate ContainerType is set as Docker
if service.state.OrchestratorType != cns.KubernetesCRD && service.state.OrchestratorType != cns.Kubernetes {
2020-07-16 12:51:11 +03:00
logger.Errorf("[Azure CNS] Error. Unsupported OrchestratorType: %s", service.state.OrchestratorType)
return types.UnsupportedOrchestratorType
2020-07-16 12:51:11 +03:00
}
// Validate PrimaryCA must never be empty
2020-07-23 03:11:41 +03:00
err := validateIPSubnet(req.IPConfiguration.IPSubnet)
2020-07-17 08:22:10 +03:00
if err != nil {
logger.Errorf("[Azure CNS] Error. PrimaryCA is invalid, NC Req: %v", req)
return types.InvalidPrimaryIPConfig
2020-07-17 08:22:10 +03:00
}
// Validate SecondaryIPConfig
for _, secIpconfig := range req.SecondaryIPConfigs {
2020-07-17 08:22:10 +03:00
// Validate Ipconfig
if secIpconfig.IPAddress == "" {
logger.Errorf("Failed to add IPConfig to state: %+v, empty IPSubnet.IPAddress", secIpconfig)
return types.InvalidSecondaryIPConfig
2020-07-17 08:22:10 +03:00
}
2020-07-16 12:51:11 +03:00
}
// Validate if state exists already
2020-07-23 03:11:41 +03:00
existingNCInfo, ok := service.getNetworkContainerDetails(req.NetworkContainerid)
2020-07-16 12:51:11 +03:00
if ok {
2020-07-23 03:11:41 +03:00
existingReq := existingNCInfo.CreateNetworkContainerRequest
if !reflect.DeepEqual(existingReq.IPConfiguration, req.IPConfiguration) {
2020-07-16 12:51:11 +03:00
logger.Errorf("[Azure CNS] Error. PrimaryCA is not same, NCId %s, old CA %s, new CA %s", req.NetworkContainerid, existingReq.PrimaryInterfaceIdentifier, req.PrimaryInterfaceIdentifier)
return types.PrimaryCANotSame
2020-07-16 12:51:11 +03:00
}
}
// This will Create Or Update the NC state.
returnCode, returnMessage := service.saveNetworkContainerGoalState(*req)
2020-07-16 12:51:11 +03:00
// If the NC was created successfully, log NC snapshot.
if returnCode == 0 {
logNCSnapshot(*req)
2020-07-16 12:51:11 +03:00
} else {
logger.Errorf(returnMessage)
}
if service.Options[common.OptProgramSNATIPTables] == true {
returnCode, returnMessage = service.programSNATRules(req)
if returnCode != 0 {
logger.Errorf(returnMessage)
}
}
return returnCode
}