azure-container-networking/cns/restserver/internalapi.go

636 строки
23 KiB
Go
Исходник Обычный вид История

2020-07-16 03:34:41 +03:00
// Copyright 2017 Microsoft. All rights reserved.
// MIT License
package restserver
2020-07-16 12:51:11 +03:00
import (
"bytes"
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"net/http/httptest"
2020-07-23 03:11:41 +03:00
"reflect"
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
"strconv"
"strings"
"time"
2020-07-23 03:11:41 +03:00
2020-07-16 12:51:11 +03:00
"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/cns/logger"
"github.com/Azure/azure-container-networking/cns/nodesubnet"
"github.com/Azure/azure-container-networking/cns/types"
"github.com/Azure/azure-container-networking/common"
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
"github.com/pkg/errors"
2020-07-16 12:51:11 +03:00
)
2020-07-16 03:34:41 +03:00
// This file contains the internal functions called by either HTTP APIs (api.go) or
// internal APIs (defined in internalapi.go).
// This will be used internally (say by RequestController in case of AKS)
// GetPartitionKey returns the dnc/service partition key currently stored on the
// service. The value is read while holding the service's read lock.
func (service *HTTPRestService) GetPartitionKey() (dncPartitionKey string) {
	service.RLock()
	defer service.RUnlock()
	return service.dncPartitionKey
}
2020-07-16 12:51:11 +03:00
// SetNodeOrchestrator sets the node orchestrator after registering with mDNC.
// The request is encoded as JSON, wrapped in a synthetic POST request and handed
// in-process to the setOrchestratorType handler; the handler's response is
// written to a throwaway recorder and not inspected.
func (service *HTTPRestService) SetNodeOrchestrator(r *cns.SetOrchestratorTypeRequest) {
	// Errors from marshalling and from building the request are discarded here,
	// exactly as before; the handler receives whatever could be constructed.
	payload, _ := json.Marshal(r)
	request, _ := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(payload))
	request.Header.Set(common.ContentType, common.JsonContent)

	recorder := httptest.NewRecorder()
	service.setOrchestratorType(recorder, request)
}
func (service *HTTPRestService) SyncNodeStatus(dncEP, infraVnet, nodeID string, contextFromCNI json.RawMessage) (returnCode types.ResponseCode, errStr string) {
logger.Printf("[Azure CNS] SyncNodeStatus")
var (
resp *http.Response
nodeInfoResponse cns.NodeInfoResponse
body []byte
httpc = common.GetHttpClient()
)
// try to retrieve NodeInfoResponse from mDNC
url := fmt.Sprintf(common.SyncNodeNetworkContainersURLFmt, dncEP, infraVnet, nodeID, dncApiVersion)
req, _ := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, nil)
resp, err := httpc.Do(req)
if err == nil {
if resp.StatusCode == http.StatusOK {
err = json.NewDecoder(resp.Body).Decode(&nodeInfoResponse)
} else {
err = errors.Errorf("http err: %d", resp.StatusCode)
}
resp.Body.Close()
}
if err != nil {
returnCode = types.UnexpectedError
errStr = fmt.Sprintf("[Azure-CNS] Failed to sync node with error: %+v", err)
logger.Errorf(errStr)
return
}
var (
ncsToBeAdded = make(map[string]cns.CreateNetworkContainerRequest)
ncsToBeDeleted = make(map[string]bool)
)
// determine new NCs and NCs to be deleted
service.RLock()
for ncid := range service.state.ContainerStatus {
ncsToBeDeleted[ncid] = true
}
for _, nc := range nodeInfoResponse.NetworkContainers {
ncid := nc.NetworkContainerid
delete(ncsToBeDeleted, ncid)
if savedNc, exists := service.state.ContainerStatus[ncid]; !exists || savedNc.CreateNetworkContainerRequest.Version < nc.Version {
ncsToBeAdded[ncid] = nc
}
}
service.RUnlock()
nmagent get nv version list api V2 refactor (#1744) * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * Update nmagent get nc version list. * Address feedback and fix golint * Fix lint issue. * Fix the remaining 2 lint issues. * Revert back test error generation to address feedback.
2023-01-20 02:10:23 +03:00
skipNCVersionCheck := false
ctx, cancel := context.WithTimeout(context.Background(), nmaAPICallTimeout)
defer cancel()
ncVersionListResp, err := service.nma.GetNCVersionList(ctx)
if err != nil {
skipNCVersionCheck = true
logger.Errorf("failed to get nc version list from nmagent")
}
nmagent get nv version list api V2 refactor (#1744) * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * Update nmagent get nc version list. * Address feedback and fix golint * Fix lint issue. * Fix the remaining 2 lint issues. * Revert back test error generation to address feedback.
2023-01-20 02:10:23 +03:00
if !skipNCVersionCheck {
nmaNCs := map[string]string{}
for _, nc := range ncVersionListResp.Containers {
nmaNCs[strings.TrimPrefix(lowerCaseNCGuid(nc.NetworkContainerID), cns.SwiftPrefix)] = nc.Version
nmagent get nv version list api V2 refactor (#1744) * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * Update nmagent get nc version list. * Address feedback and fix golint * Fix lint issue. * Fix the remaining 2 lint issues. * Revert back test error generation to address feedback.
2023-01-20 02:10:23 +03:00
}
Replace the NMAgent client in CNS with the one from the nmagent package (#1643) * Switch PublishNetworkContainer to nmagent package This removes the PublishNetworkContainer method from the CNS client's nmagent package in favor of using the one from the top-level nmagent client package. This is to ensure that there's only one nmagent client to maintain. * Use Unpublish method from nmagent package The existing unpublish endpoints within CNS used the older nmagent client. This converts them to use the newer one. * Use JoinNetwork from new nmagent client The API in CNS was using the old nmagent client endpoints which we want to phase out so that there is exactly one nmagent client across all systems. * Add SupportedAPIs method to nmagent and use it The CNS client uses this SupportedAPIs method to proxy requests from DNC and detect capabilities of the node. * Add GetNCVersion to nmagent and use it in CNS CNS previously used its own client method for accessing the GetNCVersion endpoint of nmagent. In the interest of having a singular NMAgent client, this adopts the behavior into the nmagent package and uses it within CNS instead. * Use test helpers for context and checking errs Checking whether the error was present and should it be present was annoying and repetitive, so there's a helper now to take care of that. Similarly, contexts are necessary but it's nice to create them from the test's deadline itself. There's a helper to do that now as well. * Fix broken tests & improve the suite There were some broken tests left over from implementing new NMAgent interface methods. This makes those tests pass and also improves the test suite by detangling mixed concerns of the utility functions within. Many of them returned errors and made assertions, which made them confusing to use. The utility functions now only do things you request, and the tests themselves are the only ones making assertions (sometimes through helpers that were added to make those assertions easier). 
* Add GetNCVersionList endpoint to NMAgent client This is the final endpoint that was being used in CNS without being present in the "official" NMAgent client. This moves that implementation, more-or-less, to the nmagent package and consumes it in NMAgent through an interface. * Remove incorrect error shadowing There were a few places where errors were shadowed. Also removed the C-ism of pre-declaring variables for the sake of pre-declaring variables. * Replace error type assertions with errors.As In two instances, type assertions were used instead of errors.As. Apart from being less obvious, there are apparently instances where this can be incorrect with error wrapping. Also, there was an instance where errors.As was mistakenly used in the init clause of an if statement, instead of the predicate. This was corrected to be a conjunctive predicate converting the error and then making assertions using that error. This is safe because short-circuit evaluation will prevent the second half of the predicate from being evaluated if the error is not of that type. * Use context for joinNetwork The linter rightly pointed out that context could be trivially propagated further than it was. This commit does exactly that. * Add error wrapping to an otherwise-opaque error The linter highlighted this error, showing that the error returned would be confusing to a user of this function. This wraps the error indicating that a join network request was issued at the point where the error was produced, to aid in debugging. * Remove error shadowing in the tests This is really somewhat unnecessary because it's just a test. It shouldn't impact correctness, since the errors were properly scoped to their respective if statements. However, to prevent other people's linters from complaining, this corrects the lint. * Silence complaints about dynamic errors in tests This is an unnecessary lint in a test, because it's useful to be able to define errors on the fly when you need them in a test. 
However, the linter demands it presently, so it must be silenced. * Add missing return for fmt.Errorf Surprisingly, a return was missing here. This was caught by the linter, and was most certainly incorrect. * Remove yet another shadowed err There was nothing incorrect about this shadowed err variable, but linters complain about it and fail CI. * Finish wiring in the NMAgent Client There were missing places where the nmagent client wasn't wired in properly in main. This threads the client through completely and also alters some tests to maintain compatibility. * Add config for Wireserver to NMAgent Client This was in reaction to a lint detecting a vestigial use of "WireserverIP". This package variable is no longer in use, however, the spirit of it still exists. The changes adapt the provided configuration into an nmagent.Config for use with the NMAgent Client instead. * Silence the linter The linter complained about a shadowed err, which is fine since it's scoped in the `if`. Also there was a duplicate import which resulted from refactoring. This has been de-duped. * Rename jnr -> req and nma -> nmagent The "jnr" variable was confusing when "req" is far more revealing of what it is. Also, the "nma" alias was left over from a prior iteration when the legacy nmagent package and the new one co-existed. * Rename NodeInquirer -> NodeInterrogator "Interrogator" is more revealing about the set of behavior encapsulated by the interface. Depending on that behavior allows a consumer to interrogate nodes for various information (but not modify anything about them).
2022-10-20 00:38:01 +03:00
nmagent get nv version list api V2 refactor (#1744) * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * Update nmagent get nc version list. * Address feedback and fix golint * Fix lint issue. * Fix the remaining 2 lint issues. * Revert back test error generation to address feedback.
2023-01-20 02:10:23 +03:00
// check if the version is valid and save it to service state
for ncid := range ncsToBeAdded {
waitingForUpdate, _, _ := service.isNCWaitingForUpdate(ncsToBeAdded[ncid].Version, ncsToBeAdded[ncid].NetworkContainerid, nmaNCs)
Replace the NMAgent client in CNS with the one from the nmagent package (#1643) * Switch PublishNetworkContainer to nmagent package This removes the PublishNetworkContainer method from the CNS client's nmagent package in favor of using the one from the top-level nmagent client package. This is to ensure that there's only one nmagent client to maintain. * Use Unpublish method from nmagent package The existing unpublish endpoints within CNS used the older nmagent client. This converts them to use the newer one. * Use JoinNetwork from new nmagent client The API in CNS was using the old nmagent client endpoints which we want to phase out so that there is exactly one nmagent client across all systems. * Add SupportedAPIs method to nmagent and use it The CNS client uses this SupportedAPIs method to proxy requests from DNC and detect capabilities of the node. * Add GetNCVersion to nmagent and use it in CNS CNS previously used its own client method for accessing the GetNCVersion endpoint of nmagent. In the interest of having a singular NMAgent client, this adopts the behavior into the nmagent package and uses it within CNS instead. * Use test helpers for context and checking errs Checking whether the error was present and should it be present was annoying and repetitive, so there's a helper now to take care of that. Similarly, contexts are necessary but it's nice to create them from the test's deadline itself. There's a helper to do that now as well. * Fix broken tests & improve the suite There were some broken tests left over from implementing new NMAgent interface methods. This makes those tests pass and also improves the test suite by detangling mixed concerns of the utility functions within. Many of them returned errors and made assertions, which made them confusing to use. The utility functions now only do things you request, and the tests themselves are the only ones making assertions (sometimes through helpers that were added to make those assertions easier). 
* Add GetNCVersionList endpoint to NMAgent client This is the final endpoint that was being used in CNS without being present in the "official" NMAgent client. This moves that implementation, more-or-less, to the nmagent package and consumes it in NMAgent through an interface. * Remove incorrect error shadowing There were a few places where errors were shadowed. Also removed the C-ism of pre-declaring variables for the sake of pre-declaring variables. * Replace error type assertions with errors.As In two instances, type assertions were used instead of errors.As. Apart from being less obvious, there are apparently instances where this can be incorrect with error wrapping. Also, there was an instance where errors.As was mistakenly used in the init clause of an if statement, instead of the predicate. This was corrected to be a conjunctive predicate converting the error and then making assertions using that error. This is safe because short-circuit evaluation will prevent the second half of the predicate from being evaluated if the error is not of that type. * Use context for joinNetwork The linter rightly pointed out that context could be trivially propagated further than it was. This commit does exactly that. * Add error wrapping to an otherwise-opaque error The linter highlighted this error, showing that the error returned would be confusing to a user of this function. This wraps the error indicating that a join network request was issued at the point where the error was produced, to aid in debugging. * Remove error shadowing in the tests This is really somewhat unnecessary because it's just a test. It shouldn't impact correctness, since the errors were properly scoped to their respective if statements. However, to prevent other people's linters from complaining, this corrects the lint. * Silence complaints about dynamic errors in tests This is an unnecessary lint in a test, because it's useful to be able to define errors on the fly when you need them in a test. 
However, the linter demands it presently, so it must be silenced. * Add missing return for fmt.Errorf Surprisingly, a return was missing here. This was caught by the linter, and was most certainly incorrect. * Remove yet another shadowed err There was nothing incorrect about this shadowed err variable, but linters complain about it and fail CI. * Finish wiring in the NMAgent Client There were missing places where the nmagent client wasn't wired in properly in main. This threads the client through completely and also alters some tests to maintain compatibility. * Add config for Wireserver to NMAgent Client This was in reaction to a lint detecting a vestigial use of "WireserverIP". This package variable is no longer in use, however, the spirit of it still exists. The changes adapt the provided configuration into an nmagent.Config for use with the NMAgent Client instead. * Silence the linter The linter complained about a shadowed err, which is fine since it's scoped in the `if`. Also there was a duplicate import which resulted from refactoring. This has been de-duped. * Rename jnr -> req and nma -> nmagent The "jnr" variable was confusing when "req" is far more revealing of what it is. Also, the "nma" alias was left over from a prior iteration when the legacy nmagent package and the new one co-existed. * Rename NodeInquirer -> NodeInterrogator "Interrogator" is more revealing about the set of behavior encapsulated by the interface. Depending on that behavior allows a consumer to interrogate nodes for various information (but not modify anything about them).
2022-10-20 00:38:01 +03:00
nmagent get nv version list api V2 refactor (#1744) * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * Update nmagent get nc version list. * Address feedback and fix golint * Fix lint issue. * Fix the remaining 2 lint issues. * Revert back test error generation to address feedback.
2023-01-20 02:10:23 +03:00
body, err = json.Marshal(ncsToBeAdded[ncid])
if err != nil {
logger.Errorf("[Azure-CNS] Failed to marshal nc with nc id %s and content %v", ncid, ncsToBeAdded[ncid])
}
nmagent get nv version list api V2 refactor (#1744) * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * tmp commit for onbaording nma v2 * Remove test output file * Remove unnecessary code when CNS onboard get nc version list without token * tmp commit to fix getnc version tests when onboarding nc version api v2 from nmagent. * Fix the unit test for nmagent v2 api change. * Fix unit test TestGetNetworkContainerVersionStatus * Revert back to GetNCVersionF test. * Roll back to get nc version api v1 for test. * Continue revert back and store nc version url * Onboard nmagent get nc version api v2. * Address pr feedback of returning early and remove comment out code. * Remove unnecessary ncVersionURLs and NCVersionRequest. * Remove unnecessary variables. * Update nmagent get nc version api v2 to v2 url * Remove comment out code. * Update nmagent get nc version list. * Address feedback and fix golint * Fix lint issue. * Fix the remaining 2 lint issues. * Revert back test error generation to address feedback.
2023-01-20 02:10:23 +03:00
req, err = http.NewRequestWithContext(ctx, http.MethodPost, "", bytes.NewBuffer(body))
if err != nil {
logger.Errorf("[Azure CNS] Error received while creating http POST request for nc %v", ncsToBeAdded[ncid])
}
req.Header.Set(common.ContentType, common.JsonContent)
w := httptest.NewRecorder()
service.createOrUpdateNetworkContainer(w, req)
result := w.Result()
if result.StatusCode == http.StatusOK {
var resp cns.CreateNetworkContainerResponse
if err = json.Unmarshal(w.Body.Bytes(), &resp); err == nil && resp.Response.ReturnCode == types.Success {
service.Lock()
ncstatus := service.state.ContainerStatus[ncid]
ncstatus.VfpUpdateComplete = !waitingForUpdate
service.state.ContainerStatus[ncid] = ncstatus
service.Unlock()
}
}
result.Body.Close()
}
}
service.Lock()
service.saveState()
service.Unlock()
// delete dangling NCs
for nc := range ncsToBeDeleted {
var body bytes.Buffer
json.NewEncoder(&body).Encode(&cns.DeleteNetworkContainerRequest{NetworkContainerid: nc})
req, err = http.NewRequest(http.MethodPost, "", &body)
if err == nil {
req.Header.Set(common.JsonContent, common.JsonContent)
service.deleteNetworkContainer(httptest.NewRecorder(), req)
} else {
logger.Errorf("[Azure-CNS] Failed to delete NC request to sync state: %s", err.Error())
}
}
return
}
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
// SyncHostNCVersion reconciles the host NC version stored in each container status with the NC version
// reported by NMAgent. When NMAgent reports a newer NC version, CNS refreshes the status of IPs that are
// pending programming. The service lock is held for the whole call, and the outcome (success or failure)
// is recorded in the sync count and latency metrics.
func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMode string) {
	service.Lock()
	defer service.Unlock()

	begin := time.Now()
	programmed, syncErr := service.syncHostNCVersion(ctx, channelMode)

	// An error does not stop us from writing the CNI conflist: it is enough that at least one NC has been
	// programmed to some version.
	if programmed > 0 {
		// Only done once per lifetime of the CNS process. The callee is threadsafe and panics on failure,
		// so it is safe to run it in its own goroutine.
		go service.MustGenerateCNIConflistOnce()
	}

	if syncErr != nil {
		logger.Errorf("sync host error %v", syncErr)
	}

	// Both metrics carry the same label: "true" when the sync succeeded, "false" otherwise.
	succeeded := strconv.FormatBool(syncErr == nil)
	syncHostNCVersionCount.WithLabelValues(succeeded).Inc()
	syncHostNCVersionLatency.WithLabelValues(succeeded).Observe(time.Since(begin).Seconds())
}
// errNonExistentContainerStatus is the sentinel error that syncHostNCVersion wraps and returns when an NC it
// marked as outdated can no longer be found in service.state.ContainerStatus.
var errNonExistentContainerStatus = errors.New("nonExistantContainerstatus")
// syncHostVersion updates the CNS state with the latest programmed versions of NCs attached to the VM. If any NC in local CNS state
// does not match the version that DNC claims to have published, this function will call NMAgent and list the latest programmed versions of
// all NCs and update the CNS state accordingly. This function returns the the total number of NCs on this VM that have been programmed to
// some version, NOT the number of NCs that are up-to-date.
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) (int, error) {
outdatedNCs := map[string]struct{}{}
programmedNCs := map[string]struct{}{}
for idx := range service.state.ContainerStatus {
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
localNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
if err != nil {
logger.Errorf("Received err when change containerstatus.HostVersion %s to int, err msg %v", service.state.ContainerStatus[idx].HostVersion, err)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
continue
}
dncNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
if err != nil {
logger.Errorf("Received err when change nc version %s in containerstatus to int, err msg %v", service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version, err)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
continue
}
// host NC version is the NC version from NMAgent, if it's smaller than NC version from DNC, then append it to indicate it needs update.
if localNCVersion < dncNCVersion {
outdatedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
} else if localNCVersion > dncNCVersion {
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", localNCVersion, dncNCVersion)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
if localNCVersion > -1 {
programmedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
}
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
if len(outdatedNCs) == 0 {
return len(programmedNCs), nil
}
Replace the NMAgent client in CNS with the one from the nmagent package (#1643) * Switch PublishNetworkContainer to nmagent package This removes the PublishNetworkContainer method from the CNS client's nmagent package in favor of using the one from the top-level nmagent client package. This is to ensure that there's only one nmagent client to maintain. * Use Unpublish method from nmagent package The existing unpublish endpoints within CNS used the older nmagent client. This converts them to use the newer one. * Use JoinNetwork from new nmagent client The API in CNS was using the old nmagent client endpoints which we want to phase out so that there is exactly one nmagent client across all systems. * Add SupportedAPIs method to nmagent and use it The CNS client uses this SupportedAPIs method to proxy requests from DNC and detect capabilities of the node. * Add GetNCVersion to nmagent and use it in CNS CNS previously used its own client method for accessing the GetNCVersion endpoint of nmagent. In the interest of having a singular NMAgent client, this adopts the behavior into the nmagent package and uses it within CNS instead. * Use test helpers for context and checking errs Checking whether the error was present and should it be present was annoying and repetitive, so there's a helper now to take care of that. Similarly, contexts are necessary but it's nice to create them from the test's deadline itself. There's a helper to do that now as well. * Fix broken tests & improve the suite There were some broken tests left over from implementing new NMAgent interface methods. This makes those tests pass and also improves the test suite by detangling mixed concerns of the utility functions within. Many of them returned errors and made assertions, which made them confusing to use. The utility functions now only do things you request, and the tests themselves are the only ones making assertions (sometimes through helpers that were added to make those assertions easier). 
* Add GetNCVersionList endpoint to NMAgent client This is the final endpoint that was being used in CNS without being present in the "official" NMAgent client. This moves that implementation, more-or-less, to the nmagent package and consumes it in NMAgent through an interface. * Remove incorrect error shadowing There were a few places where errors were shadowed. Also removed the C-ism of pre-declaring variables for the sake of pre-declaring variables. * Replace error type assertions with errors.As In two instances, type assertions were used instead of errors.As. Apart from being less obvious, there are apparently instances where this can be incorrect with error wrapping. Also, there was an instance where errors.As was mistakenly used in the init clause of an if statement, instead of the predicate. This was corrected to be a conjunctive predicate converting the error and then making assertions using that error. This is safe because short-circuit evaluation will prevent the second half of the predicate from being evaluated if the error is not of that type. * Use context for joinNetwork The linter rightly pointed out that context could be trivially propagated further than it was. This commit does exactly that. * Add error wrapping to an otherwise-opaque error The linter highlighted this error, showing that the error returned would be confusing to a user of this function. This wraps the error indicating that a join network request was issued at the point where the error was produced, to aid in debugging. * Remove error shadowing in the tests This is really somewhat unnecessary because it's just a test. It shouldn't impact correctness, since the errors were properly scoped to their respective if statements. However, to prevent other people's linters from complaining, this corrects the lint. * Silence complaints about dynamic errors in tests This is an unnecessary lint in a test, because it's useful to be able to define errors on the fly when you need them in a test. 
However, the linter demands it presently, so it must be silenced. * Add missing return for fmt.Errorf Surprisingly, a return was missing here. This was caught by the linter, and was most certainly incorrect. * Remove yet another shadowed err There was nothing incorrect about this shadowed err variable, but linters complain about it and fail CI. * Finish wiring in the NMAgent Client There were missing places where the nmagent client wasn't wired in properly in main. This threads the client through completely and also alters some tests to maintain compatibility. * Add config for Wireserver to NMAgent Client This was in reaction to a lint detecting a vestigial use of "WireserverIP". This package variable is no longer in use, however, the spirit of it still exists. The changes adapt the provided configuration into an nmagent.Config for use with the NMAgent Client instead. * Silence the linter The linter complained about a shadowed err, which is fine since it's scoped in the `if`. Also there was a duplicate import which resulted from refactoring. This has been de-duped. * Rename jnr -> req and nma -> nmagent The "jnr" variable was confusing when "req" is far more revealing of what it is. Also, the "nma" alias was left over from a prior iteration when the legacy nmagent package and the new one co-existed. * Rename NodeInquirer -> NodeInterrogator "Interrogator" is more revealing about the set of behavior encapsulated by the interface. Depending on that behavior allows a consumer to interrogate nodes for various information (but not modify anything about them).
2022-10-20 00:38:01 +03:00
ncVersionListResp, err := service.nma.GetNCVersionList(ctx)
if err != nil {
return len(programmedNCs), errors.Wrap(err, "failed to get nc version list from nmagent")
}
nmaNCs := map[string]string{}
for _, nc := range ncVersionListResp.Containers {
nmaNCs[strings.ToLower(nc.NetworkContainerID)] = nc.Version
}
for ncID := range outdatedNCs {
nmaNCVersionStr, ok := nmaNCs[ncID]
if !ok {
// NMA doesn't have this NC that we need programmed yet, bail out
continue
}
nmaNCVersion, err := strconv.Atoi(nmaNCVersionStr)
if err != nil {
logger.Errorf("failed to parse container version of %s: %s", ncID, err)
continue
}
// Check whether it exist in service state and get the related nc info
ncInfo, exist := service.state.ContainerStatus[ncID]
if !exist {
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
// this point, our internal state has changed unexpectedly and we should bail out and try again.
return len(programmedNCs), errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
}
// if the NC still exists in state and is programmed to some version (doesn't have to be latest), add it to our set of NCs that have been programmed
if nmaNCVersion > -1 {
programmedNCs[ncID] = struct{}{}
}
localNCVersion, err := strconv.Atoi(ncInfo.HostVersion)
if err != nil {
logger.Errorf("failed to parse host nc version string %s: %s", ncInfo.HostVersion, err)
continue
}
if localNCVersion > nmaNCVersion {
logger.Errorf("NC version from NMA is decreasing: have %d, got %d", localNCVersion, nmaNCVersion)
continue
}
if channelMode == cns.CRD {
service.MarkIpsAsAvailableUntransacted(ncInfo.ID, nmaNCVersion)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
logger.Printf("Updating NC %s host version from %s to %s", ncID, ncInfo.HostVersion, nmaNCVersionStr)
ncInfo.HostVersion = nmaNCVersionStr
logger.Printf("Updated NC %s host version to %s", ncID, ncInfo.HostVersion)
service.state.ContainerStatus[ncID] = ncInfo
// if we successfully updated the NC, pop it from the needs update set.
delete(outdatedNCs, ncID)
}
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
// need to return an error indicating that
if len(outdatedNCs) > 0 {
return len(programmedNCs), errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
return len(programmedNCs), nil
Add a go routine to update NC host version from NMAgent periodically. (#714) * Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
2020-12-12 00:54:17 +03:00
}
// ReconcileIPAssignment re-creates pod IP assignments in CNS from the supplied
// pod info (indexed by IP address) and NC requests.
//
// A pod may own more than one IP (such as in dual stack scenarios), and IP
// assignment in CNS (as done by requestIPConfigsHelper) does not allow updates:
// it returns the existing state if one already exists for the pod's interface.
// All IPs of a pod therefore have to be requested in one call, so the per-IP
// input is first regrouped per pod key (interface or name+namespace, depending
// on scenario):
//
//	{
//	  "10.0.0.1": podInfo{interface: "aaa-eth0"},
//	  "fe80::1":  podInfo{interface: "aaa-eth0"},
//	}
//
// becomes
//
//	{
//	  "aaa-eth0": podIPs{v4IP: 10.0.0.1, v6IP: fe80::1}
//	}
//
// It returns types.UnexpectedError when the regrouping or the orchestrator
// context cannot be built, types.FailedToAllocateIPConfig when an assignment
// request fails, and types.Success otherwise.
func (service *HTTPRestService) ReconcileIPAssignment(podInfoByIP map[string]cns.PodInfo, ncReqs []*cns.CreateNetworkContainerRequest) types.ResponseCode {
	// Lookup table: secondary IP address -> NC request that carries it.
	ncReqBySecondaryIP := make(map[string]*cns.CreateNetworkContainerRequest)
	for _, ncReq := range ncReqs {
		for _, secIPConfig := range ncReq.SecondaryIPConfigs {
			ncReqBySecondaryIP[secIPConfig.IPAddress] = ncReq
		}
	}

	podsByKey, err := newPodKeyToPodIPsMap(podInfoByIP)
	if err != nil {
		logger.Errorf("could not transform pods indexed by IP address to pod IPs indexed by interface: %v", err)
		return types.UnexpectedError
	}

	for podKey, pod := range podsByKey {
		var (
			desiredIPs []string
			ncIDs      []string
		)

		// Visit the IPv4 address first, then the IPv6 one, skipping unset families.
		for _, ip := range []net.IP{pod.v4IP, pod.v6IP} {
			if ip == nil {
				continue
			}

			addr := ip.String()
			ncReq, found := ncReqBySecondaryIP[addr]
			if !found {
				// it might still be possible to see host networking pods here (where ips are not from ncs) if we are restoring using the kube podinfo provider
				// todo: once kube podinfo provider reconcile flow is removed, this line will not be necessary/should be removed.
				logger.Errorf("ip %s assigned to pod %+v but not found in any nc", ip, pod)
				continue
			}

			logger.Printf("secondary ip %s is assigned to pod %+v, ncId: %s ncVersion: %s", ip, pod, ncReq.NetworkContainerid, ncReq.Version)
			desiredIPs = append(desiredIPs, addr)
			ncIDs = append(ncIDs, ncReq.NetworkContainerid)
		}

		// Nothing to assign; this may happen for pods in the host network.
		if len(desiredIPs) == 0 {
			continue
		}

		orchestratorContext, err := pod.OrchestratorContext()
		if err != nil {
			logger.Errorf("Failed to marshal KubernetesPodInfo, error: %v", err)
			return types.UnexpectedError
		}

		// Request every IP of this pod in a single call.
		request := cns.IPConfigsRequest{
			DesiredIPAddresses:  desiredIPs,
			OrchestratorContext: orchestratorContext,
			InfraContainerID:    pod.InfraContainerID(),
			PodInterfaceID:      pod.InterfaceID(),
		}
		if _, err := requestIPConfigsHelper(service, request); err != nil {
			logger.Errorf("requestIPConfigsHelper failed for pod key %s, podInfo %+v, ncIds %v, error: %v", podKey, pod, ncIDs, err)
			return types.FailedToAllocateIPConfig
		}
	}

	return types.Success
}
// CreateNCs creates or updates each NC request in order via
// CreateOrUpdateNetworkContainerInternal. It stops at the first request that
// does not return types.Success and returns that code; it returns
// types.Success when every request succeeds (or ncReqs is empty).
func (service *HTTPRestService) CreateNCs(ncReqs []*cns.CreateNetworkContainerRequest) types.ResponseCode {
	for i := range ncReqs {
		if code := service.CreateOrUpdateNetworkContainerInternal(ncReqs[i]); code != types.Success {
			return code
		}
	}
	return types.Success
}
// ReconcileIPAMStateForSwift rebuilds CNS IPAM state from the NC requests, the
// pod info indexed by IP address, and the NodeNetworkConfig.
//
// Steps, in order:
//  1. With no NC requests there is no CRD state yet; return types.Success.
//  2. Create all NCs in CNS (no IP assignment yet).
//  3. Reconcile the pod IP assignments.
//  4. Mark the IPs listed in nnc.Spec.IPsNotInUse as pending release.
//
// The first non-success code from steps 2 or 3 is returned as-is; a failure in
// step 4 is logged together with its cause and reported as
// types.UnexpectedError.
func (service *HTTPRestService) ReconcileIPAMStateForSwift(ncReqs []*cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, nnc *v1alpha.NodeNetworkConfig) types.ResponseCode {
	logger.Printf("Reconciling CNS IPAM state with nc requests: [%+v], PodInfo [%+v], NNC: [%+v]", ncReqs, podInfoByIP, nnc)

	// if no nc reqs, there is no CRD state yet
	if len(ncReqs) == 0 {
		logger.Printf("CNS starting with no NC state, podInfoMap count %d", len(podInfoByIP))
		return types.Success
	}

	// first step in reconciliation is to create all the NCs in CNS, no IP assignment yet.
	if returnCode := service.CreateNCs(ncReqs); returnCode != types.Success {
		return returnCode
	}

	logger.Debugf("ncReqs created successfully, now save IPs")

	// now reconcile IPAM state.
	if returnCode := service.ReconcileIPAssignment(podInfoByIP, ncReqs); returnCode != types.Success {
		return returnCode
	}

	if err := service.MarkExistingIPsAsPendingRelease(nnc.Spec.IPsNotInUse); err != nil {
		// Include the underlying error: without it the log only shows the IP list
		// and the reason for the failure is lost.
		logger.Errorf("[Azure CNS] Error. Failed to mark IPs as pending %v: %v", nnc.Spec.IPsNotInUse, err)
		return types.UnexpectedError
	}

	return types.Success
}
// ReconcileIPAMStateForNodeSubnet rebuilds CNS IPAM state for the node subnet
// scenario from the NC requests and the pod info indexed by IP address.
//
// Exactly one NC request is required; any other count is logged and reported as
// types.NetworkContainerNotSpecified. The NC is created first (no IP assignment
// yet), then the pod IP assignments are reconciled. The first non-success code
// is returned as-is.
//
// todo: there is some redundancy between this function and
// ReconcileIPAMStateForSwift. The difference is that this one doesn't include
// the NNC parameter. We may want to unify the common parts.
func (service *HTTPRestService) ReconcileIPAMStateForNodeSubnet(ncReqs []*cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo) types.ResponseCode {
	logger.Printf("Reconciling CNS IPAM state with nc requests: [%+v], PodInfo [%+v]", ncReqs, podInfoByIP)

	if len(ncReqs) != 1 {
		logger.Errorf("Nodesubnet should always have 1 NC to hold secondary IPs")
		return types.NetworkContainerNotSpecified
	}

	// Create the NC in CNS before touching any IP assignment.
	if code := service.CreateNCs(ncReqs); code != types.Success {
		return code
	}

	logger.Debugf("ncReqs created successfully, now save IPs")

	// The result of the IP assignment reconciliation is the overall result.
	return service.ReconcileIPAssignment(podInfoByIP, ncReqs)
}
// errIPParse is the sentinel wrapped when an IP address string cannot be parsed.
var errIPParse = errors.New("parse IP")

// errMultipleIPPerFamily is the sentinel wrapped when a pod is associated with
// more than one address of the same IP family.
var errMultipleIPPerFamily = errors.New("multiple IPs per family")
// newPodKeyToPodIPsMap regroups pod info indexed by IP address into pod IPs
// indexed by pod key (the value of PodInfo.Key()).
//
// Each pod key may carry at most one IPv4 and one IPv6 address. The function
// returns an error wrapping errIPParse when an address string cannot be parsed,
// and an error wrapping errMultipleIPPerFamily when a second address of the
// same family shows up for a pod key.
func newPodKeyToPodIPsMap(podInfoByIP map[string]cns.PodInfo) (map[string]podIPs, error) {
	grouped := make(map[string]podIPs)

	for addr, info := range podInfoByIP {
		key := info.Key()

		// First sighting of this pod key: seed the entry with its pod info.
		entry, seen := grouped[key]
		if !seen {
			entry.PodInfo = info
		}

		parsed := net.ParseIP(addr)
		if parsed == nil {
			return nil, errors.Wrapf(errIPParse, "could not parse ip string %q on pod %+v", addr, info)
		}

		if parsed.To4() != nil {
			if entry.v4IP != nil {
				return nil, errors.Wrapf(errMultipleIPPerFamily, "multiple ipv4 addresses (%v, %v) associated to pod %+v", entry.v4IP, parsed, info)
			}
			entry.v4IP = parsed
		} else if parsed.To16() != nil {
			if entry.v6IP != nil {
				return nil, errors.Wrapf(errMultipleIPPerFamily, "multiple ipv6 addresses (%v, %v) associated to pod %+v", entry.v6IP, parsed, info)
			}
			entry.v6IP = parsed
		}

		// entry is a copy of the map value, so write it back.
		grouped[key] = entry
	}

	return grouped, nil
}
// podIPs are all the IPs associated with a pod, along with pod info.
// It holds at most one address per IP family.
type podIPs struct {
// PodInfo is the embedded pod info; its methods are promoted onto podIPs.
cns.PodInfo
// v4IP is the pod's IPv4 address, or nil when none has been recorded.
v4IP net.IP
// v6IP is the pod's IPv6 address, or nil when none has been recorded.
v6IP net.IP
}
// GetNetworkContainerInternal gets network container details.
//
// It returns the first response produced by getAllNetworkContainerResponses
// together with that response's return code.
// NOTE(review): this indexes element 0 unconditionally, so it assumes the helper
// never returns an empty slice — confirm against getAllNetworkContainerResponses.
func (service *HTTPRestService) GetNetworkContainerInternal(
	req cns.GetNetworkContainerRequest,
) (cns.GetNetworkContainerResponse, types.ResponseCode) {
	first := service.getAllNetworkContainerResponses(req)[0]
	return first, first.Response.ReturnCode
}
// DeleteNetworkContainerInternal deletes a network container.
//
// The NC is removed from the container status map and from the NC ID lists kept
// per orchestrator context, after which the state is saved. An NC that does not
// exist is only logged. types.Success is returned in every case.
func (service *HTTPRestService) DeleteNetworkContainerInternal(
	req cns.DeleteNetworkContainerRequest,
) types.ResponseCode {
	targetID := req.NetworkContainerid

	if _, found := service.getNetworkContainerDetails(targetID); !found {
		logger.Printf("network container for id %v doesn't exist", targetID)
		return types.Success
	}

	service.Lock()
	defer service.Unlock()

	// delete on a nil map is a no-op, so no nil guard is needed.
	delete(service.state.ContainerStatus, targetID)

	// Ranging over a nil map performs zero iterations, so no nil guard is needed.
	for orchestratorContext, ids := range service.state.ContainerIDByOrchestratorContext { //nolint:gocritic // copy is ok
		if !ids.Contains(targetID) {
			continue
		}

		ids.Delete(targetID)

		// Drop the context once its list is empty and stop scanning; a context
		// that still holds other NC IDs does not end the scan.
		if *ids == "" {
			delete(service.state.ContainerIDByOrchestratorContext, orchestratorContext)
			break
		}
	}

	service.saveState()

	return types.Success
}
// MustEnsureNoStaleNCs removes from CNS state every NC whose ID is not listed in
// validNCIDs, and saves the state if anything was removed.
//
// It panics when a stale NC still has IPs in the Assigned state, since that is an
// unexpected CNS state which must be alerted on. The service lock is held for the
// whole operation and released by the deferred unlock, including on panic.
func (service *HTTPRestService) MustEnsureNoStaleNCs(validNCIDs []string) {
	valid := make(map[string]struct{}, len(validNCIDs))
	for _, ncID := range validNCIDs {
		valid[ncID] = struct{}{}
	}

	service.Lock()
	defer service.Unlock()

	// Group the currently assigned IPs by the NC they belong to.
	ncIDToAssignedIPs := make(map[string][]cns.IPConfigurationStatus)
	for _, ipInfo := range service.PodIPConfigState { // nolint:gocritic // copy is fine; it's a larger change to modify the map to hold pointers
		if ipInfo.GetState() == types.Assigned {
			ncIDToAssignedIPs[ipInfo.NCID] = append(ncIDToAssignedIPs[ipInfo.NCID], ipInfo)
		}
	}

	mutated := false
	for ncID := range service.state.ContainerStatus {
		if _, ok := valid[ncID]; ok {
			continue
		}

		// stale NCs with assigned IPs are an unexpected CNS state which we need to alert on.
		if assignedIPs, hasAssignedIPs := ncIDToAssignedIPs[ncID]; hasAssignedIPs {
			msg := fmt.Sprintf("Unexpected state: found stale NC ID %s in CNS state with %d assigned IPs: %+v", ncID, len(assignedIPs), assignedIPs)
			// msg is already formatted data; pass it as an argument so that a '%'
			// inside the dumped IP state is not interpreted as a format verb.
			logger.Errorf("%s", msg)
			panic(msg)
		}

		logger.Errorf("[Azure CNS] Found stale NC ID %s in CNS state. Removing...", ncID)
		delete(service.state.ContainerStatus, ncID)
		mutated = true
	}

	if mutated {
		// The result of saveState is deliberately discarded here.
		// NOTE(review): presumably saveState reports its own failures — confirm.
		_ = service.saveState()
	}
}
// This API will be called by CNS RequestController on CRD update.
func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns.CreateNetworkContainerRequest) types.ResponseCode {
2020-07-16 12:51:11 +03:00
if req.NetworkContainerid == "" {
logger.Errorf("[Azure CNS] Error. NetworkContainerid is empty")
return types.NetworkContainerNotSpecified
2020-07-16 12:51:11 +03:00
}
// For now only RequestController uses this API which will be initialized only for AKS scenario.
// Validate ContainerType is set as Docker
if service.state.OrchestratorType != cns.KubernetesCRD && service.state.OrchestratorType != cns.Kubernetes {
2020-07-16 12:51:11 +03:00
logger.Errorf("[Azure CNS] Error. Unsupported OrchestratorType: %s", service.state.OrchestratorType)
return types.UnsupportedOrchestratorType
2020-07-16 12:51:11 +03:00
}
if req.NetworkContainerid == nodesubnet.NodeSubnetNCID {
// For NodeSubnet scenarios, Validate PrimaryCA must be empty
if req.IPConfiguration.IPSubnet.IPAddress != "" {
logger.Errorf("[Azure CNS] Error. PrimaryCA is invalid, NC Req: %v", req)
return types.InvalidPrimaryIPConfig
}
} else {
// For Swift scenarios, Validate PrimaryCA must never be empty
err := validateIPSubnet(req.IPConfiguration.IPSubnet)
if err != nil {
logger.Errorf("[Azure CNS] Error. PrimaryCA is invalid, NC Req: %v", req)
return types.InvalidPrimaryIPConfig
}
2020-07-17 08:22:10 +03:00
}
// Validate SecondaryIPConfig
for _, secIPConfig := range req.SecondaryIPConfigs {
2020-07-17 08:22:10 +03:00
// Validate Ipconfig
if secIPConfig.IPAddress == "" {
logger.Errorf("Failed to add IPConfig to state: %+v, empty IPSubnet.IPAddress", secIPConfig)
return types.InvalidSecondaryIPConfig
2020-07-17 08:22:10 +03:00
}
2020-07-16 12:51:11 +03:00
}
// Validate if state exists already
2020-07-23 03:11:41 +03:00
existingNCInfo, ok := service.getNetworkContainerDetails(req.NetworkContainerid)
2020-07-16 12:51:11 +03:00
if ok {
2020-07-23 03:11:41 +03:00
existingReq := existingNCInfo.CreateNetworkContainerRequest
if !reflect.DeepEqual(existingReq.IPConfiguration.IPSubnet, req.IPConfiguration.IPSubnet) {
logger.Errorf("[Azure CNS] Error. PrimaryCA is not same, NCId %s, old CA %s/%d, new CA %s/%d",
req.NetworkContainerid,
existingReq.IPConfiguration.IPSubnet.IPAddress,
existingReq.IPConfiguration.IPSubnet.PrefixLength,
req.IPConfiguration.IPSubnet.IPAddress,
req.IPConfiguration.IPSubnet.PrefixLength)
return types.PrimaryCANotSame
2020-07-16 12:51:11 +03:00
}
}
// This will Create Or Update the NC state.
returnCode, returnMessage := service.saveNetworkContainerGoalState(*req)
2020-07-16 12:51:11 +03:00
// If the NC was created successfully, log NC snapshot.
if returnCode == 0 {
logNCSnapshot(*req)
service.publishIPStateMetrics()
2020-07-16 12:51:11 +03:00
} else {
logger.Errorf(returnMessage)
}
if service.Options[common.OptProgramSNATIPTables] == true {
returnCode, returnMessage = service.programSNATRules(req)
if returnCode != 0 {
logger.Errorf(returnMessage)
}
}
return returnCode
}
// SetVFForAccelnetNICs is the exported entry point for setVFForAccelnetNICs; it
// calls that method and returns its error unchanged.
func (service *HTTPRestService) SetVFForAccelnetNICs() error {
return service.setVFForAccelnetNICs()
}