fix: generate cni conflist if NCs already exist in state (#1940)

* fix: generate cni conflist if NCs already exist in state

* fix: lint

* fix: data race in test

* fix: lint in tests
This commit is contained in:
Matthew Long 2023-05-04 16:29:25 -07:00 коммит произвёл GitHub
Родитель b9861e2582
Коммит 7f223f2b49
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 224 добавлений и 11 удалений

Просмотреть файл

@ -164,7 +164,13 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo
service.Lock()
defer service.Unlock()
start := time.Now()
err := service.syncHostNCVersion(ctx, channelMode)
programmedNCCount, err := service.syncHostNCVersion(ctx, channelMode)
// even if we get an error, we want to write the CNI conflist if we have any NC programmed to any version
if programmedNCCount > 0 {
// This will only be done once per lifetime of the CNS process. This function is threadsafe and will panic
// if it fails, so it is safe to call in a non-preemptable goroutine.
go service.MustGenerateCNIConflistOnce()
}
if err != nil {
logger.Errorf("sync host error %v", err)
}
@ -174,8 +180,13 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo
var errNonExistentContainerStatus = errors.New("nonExistantContainerstatus")
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) error {
// syncHostVersion updates the CNS state with the latest programmed versions of NCs attached to the VM. If any NC in local CNS state
// does not match the version that DNC claims to have published, this function will call NMAgent and list the latest programmed versions of
// all NCs and update the CNS state accordingly. This function returns the the total number of NCs on this VM that have been programmed to
// some version, NOT the number of NCs that are up-to-date.
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) (int, error) {
outdatedNCs := map[string]struct{}{}
programmedNCs := map[string]struct{}{}
for idx := range service.state.ContainerStatus {
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
localNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
@ -194,13 +205,17 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
} else if localNCVersion > dncNCVersion {
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", localNCVersion, dncNCVersion)
}
if localNCVersion > -1 {
programmedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
}
}
if len(outdatedNCs) == 0 {
return nil
return len(programmedNCs), nil
}
ncVersionListResp, err := service.nma.GetNCVersionList(ctx)
if err != nil {
return errors.Wrap(err, "failed to get nc version list from nmagent")
return len(programmedNCs), errors.Wrap(err, "failed to get nc version list from nmagent")
}
nmaNCs := map[string]string{}
@ -223,8 +238,13 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
if !exist {
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
// this point, our internal state has changed unexpectedly and we should bail out and try again.
return errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
return len(programmedNCs), errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
}
// if the NC still exists in state and is programmed to some version (doesn't have to be latest), add it to our set of NCs that have been programmed
if nmaNCVersion > -1 {
programmedNCs[ncID] = struct{}{}
}
localNCVersion, err := strconv.Atoi(ncInfo.HostVersion)
if err != nil {
logger.Errorf("failed to parse host nc version string %s: %s", ncInfo.HostVersion, err)
@ -247,14 +267,10 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
// need to return an error indicating that
if len(outdatedNCs) > 0 {
return errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
return len(programmedNCs), errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
}
// if NMA has programmed all the NCs that we expect, we should write the CNI conflist. This will only be done
// once per lifetime of the CNS process. This function is threadsafe and will panic if it fails, so it is safe
// to call in a non-preemptable goroutine.
go service.MustGenerateCNIConflistOnce()
return nil
return len(programmedNCs), nil
}
// This API will be called by CNS RequestController on CRD update.

Просмотреть файл

@ -9,7 +9,9 @@ import (
"os"
"reflect"
"strconv"
"sync"
"testing"
"time"
"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/cns/fakes"
@ -17,6 +19,8 @@ import (
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
nma "github.com/Azure/azure-container-networking/nmagent"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
)
const (
@ -656,3 +660,196 @@ func restartService() {
os.Exit(1)
}
}
type mockCNIConflistGenerator struct {
generatedCount int
mutex sync.Mutex
}
func (*mockCNIConflistGenerator) Close() error { return nil }
func (m *mockCNIConflistGenerator) Generate() error {
m.mutex.Lock()
defer m.mutex.Unlock()
m.generatedCount++
return nil
}
func (m *mockCNIConflistGenerator) getGeneratedCount() int {
m.mutex.Lock()
defer m.mutex.Unlock()
return m.generatedCount
}
// TestCNIConflistGenerationNewNC tests that discovering a new programmed NC in CNS state will trigger CNI conflist generation
func TestCNIConflistGenerationNewNC(t *testing.T) {
ncID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
mockgen := &mockCNIConflistGenerator{}
service := &HTTPRestService{
cniConflistGenerator: mockgen,
state: &httpRestServiceState{
ContainerStatus: map[string]containerstatus{
ncID: {
ID: ncID,
HostVersion: "-1",
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
Version: "0",
},
},
},
},
nma: &fakes.NMAgentClientFake{
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
return nma.NCVersionList{
Containers: []nma.NCVersion{
{
NetworkContainerID: ncID,
Version: "0",
},
},
}, nil
},
},
}
service.SyncHostNCVersion(context.Background(), cns.CRD)
// CNI conflist gen happens in goroutine so sleep for a second to let it run
time.Sleep(time.Second)
assert.Equal(t, 1, mockgen.getGeneratedCount())
}
// TestCNIConflistGenerationExistingNC tests that if the CNS starts up with a NC already in its state, it will still generate the conflist
func TestCNIConflistGenerationExistingNC(t *testing.T) {
ncID := "some-existing-nc" //nolint:goconst // value not shared across tests, can change without issue
mockgen := &mockCNIConflistGenerator{}
service := &HTTPRestService{
cniConflistGenerator: mockgen,
state: &httpRestServiceState{
ContainerStatus: map[string]containerstatus{
ncID: {
ID: ncID,
HostVersion: "0",
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
Version: "0",
},
},
},
},
}
service.SyncHostNCVersion(context.Background(), cns.CRD)
// CNI conflist gen happens in goroutine so sleep for a second to let it run
time.Sleep(time.Second)
assert.Equal(t, 1, mockgen.getGeneratedCount())
}
// TestCNIConflistGenerationNewNCTwice tests that discovering a new programmed NC in CNS state will trigger CNI conflist generation, but syncing
// the host NC version a second time does not regenerate the conflist (conflist should only get generated once per binary lifetime)
func TestCNIConflistGenerationNewNCTwice(t *testing.T) {
ncID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
mockgen := &mockCNIConflistGenerator{}
service := &HTTPRestService{
cniConflistGenerator: mockgen,
state: &httpRestServiceState{
ContainerStatus: map[string]containerstatus{
ncID: {
ID: ncID,
HostVersion: "-1",
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
Version: "0",
},
},
},
},
nma: &fakes.NMAgentClientFake{
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
return nma.NCVersionList{
Containers: []nma.NCVersion{
{
NetworkContainerID: ncID,
Version: "0",
},
},
}, nil
},
},
}
service.SyncHostNCVersion(context.Background(), cns.CRD)
// CNI conflist gen happens in goroutine so sleep for a second to let it run
time.Sleep(time.Second)
assert.Equal(t, 1, mockgen.getGeneratedCount())
service.SyncHostNCVersion(context.Background(), cns.CRD)
// CNI conflist gen happens in goroutine so sleep for a second to let it run
time.Sleep(time.Second)
assert.Equal(t, 1, mockgen.getGeneratedCount()) // should still be one
}
// TestCNIConflistNotGenerated tests that the cni conflist is not generated if no NCs are programmed
func TestCNIConflistNotGenerated(t *testing.T) {
newNCID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
mockgen := &mockCNIConflistGenerator{}
service := &HTTPRestService{
cniConflistGenerator: mockgen,
state: &httpRestServiceState{
ContainerStatus: map[string]containerstatus{
newNCID: {
ID: newNCID,
HostVersion: "-1",
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
Version: "0",
},
},
},
},
nma: &fakes.NMAgentClientFake{
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
return nma.NCVersionList{}, nil
},
},
}
service.SyncHostNCVersion(context.Background(), cns.CRD)
// CNI conflist gen happens in goroutine so sleep for a second to let it run
time.Sleep(time.Second)
assert.Equal(t, 0, mockgen.getGeneratedCount())
}
// TestCNIConflistGenerationOnNMAError tests that the cni conflist is generated as long as we have at least one programmed NC even if
// the call to NMA to list NCs fails
func TestCNIConflistGenerationOnNMAError(t *testing.T) {
newNCID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
existingNCID := "some-existing-nc" //nolint:goconst // value not shared across tests, can change without issue
mockgen := &mockCNIConflistGenerator{}
service := &HTTPRestService{
cniConflistGenerator: mockgen,
state: &httpRestServiceState{
ContainerStatus: map[string]containerstatus{
newNCID: {
ID: newNCID,
HostVersion: "-1",
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
Version: "0",
},
},
existingNCID: {
ID: existingNCID,
HostVersion: "0",
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
Version: "0",
},
},
},
},
nma: &fakes.NMAgentClientFake{
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
return nma.NCVersionList{}, errors.New("some nma error")
},
},
}
service.SyncHostNCVersion(context.Background(), cns.CRD)
// CNI conflist gen happens in goroutine so sleep for a second to let it run
time.Sleep(time.Second)
assert.Equal(t, 1, mockgen.getGeneratedCount())
}