CNS: Fix IPAM scaler down to handle incremental decrease (#731)

neaggarwMS 2020-11-16 16:47:28 -08:00 committed by GitHub
Parent f0907b4e82
Commit 28207fc493
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 112 additions and 20 deletions

View file

@@ -135,17 +135,19 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
     defer ipm.Unlock()
     var (
-        err            error
-        pendingRelease []cns.IPConfigurationStatus
+        err error
     )
+    pendingRelease := make(map[string]cns.IPConfigurationStatus)
     defer func() {
         // if there was an error, and not all ip's have been freed, restore state
         if err != nil && len(pendingRelease) != numberOfIPsToMark {
-            for i := range pendingRelease {
-                ipm.AvailableIPIDStack.Push(pendingRelease[i].ID)
-                ipm.AvailableIPConfigState[pendingRelease[i].ID] = pendingRelease[i]
-                delete(ipm.PendingReleaseIPConfigState, pendingRelease[i].ID)
+            for uuid, ipState := range pendingRelease {
+                ipState.State = cns.Available
+                ipm.AvailableIPIDStack.Push(pendingRelease[uuid].ID)
+                ipm.AvailableIPConfigState[pendingRelease[uuid].ID] = ipState
+                delete(ipm.PendingReleaseIPConfigState, pendingRelease[uuid].ID)
             }
         }
     }()
@@ -157,7 +159,10 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
         }
         // add all pending release to a slice
-        pendingRelease = append(pendingRelease, ipm.AvailableIPConfigState[id])
+        ipConfig := ipm.AvailableIPConfigState[id]
+        ipConfig.State = cns.PendingRelease
+        pendingRelease[id] = ipConfig
         delete(ipm.AvailableIPConfigState, id)
     }
@@ -166,7 +171,7 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
         ipm.PendingReleaseIPConfigState[pendingRelease[i].ID] = pendingRelease[i]
     }
-    return ipm.PendingReleaseIPConfigState, nil
+    return pendingRelease, nil
 }
 type HTTPServiceFake struct {
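
To make the new bookkeeping easier to follow outside the diff, here is a minimal, self-contained sketch of the map-based pending-release flow these hunks introduce. The types and fields are simplified stand-ins (there is no ID stack here, and ipConfig is not the real cns.IPConfigurationStatus); only the shape of the logic mirrors the change: IPs move into a map[string] of pending-release entries, their State flips to pending release, only the IPs moved by this call are returned, and a failed run restores both the state field and the available map.

package main

import (
	"errors"
	"fmt"
)

// Simplified stand-in for cns.IPConfigurationStatus.
type ipConfig struct {
	ID    string
	State string
}

const (
	stateAvailable      = "Available"
	statePendingRelease = "PendingRelease"
)

type ipStateManager struct {
	available      map[string]ipConfig // IPs free to hand out to pods
	pendingRelease map[string]ipConfig // IPs marked to be released back to DNC
}

// markIPsAsPending moves n IPs from available to pendingRelease and returns
// only the IPs moved by this call, mirroring the new `return pendingRelease, nil`.
// On partial failure it rolls back, flipping State back to Available as the new
// deferred cleanup does.
func (m *ipStateManager) markIPsAsPending(n int) (map[string]ipConfig, error) {
	moved := make(map[string]ipConfig)
	for id, cfg := range m.available {
		if len(moved) == n {
			break
		}
		cfg.State = statePendingRelease
		moved[id] = cfg
		m.pendingRelease[id] = cfg
		delete(m.available, id)
	}
	if len(moved) != n {
		for id, cfg := range moved { // restore state on partial failure
			cfg.State = stateAvailable
			m.available[id] = cfg
			delete(m.pendingRelease, id)
		}
		return nil, errors.New("not enough available IPs to mark as pending")
	}
	return moved, nil
}

func main() {
	m := &ipStateManager{
		available: map[string]ipConfig{
			"uuid-1": {ID: "uuid-1", State: stateAvailable},
			"uuid-2": {ID: "uuid-2", State: stateAvailable},
		},
		pendingRelease: make(map[string]ipConfig),
	}
	moved, err := m.markIPsAsPending(1)
	fmt.Println(len(moved), err, len(m.available), len(m.pendingRelease)) // 1 <nil> 1 1
}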

View file

@@ -107,12 +107,12 @@ func (pm *CNSIPAMPoolMonitor) increasePoolSize() error {
     pm.cachedNNC.Spec.RequestedIPCount += pm.scalarUnits.BatchSize
     // pass nil map to CNStoCRDSpec because we don't want to modify the to be deleted ipconfigs
-    pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(nil, pm.cachedNNC.Spec.RequestedIPCount)
+    pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(false)
     if err != nil {
         return err
     }
-    logger.Printf("[ipam-pool-monitor] Increasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's:%v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()))
+    logger.Printf("[ipam-pool-monitor] Increasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's:%v, ToBeDeleted Count: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()), len(pm.cachedNNC.Spec.IPsNotInUse))
     return pm.rc.UpdateCRDSpec(context.Background(), pm.cachedNNC.Spec)
 }
@@ -124,18 +124,20 @@ func (pm *CNSIPAMPoolMonitor) decreasePoolSize() error {
     pm.cachedNNC.Spec.RequestedIPCount -= pm.scalarUnits.BatchSize
     // mark n number of IP's as pending
-    pendingIPAddresses, err := pm.cns.MarkIPsAsPending(int(pm.scalarUnits.BatchSize))
+    pendingIpAddresses, err := pm.cns.MarkIPsAsPending(int(pm.scalarUnits.BatchSize))
     if err != nil {
         return err
     }
+    logger.Printf("[ipam-pool-monitor] Updated Requested count %v, Releasing ips: %+v", pm.cachedNNC.Spec.RequestedIPCount, pendingIpAddresses)
     // convert the pending IP addresses to a spec
-    pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(pendingIPAddresses, pm.cachedNNC.Spec.RequestedIPCount)
+    pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(false)
     if err != nil {
         return err
     }
+    pm.pendingRelease = true
-    logger.Printf("[ipam-pool-monitor] Decreasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()))
+    logger.Printf("[ipam-pool-monitor] Decreasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's: %v, ToBeDeleted Count: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()), len(pm.cachedNNC.Spec.IPsNotInUse))
     return pm.rc.UpdateCRDSpec(context.Background(), pm.cachedNNC.Spec)
 }
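
Before moving on to cleanPendingRelease, here is roughly what the incremental decrease amounts to once the hunks above are combined (illustrative types only; this is not the real nnc.NodeNetworkConfigSpec or pool monitor): each decrease step shrinks the requested count by one batch, marks one more batch as pending in CNS, and rebuilds IPsNotInUse from everything still pending, so a second batch adds to the list instead of replacing the first.

package main

import "fmt"

// Illustrative stand-in for the CRD spec; not the real nnc.NodeNetworkConfigSpec.
type spec struct {
	RequestedIPCount int64
	IPsNotInUse      []string
}

func main() {
	const batchSize = 10
	var pendingInCNS []string // every IP CNS currently holds in PendingRelease
	s := spec{RequestedIPCount: 30}

	for step := 1; step <= 2; step++ {
		// decreasePoolSize: shrink the request by one batch ...
		s.RequestedIPCount -= batchSize
		// ... mark one more batch as pending in CNS ...
		for i := 0; i < batchSize; i++ {
			pendingInCNS = append(pendingInCNS, fmt.Sprintf("uuid-%d-%d", step, i))
		}
		// ... and rebuild IPsNotInUse from everything pending, not just this batch.
		s.IPsNotInUse = append([]string(nil), pendingInCNS...)
		fmt.Printf("after step %d: requested=%d notInUse=%d\n", step, s.RequestedIPCount, len(s.IPsNotInUse))
	}
	// Prints: requested=20 notInUse=10, then requested=10 notInUse=20.
}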
@@ -146,7 +148,7 @@ func (pm *CNSIPAMPoolMonitor) cleanPendingRelease() error {
     defer pm.mu.Unlock()
     var err error
-    pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(nil, pm.cachedNNC.Spec.RequestedIPCount)
+    pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(true)
     if err != nil {
         logger.Printf("[ipam-pool-monitor] Failed to translate ")
     }
@@ -156,19 +158,22 @@ func (pm *CNSIPAMPoolMonitor) cleanPendingRelease() error {
 }
-// CNSToCRDSpec translates CNS's map of Ips to be released and requested ip count into a CRD Spec
-func MarkIPsAsPendingInCRD(toBeDeletedSecondaryIPConfigs map[string]cns.IPConfigurationStatus, ipCount int64) (nnc.NodeNetworkConfigSpec, error) {
+func (pm *CNSIPAMPoolMonitor) createNNCSpecForCRD(resetNotInUseList bool) (nnc.NodeNetworkConfigSpec, error) {
     var (
         spec nnc.NodeNetworkConfigSpec
-        uuid string
     )
-    spec.RequestedIPCount = ipCount
+    // Update the count from cached spec
+    spec.RequestedIPCount = pm.cachedNNC.Spec.RequestedIPCount
-    if toBeDeletedSecondaryIPConfigs == nil {
+    // Discard the ToBeDeleted list if requested. This happens if DNC has cleaned up the pending ips and CNS has also updated its state.
+    if resetNotInUseList == true {
         spec.IPsNotInUse = make([]string, 0)
     } else {
-        for uuid = range toBeDeletedSecondaryIPConfigs {
-            spec.IPsNotInUse = append(spec.IPsNotInUse, uuid)
+        // Get All Pending IPs from CNS and populate it again.
+        pendingIps := pm.cns.GetPendingReleaseIPConfigs()
+        for _, pendingIp := range pendingIps {
+            spec.IPsNotInUse = append(spec.IPsNotInUse, pendingIp.ID)
         }
     }
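
A condensed view of what the new createNNCSpecForCRD does, using simplified stand-in types (the real code uses nnc.NodeNetworkConfigSpec, cns.IPConfigurationStatus, and pm.cns.GetPendingReleaseIPConfigs()): carry over the cached requested count, then either clear IPsNotInUse (after DNC has cleaned up) or repopulate it from every IP CNS still reports as pending release.

package main

import "fmt"

// Simplified stand-ins; the real types are nnc.NodeNetworkConfigSpec and cns.IPConfigurationStatus.
type nncSpec struct {
	RequestedIPCount int64
	IPsNotInUse      []string
}

type ipStatus struct{ ID string }

type poolMonitor struct {
	cachedRequestedIPCount int64
	pendingReleaseIPs      func() []ipStatus // stands in for pm.cns.GetPendingReleaseIPConfigs()
}

// createSpec mirrors the shape of createNNCSpecForCRD in the hunk above.
func (pm *poolMonitor) createSpec(resetNotInUseList bool) nncSpec {
	s := nncSpec{RequestedIPCount: pm.cachedRequestedIPCount}
	if resetNotInUseList {
		// DNC has already processed the released IPs; start with an empty list.
		s.IPsNotInUse = make([]string, 0)
		return s
	}
	// Otherwise repopulate the list from every IP still pending release in CNS.
	for _, ip := range pm.pendingReleaseIPs() {
		s.IPsNotInUse = append(s.IPsNotInUse, ip.ID)
	}
	return s
}

func main() {
	pm := &poolMonitor{
		cachedRequestedIPCount: 20,
		pendingReleaseIPs: func() []ipStatus {
			return []ipStatus{{ID: "uuid-1"}, {ID: "uuid-2"}}
		},
	}
	fmt.Println(pm.createSpec(false)) // {20 [uuid-1 uuid-2]}
	fmt.Println(pm.createSpec(true))  // {20 []}
}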

View file

@@ -313,3 +313,85 @@ func TestPoolSizeDecreaseWhenDecreaseHasAlreadyBeenRequested(t *testing.T) {
         t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
     }
 }
+func TestPoolSizeDecreaseToReallyLow(t *testing.T) {
+    var (
+        batchSize               = 10
+        initialIPConfigCount    = 30
+        requestThresholdPercent = 30
+        releaseThresholdPercent = 100
+    )
+    fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent)
+    log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps)
+    log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps)
+    // initial pool count is 30, set 23 of them to be allocated
+    err := fakecns.SetNumberOfAllocatedIPs(23)
+    if err != nil {
+        t.Error(err)
+    }
+    // Pool monitor does nothing, as the current number of IP's falls within the thresholds
+    err = poolmonitor.Reconcile()
+    if err != nil {
+        t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err)
+    }
+    // Now drop the allocated count to really low, say 3. This should trigger release in 2 batches
+    err = fakecns.SetNumberOfAllocatedIPs(3)
+    if err != nil {
+        t.Error(err)
+    }
+    // Pool monitor should release a batch, as the free IP count now exceeds the release threshold
+    t.Logf("Reconcile after allocated count drops from 23 -> 3, expected free count = 10")
+    err = poolmonitor.Reconcile()
+    if err != nil {
+        t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err)
+    }
+    // Ensure the spec now marks one batch of IP's as not in use
+    if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != batchSize {
+        t.Fatalf("Expected IP's not in use is not correct, expected %v, actual %v", batchSize, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+    }
+    // Ensure the requested IP count is now one batch size smaller than the initial IP count
+    if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-batchSize) {
+        t.Fatalf("Expected pool size to be one batch size smaller after reconcile, expected %v, actual %v", (initialIPConfigCount - batchSize), poolmonitor.cachedNNC.Spec.RequestedIPCount)
+    }
+    // Reconcile again, it should release the second batch
+    t.Logf("Reconcile again - 2, expected free count = 20")
+    err = poolmonitor.Reconcile()
+    if err != nil {
+        t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err)
+    }
+    // Ensure the spec now marks two batches of IP's as not in use
+    if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != batchSize*2 {
+        t.Fatalf("Expected IP's not in use is not correct, expected %v, actual %v", batchSize*2, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+    }
+    // Ensure the requested IP count is now two batch sizes smaller than the initial IP count
+    if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-(batchSize*2)) {
+        t.Fatalf("Expected pool size to be two batch sizes smaller after reconcile, expected %v, actual %v", (initialIPConfigCount - (batchSize * 2)), poolmonitor.cachedNNC.Spec.RequestedIPCount)
+    }
+    t.Logf("Update Request Controller")
+    err = fakerc.Reconcile()
+    if err != nil {
+        t.Error(err)
+    }
+    err = poolmonitor.Reconcile()
+    if err != nil {
+        t.Errorf("Expected no pool monitor failure after request controller reconcile: %v", err)
+    }
+    // Ensure the spec doesn't have any IPsNotInUse after the request controller has reconciled
+    if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != 0 {
+        t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", 0, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+    }
+}
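
As a sanity check on the numbers in this test (the threshold formulas below are assumptions inferred from the "Min free IP's" / "Max free IP" log lines; the actual derivation lives in the pool monitor and is not part of this diff): with batchSize=10, requestThresholdPercent=30 and releaseThresholdPercent=100, the monitor aims for at least 3 and at most 10 free IPs, so dropping allocations from 23 to 3 leaves 27 free and takes two one-batch decreases, 30 -> 20 -> 10, which is what the two Reconcile calls assert.

package main

import "fmt"

func main() {
	const (
		batchSize               = 10
		requestThresholdPercent = 30
		releaseThresholdPercent = 100
	)
	// Assumed formulas matching the Min/Max free IP logs printed by the test.
	minFreeIPs := batchSize * requestThresholdPercent / 100 // 3
	maxFreeIPs := batchSize * releaseThresholdPercent / 100 // 10

	requested, allocated := 30, 3
	steps := 0
	// One batch-sized decrease per reconcile while free IPs exceed the release threshold.
	for requested-allocated > maxFreeIPs {
		requested -= batchSize
		steps++
	}
	fmt.Println(minFreeIPs, maxFreeIPs, requested, steps) // 3 10 10 2
}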