* fix: [Linux] [NPM] handle #2977 for netpols without cidrs Signed-off-by: Hunter Gregory <42728408+huntergregory@users.noreply.github.com> * fix: lock and no need to track policy key Signed-off-by: Hunter Gregory <42728408+huntergregory@users.noreply.github.com> * style: remove dead code Signed-off-by: Hunter Gregory <42728408+huntergregory@users.noreply.github.com> --------- Signed-off-by: Hunter Gregory <42728408+huntergregory@users.noreply.github.com>
This commit is contained in:
Родитель
47b4329b06
Коммит
60ddd3531a
|
@ -1,3 +1,5 @@
|
|||
vendor/
|
||||
|
||||
# Binaries
|
||||
out/*
|
||||
output/*
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/Azure/azure-container-networking/common"
|
||||
|
@ -18,12 +19,12 @@ import (
|
|||
const (
|
||||
reconcileDuration = time.Duration(5 * time.Minute)
|
||||
|
||||
contextBackground = "BACKGROUND"
|
||||
contextApplyDP = "APPLY-DP"
|
||||
contextAddNetPol = "ADD-NETPOL"
|
||||
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
|
||||
contextAddNetPolCIDRPrecaution = "ADD-NETPOL-CIDR-PRECAUTION"
|
||||
contextDelNetPol = "DEL-NETPOL"
|
||||
contextBackground = "BACKGROUND"
|
||||
contextApplyDP = "APPLY-DP"
|
||||
contextAddNetPol = "ADD-NETPOL"
|
||||
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
|
||||
contextAddNetPolPrecaution = "ADD-NETPOL-PRECAUTION"
|
||||
contextDelNetPol = "DEL-NETPOL"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -48,6 +49,11 @@ type Config struct {
|
|||
*policies.PolicyManagerCfg
|
||||
}
|
||||
|
||||
type removePolicyInfo struct {
|
||||
sync.Mutex
|
||||
previousRemovePolicyIPSetsFailed bool
|
||||
}
|
||||
|
||||
type DataPlane struct {
|
||||
*Config
|
||||
applyInBackground bool
|
||||
|
@ -64,7 +70,10 @@ type DataPlane struct {
|
|||
endpointQuery *endpointQuery
|
||||
applyInfo *applyInfo
|
||||
netPolQueue *netPolQueue
|
||||
stopChannel <-chan struct{}
|
||||
// removePolicyInfo tracks when a policy was removed yet had ApplyIPSet failures.
|
||||
// This field is only relevant for Linux.
|
||||
removePolicyInfo removePolicyInfo
|
||||
stopChannel <-chan struct{}
|
||||
}
|
||||
|
||||
func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChannel <-chan struct{}) (*DataPlane, error) {
|
||||
|
@ -335,6 +344,9 @@ func (dp *DataPlane) applyDataPlaneNow(context string) error {
|
|||
}
|
||||
klog.Infof("[DataPlane] [ApplyDataPlane] [%s] finished applying ipsets", context)
|
||||
|
||||
// see comment in RemovePolicy() for why this is here
|
||||
dp.setRemovePolicyFailure(false)
|
||||
|
||||
if dp.applyInBackground {
|
||||
dp.applyInfo.Lock()
|
||||
dp.applyInfo.numBatches = 0
|
||||
|
@ -472,26 +484,17 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
|
|||
}
|
||||
}
|
||||
|
||||
if !util.IsWindowsDP() {
|
||||
for _, netPol := range netPols {
|
||||
if !(netPol.HasCIDRRules() && dp.ipsetMgr.PreviousApplyFailed()) {
|
||||
continue
|
||||
}
|
||||
|
||||
if inBootupPhase {
|
||||
// this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
|
||||
msg := fmt.Sprintf("[DataPlane] [%s] at risk of improperly applying a CIDR policy which is removed then readded", contextAddNetPolCIDRPrecaution)
|
||||
klog.Warning(msg)
|
||||
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, msg)
|
||||
break
|
||||
}
|
||||
|
||||
if dp.hadRemovePolicyFailure() {
|
||||
if inBootupPhase {
|
||||
// this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
|
||||
msg := fmt.Sprintf("[DataPlane] [%s] at risk of improperly applying a policy which is removed then readded", contextAddNetPolPrecaution)
|
||||
klog.Warning(msg)
|
||||
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, msg)
|
||||
} else {
|
||||
// prevent #2977
|
||||
if err := dp.applyDataPlaneNow(contextAddNetPolCIDRPrecaution); err != nil {
|
||||
if err := dp.applyDataPlaneNow(contextAddNetPolPrecaution); err != nil {
|
||||
return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -531,6 +534,9 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
|
|||
}
|
||||
klog.Infof("[DataPlane] [%s] finished applying ipsets", contextAddNetPolBootup)
|
||||
|
||||
// see comment in RemovePolicy() for why this is here
|
||||
dp.setRemovePolicyFailure(false)
|
||||
|
||||
dp.applyInfo.numBatches = 0
|
||||
}
|
||||
|
||||
|
@ -627,7 +633,16 @@ func (dp *DataPlane) RemovePolicy(policyKey string) error {
|
|||
return err
|
||||
}
|
||||
|
||||
return dp.applyDataPlaneNow(contextApplyDP)
|
||||
if err := dp.applyDataPlaneNow(contextDelNetPol); err != nil {
|
||||
// Failed to apply IPSets while removing this policy.
|
||||
// Consider this removepolicy call a failure until apply IPSets is successful.
|
||||
// Related to #2977
|
||||
klog.Info("[DataPlane] remove policy has failed to apply ipsets. setting remove policy failure")
|
||||
dp.setRemovePolicyFailure(true)
|
||||
return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdatePolicy takes in updated policy object, calculates the delta and applies changes
|
||||
|
@ -749,3 +764,23 @@ func (dp *DataPlane) deleteIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, n
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (dp *DataPlane) setRemovePolicyFailure(failed bool) {
|
||||
if util.IsWindowsDP() {
|
||||
return
|
||||
}
|
||||
|
||||
dp.removePolicyInfo.Lock()
|
||||
defer dp.removePolicyInfo.Unlock()
|
||||
dp.removePolicyInfo.previousRemovePolicyIPSetsFailed = failed
|
||||
}
|
||||
|
||||
func (dp *DataPlane) hadRemovePolicyFailure() bool {
|
||||
if util.IsWindowsDP() {
|
||||
return false
|
||||
}
|
||||
|
||||
dp.removePolicyInfo.Lock()
|
||||
defer dp.removePolicyInfo.Unlock()
|
||||
return dp.removePolicyInfo.previousRemovePolicyIPSetsFailed
|
||||
}
|
||||
|
|
|
@ -79,13 +79,6 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana
|
|||
}
|
||||
}
|
||||
|
||||
// PreviousApplyFailed is only relevant for Linux right now since Windows doesn't track failures
|
||||
func (iMgr *IPSetManager) PreviousApplyFailed() bool {
|
||||
iMgr.Lock()
|
||||
defer iMgr.Unlock()
|
||||
return iMgr.consecutiveApplyFailures > 0
|
||||
}
|
||||
|
||||
/*
|
||||
Reconcile removes empty/unreferenced sets from the cache.
|
||||
For ApplyAllIPSets mode, those sets are added to the toDeleteCache.
|
||||
|
|
Загрузка…
Ссылка в новой задаче