fix: use cached ctrlruntime client in IPAM pool monitor (#2043)
Signed-off-by: Evan Baker <rbtr@users.noreply.github.com>
Parent: 6325924bf1
Commit: 97fdf81f89
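The change in one sentence: the IPAM pool monitor used to be built around a dedicated direct (uncached) controller-runtime client; it is now built around the Manager's cached client, and the direct client is kept only for the one-shot initial CNS state reconcile that runs before the Manager starts. Below is a minimal, generic controller-runtime sketch of the two client flavours involved; the scheme and names are placeholders, not the CNS code itself.

package main

import (
    "k8s.io/apimachinery/pkg/runtime"
    clientgoscheme "k8s.io/client-go/kubernetes/scheme"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client"
)

func main() {
    scheme := runtime.NewScheme()
    _ = clientgoscheme.AddToScheme(scheme)
    cfg := ctrl.GetConfigOrDie()

    // Direct client: every read is a live round trip to the API server.
    // Usable immediately, which is why the pre-Manager initial reconcile keeps using it.
    directCli, err := client.New(cfg, client.Options{Scheme: scheme})
    if err != nil {
        panic(err)
    }

    // Manager: its GetClient() returns a client whose reads are served from the
    // shared informer cache. Cheap for a frequently polling component such as the
    // pool monitor, but not usable until the Manager has started and the cache has synced.
    mgr, err := ctrl.NewManager(cfg, ctrl.Options{Scheme: scheme})
    if err != nil {
        panic(err)
    }
    cachedCli := mgr.GetClient()

    _, _ = directCli, cachedCli
}

Reads through the Manager's client are answered from the cache while writes still go to the API server, so switching the monitor onto it removes a steady stream of direct API reads.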
@@ -485,6 +485,10 @@ func (service *HTTPRestService) handleDebugPodContext(w http.ResponseWriter, r *
 func (service *HTTPRestService) handleDebugRestData(w http.ResponseWriter, r *http.Request) {
     service.RLock()
     defer service.RUnlock()
+    if service.IPAMPoolMonitor == nil {
+        http.Error(w, "not ready", http.StatusServiceUnavailable)
+        return
+    }
     resp := GetHTTPServiceDataResponse{
         HTTPRestServiceData: HTTPRestServiceData{
             PodIPIDByPodInterfaceKey: service.PodIPIDByPodInterfaceKey,
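The new nil check follows the usual pattern for an HTTP handler whose dependency is only wired up later in startup: answer 503 until the dependency exists instead of dereferencing nil. A self-contained sketch of the same pattern; the type and field names are placeholders, not the CNS types.

package main

import (
    "net/http"
    "sync"
)

// server is a stand-in for HTTPRestService: the monitor field stays nil until
// startup wiring assigns it.
type server struct {
    sync.RWMutex
    monitor interface{ Status() string }
}

func (s *server) handleDebug(w http.ResponseWriter, r *http.Request) {
    s.RLock()
    defer s.RUnlock()
    // Guard: the dependency is populated later in startup, so report 503
    // instead of panicking on a nil pointer.
    if s.monitor == nil {
        http.Error(w, "not ready", http.StatusServiceUnavailable)
        return
    }
    _, _ = w.Write([]byte(s.monitor.Status()))
}

func main() {
    s := &server{}
    http.HandleFunc("/debug", s.handleDebug)
    _ = http.ListenAndServe(":8080", nil)
}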
@@ -1147,20 +1147,12 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
     if err != nil {
         return errors.Wrap(err, "failed to create ctrl client")
     }
-    nnccli := nodenetworkconfig.NewClient(directcli)
+    directnnccli := nodenetworkconfig.NewClient(directcli)
     if err != nil {
         return errors.Wrap(err, "failed to create NNC client")
     }
     // TODO(rbtr): nodename and namespace should be in the cns config
-    scopedcli := nncctrl.NewScopedClient(nnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName})
-
-    clusterSubnetStateChan := make(chan v1alpha1.ClusterSubnetState)
-    // initialize the ipam pool monitor
-    poolOpts := ipampool.Options{
-        RefreshDelay: poolIPAMRefreshRateInMilliseconds * time.Millisecond,
-    }
-    poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, scopedcli, clusterSubnetStateChan, &poolOpts)
-    httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor
+    directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName})

     logger.Printf("Reconciling initial CNS state")
     // apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for
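The renamed direct client chain (directcli, directnnccli, directscopedcli) is retained solely for this initial reconcile, because it works before the Manager and its cache exist. The "scoped client" wrapper pins a general client to a single NamespacedName; here is a hypothetical illustration of that idea, with made-up names and fields rather than the nncctrl implementation.

package main

import (
    "context"
    "fmt"

    corev1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/types"
    "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// scopedClient pins a general client to one NamespacedName so callers cannot
// accidentally read a different object. Illustrative only.
type scopedClient struct {
    cli client.Client
    key types.NamespacedName
}

// Get always fetches the single object the client is scoped to.
func (s *scopedClient) Get(ctx context.Context, obj client.Object) error {
    return s.cli.Get(ctx, s.key, obj)
}

func main() {
    cli := fake.NewClientBuilder().Build() // in-memory client, enough for the sketch
    sc := &scopedClient{cli: cli, key: types.NamespacedName{Namespace: "kube-system", Name: "node-1"}}

    var cm corev1.ConfigMap
    err := sc.Get(context.Background(), &cm)
    fmt.Println(err) // NotFound: the fake cluster is empty
}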
@@ -1170,7 +1162,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
     err = retry.Do(func() error {
         attempt++
         logger.Printf("reconciling initial CNS state attempt: %d", attempt)
-        err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
+        err = reconcileInitialCNSState(ctx, directscopedcli, httpRestServiceImplementation, podInfoByIPProvider)
         if err != nil {
             logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
         }
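The surrounding retry loop bounds how long startup waits for the initial reconcile. Assuming the retry package here is github.com/avast/retry-go (an assumption; only the retry.Do call is visible in the hunk), the shape of such a bounded loop looks like this sketch, with placeholder attempt counts and delays.

package main

import (
    "errors"
    "log"
    "time"

    "github.com/avast/retry-go/v4"
)

func main() {
    attempt := 0
    err := retry.Do(func() error {
        attempt++
        log.Printf("reconciling initial state, attempt %d", attempt)
        // stand-in for reconcileInitialCNSState(ctx, directscopedcli, ...)
        if attempt < 3 {
            return errors.New("apiserver not ready yet")
        }
        return nil
    },
        retry.Attempts(10),                // bounded, so startup fails loudly instead of hanging
        retry.Delay(500*time.Millisecond), // pause between attempts
        retry.DelayType(retry.FixedDelay),
    )
    if err != nil {
        log.Fatalf("failed to reconcile initial state: %v", err)
    }
    log.Printf("reconciled initial state after %d attempts", attempt)
}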
@@ -1181,16 +1173,6 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
     }
     logger.Printf("reconciled initial CNS state after %d attempts", attempt)
-
-    // start the pool Monitor before the Reconciler, since it needs to be ready to receive an
-    // NodeNetworkConfig update by the time the Reconciler tries to send it.
-    go func() {
-        logger.Printf("Starting IPAM Pool Monitor")
-        if e := poolMonitor.Start(ctx); e != nil {
-            logger.Errorf("[Azure CNS] Failed to start pool monitor with err: %v", e)
-        }
-    }()
-    logger.Printf("initialized and started IPAM pool monitor")

     // the nodeScopedCache sets Selector options on the Manager cache which are used
     // to perform *server-side* filtering of the cached objects. This is very important
     // for high node/pod count clusters, as it keeps us from watching objects at the
@@ -1220,6 +1202,25 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
         return errors.Wrap(err, "failed to create manager")
     }

+    // Build the IPAM Pool monitor
+    clusterSubnetStateChan := make(chan v1alpha1.ClusterSubnetState)
+
+    // this cachedscopedclient is built using the Manager's cached client, which is
+    // NOT SAFE TO USE UNTIL THE MANAGER IS STARTED!
+    // This is okay because it is only used to build the IPAMPoolMonitor, which does not
+    // attempt to use the client until it has received a NodeNetworkConfig to update, and
+    // that can only happen once the Manager has started and the NodeNetworkConfig
+    // reconciler has pushed the Monitor a NodeNetworkConfig.
+    cachedscopedcli := nncctrl.NewScopedClient(nodenetworkconfig.NewClient(manager.GetClient()), types.NamespacedName{Namespace: "kube-system", Name: nodeName})
+
+    poolOpts := ipampool.Options{
+        RefreshDelay: poolIPAMRefreshRateInMilliseconds * time.Millisecond,
+    }
+    poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, cachedscopedcli, clusterSubnetStateChan, &poolOpts)
+    httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor
+
+    // Start building the NNC Reconciler
+
     // get our Node so that we can xref it against the NodeNetworkConfig's to make sure that the
     // NNC is not stale and represents the Node we're running on.
     node, err := clientset.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
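The long comment above is the key invariant: the Manager's cached client is handed to the pool monitor before it is usable, and that is safe only because the monitor's first client call is gated on receiving a NodeNetworkConfig, which cannot arrive before the Manager has started. An illustrative sketch of that gating (not the ipampool package; all names are made up):

package main

import (
    "context"

    "sigs.k8s.io/controller-runtime/pkg/client"
)

// update stands in for a NodeNetworkConfig update pushed by the reconciler.
type update struct{}

// monitor holds a client it must not use yet; its first use of that client is
// gated on receiving an update, which can only be sent once the Manager (and
// therefore the cache behind cli) is running.
type monitor struct {
    cli client.Client // the Manager's cached client; unusable until the Manager starts
    in  chan update
}

func (m *monitor) Start(ctx context.Context) error {
    for {
        select {
        case <-ctx.Done():
            return ctx.Err()
        case u := <-m.in:
            // First touch of m.cli happens here, strictly after someone pushed an
            // update, i.e. after the Manager and the NNC reconciler are running.
            _ = u
            _ = m.cli
        }
    }
}

func main() {
    m := &monitor{in: make(chan update)} // cli left nil in this sketch
    ctx, cancel := context.WithCancel(context.Background())
    cancel() // stop immediately; the sketch only shows the gating structure
    _ = m.Start(ctx)
}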
@@ -1252,6 +1253,16 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
         httpRestServiceImplementation.RegisterPProfEndpoints()
     }

+    // start the pool Monitor before the Reconciler, since it needs to be ready to receive an
+    // NodeNetworkConfig update by the time the Reconciler tries to send it.
+    go func() {
+        logger.Printf("Starting IPAM Pool Monitor")
+        if e := poolMonitor.Start(ctx); e != nil {
+            logger.Errorf("[Azure CNS] Failed to start pool monitor with err: %v", e)
+        }
+    }()
+    logger.Printf("initialized and started IPAM pool monitor")
+
     // Start the Manager which starts the reconcile loop.
     // The Reconciler will send an initial NodeNetworkConfig update to the PoolMonitor, starting the
     // Monitor's internal loop.
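Net effect on startup ordering: the monitor is now built and started right next to the Manager, immediately before the Manager's own Start, rather than before the Manager even exists. A condensed generic sketch of that ordering follows; the monitor type is a placeholder, not the CNS wiring.

package main

import (
    "context"
    "log"

    ctrl "sigs.k8s.io/controller-runtime"
)

// noopMonitor stands in for the IPAM pool monitor; it just waits for shutdown.
type noopMonitor struct{}

func (noopMonitor) Start(ctx context.Context) error {
    <-ctx.Done()
    return ctx.Err()
}

func run() error {
    ctx := ctrl.SetupSignalHandler()

    mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{})
    if err != nil {
        return err
    }
    _ = mgr.GetClient() // in the real code the monitor is built from this cached client

    mon := noopMonitor{}

    // 1. Start the monitor first, so it is already listening when the NNC
    //    reconciler pushes the initial NodeNetworkConfig update.
    go func() {
        if e := mon.Start(ctx); e != nil {
            log.Printf("pool monitor stopped: %v", e)
        }
    }()

    // 2. Start the Manager: this starts the cache and the reconcile loops; only
    //    after this point is the cached client held by the monitor safe to use.
    return mgr.Start(ctx)
}

func main() {
    if err := run(); err != nil {
        log.Fatal(err)
    }
}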