// azure-container-networking/cns/ipampool/metrics/observer.go
//
// 200 lines
// 7.3 KiB
// Go

package metrics
import (
"context"
"fmt"
"net/netip"
"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/cns/types"
"github.com/Azure/azure-container-networking/crd/clustersubnetstate/api/v1alpha1"
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
)
// subnetARMIDTemplate is the fmt template used to build a Subnet ARM ID. Its four %s verbs are
// filled, in order, with the subscription, resource group, virtual network and subnet:
// /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/$(SUBNET)
const subnetARMIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/virtualNetworks/%s/subnets/%s"
// ipPoolState is the current actual state of the CNS IP pool.
// The per-state counters are tallied from the IP configuration source and the derived
// values are computed from them each time the pool is observed.
type ipPoolState struct {
// allocatedToPods are the IPs CNS gives to Pods, i.e. the IPs in state "Assigned".
allocatedToPods int64
// available are the IPs in state "Available".
available int64
// currentAvailableIPs are the current available IPs: secondaryIPs - allocatedToPods - pendingRelease.
currentAvailableIPs int64
// expectedAvailableIPs are the "future" available IPs, if the requested IP count is honored: requestedIPs - allocatedToPods.
expectedAvailableIPs int64
// pendingProgramming are the IPs in state "PendingProgramming".
pendingProgramming int64
// pendingRelease are the IPs in state "PendingRelease".
pendingRelease int64
// requestedIPs are the IPs CNS has requested that it be allocated by DNC.
// It is read from the NodeNetworkConfig spec's RequestedIPCount.
requestedIPs int64
// secondaryIPs are all the IPs given to CNS by DNC, not including the primary IP of the NC.
// It is set to the number of entries returned by the IP configuration source.
secondaryIPs int64
}
// metaState is the Monitor's configuration state for the IP pool.
// Apart from exhausted, every field is populated from the NodeNetworkConfig.
type metaState struct {
// batch is the scaler batch size reported in the NodeNetworkConfig status.
batch int64
// exhausted is true if any observed ClusterSubnetState reports Status.Exhausted.
exhausted bool
// max is the scaler max IP count reported in the NodeNetworkConfig status.
max int64
// primaryIPAddresses is the set of primary IPs of the NetworkContainers. For VNETBlock
// NetworkContainers the primary IP is a prefix and only its address part is stored.
primaryIPAddresses map[string]struct{}
// subnet is the SubnetName of the first NetworkContainer; used as a metric label.
subnet string
// subnetARMID is the Subnet ARM ID generated from the first NetworkContainer; used as a metric label.
subnetARMID string
// subnetCIDR is the SubnetAddressSpace of the first NetworkContainer; used as a metric label.
subnetCIDR string
}
// observer holds the data sources that observeMetrics reads from.
// Any source may be nil, in which case observeMetrics skips it.
type observer struct {
// ipSrc returns the current IP configurations known to CNS.
ipSrc func() map[string]cns.IPConfigurationStatus
// nncSrc fetches the NodeNetworkConfig.
nncSrc func(context.Context) (*v1alpha.NodeNetworkConfig, error)
// cssSrc fetches the list of ClusterSubnetStates.
cssSrc func(context.Context) ([]v1alpha1.ClusterSubnetState, error)
}
// NewLegacyMetricsObserver binds the three provided data sources into an observer
// and returns that observer's observeMetrics method as a plain function. Calling the
// returned function observes the legacy IPAM pool metrics once.
//
//nolint:lll // ignore line length
func NewLegacyMetricsObserver(ipSrc func() map[string]cns.IPConfigurationStatus, nncSrc func(context.Context) (*v1alpha.NodeNetworkConfig, error), cssSrc func(context.Context) ([]v1alpha1.ClusterSubnetState, error)) func(context.Context) error {
	o := &observer{ipSrc: ipSrc, nncSrc: nncSrc, cssSrc: cssSrc}
	return o.observeMetrics
}
// generateARMID builds the Subnet ARM ID for a NetworkContainer by substituting its
// subscription, resource group, VNET and subnet IDs into subnetARMIDTemplate.
// If any of those four attributes is empty, the result is the empty string.
func generateARMID(nc *v1alpha.NetworkContainer) string {
	// Ordered exactly as the template's verbs expect them.
	parts := [...]string{nc.SubscriptionID, nc.ResourceGroupID, nc.VNETID, nc.SubnetID}
	for _, part := range parts {
		if part == "" {
			return ""
		}
	}
	return fmt.Sprintf(subnetARMIDTemplate, parts[0], parts[1], parts[2], parts[3])
}
// observeMetrics observes the IP pool and updates the metrics. Blocking.
//
// Up to three sources are read concurrently: the ClusterSubnetState list (cssSrc),
// the NodeNetworkConfig (nncSrc) and the CNS IP configurations (ipSrc). A nil source
// is skipped. Once all sources have finished, every gauge is updated from whatever
// was collected; values belonging to a source that failed keep their zero value.
//
// The returned error is nil if every source succeeded; otherwise it is the first
// error reported by a source, wrapped. The gauges are updated in both cases.
//
//nolint:lll // ignore line length
func (o *observer) observeMetrics(ctx context.Context) error {
	// The error group is used to allow individual metrics sources to fail without
	// failing out the entire attempt to observe the Pool. This may happen if there is a
	// transient issue with the source of the data, or if the source is not available
	// (like if the CRD is not installed).
	var g errgroup.Group

	// Get the current state of world.
	// Each goroutine below writes a disjoint set of fields of meta and state.
	var meta metaState
	g.Go(func() error {
		// Try to fetch the ClusterSubnetState, if available.
		if o.cssSrc == nil {
			return nil
		}
		csslist, err := o.cssSrc(ctx)
		if err != nil {
			return err
		}
		// A single exhausted subnet is enough to flag exhaustion.
		for i := range csslist {
			if csslist[i].Status.Exhausted {
				meta.exhausted = true
				break
			}
		}
		return nil
	})

	var state ipPoolState
	g.Go(func() error {
		// Try to fetch the NodeNetworkConfig, if available.
		if o.nncSrc == nil {
			return nil
		}
		nnc, err := o.nncSrc(ctx)
		if err != nil {
			return err
		}
		if nnc == nil {
			// Report instead of dereferencing nil, which would panic inside this goroutine.
			return errors.New("nil NodeNetworkConfig returned without an error")
		}

		// Record the scalar values before walking the NetworkContainers, so that a
		// malformed primary IP below does not also discard these unrelated values.
		state.requestedIPs = nnc.Spec.RequestedIPCount
		meta.batch = nnc.Status.Scaler.BatchSize
		meta.max = nnc.Status.Scaler.MaxIPCount

		ncs := nnc.Status.NetworkContainers
		if len(ncs) > 0 {
			// The subnet name, address space and ARM ID of the first NetworkContainer are used as the metric labels.
			meta.subnet = ncs[0].SubnetName
			meta.subnetCIDR = ncs[0].SubnetAddressSpace
			meta.subnetARMID = generateARMID(&ncs[0])
		}

		// Collect the distinct primary IPs; the map deduplicates repeated addresses.
		meta.primaryIPAddresses = make(map[string]struct{}, len(ncs))
		for i := range ncs {
			nc := &ncs[i]
			switch nc.Type {
			case "", v1alpha.VNET:
				// An unset type is treated the same as VNET: PrimaryIP is used as-is.
				meta.primaryIPAddresses[nc.PrimaryIP] = struct{}{}
			case v1alpha.VNETBlock:
				// For VNETBlock the PrimaryIP is a prefix; only its address is recorded.
				primaryPrefix, parseErr := netip.ParsePrefix(nc.PrimaryIP)
				if parseErr != nil {
					return errors.Wrapf(parseErr, "unable to parse ip prefix: %s", nc.PrimaryIP)
				}
				meta.primaryIPAddresses[primaryPrefix.Addr().String()] = struct{}{}
			}
		}
		return nil
	})

	g.Go(func() error {
		// Try to fetch the IPConfigurations, if available.
		if o.ipSrc == nil {
			return nil
		}
		ips := o.ipSrc()
		state.secondaryIPs = int64(len(ips))
		// Tally the IPs by state; IPs in any other state only count towards secondaryIPs.
		for key := range ips {
			ip := ips[key]
			switch ip.GetState() {
			case types.Assigned:
				state.allocatedToPods++
			case types.Available:
				state.available++
			case types.PendingProgramming:
				state.pendingProgramming++
			case types.PendingRelease:
				state.pendingRelease++
			}
		}
		return nil
	})

	// Wait for every source before reading meta/state; the error is surfaced only
	// after the gauges have been updated with whatever was collected.
	err := g.Wait()

	state.currentAvailableIPs = state.secondaryIPs - state.allocatedToPods - state.pendingRelease
	state.expectedAvailableIPs = state.requestedIPs - state.allocatedToPods

	// Update the metrics.
	primaryIPs := int64(len(meta.primaryIPAddresses))
	labels := []string{meta.subnet, meta.subnetCIDR, meta.subnetARMID}
	IpamAllocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedToPods))
	IpamAvailableIPCount.WithLabelValues(labels...).Set(float64(state.available))
	IpamBatchSize.WithLabelValues(labels...).Set(float64(meta.batch))
	IpamCurrentAvailableIPcount.WithLabelValues(labels...).Set(float64(state.currentAvailableIPs))
	IpamExpectedAvailableIPCount.WithLabelValues(labels...).Set(float64(state.expectedAvailableIPs))
	IpamMaxIPCount.WithLabelValues(labels...).Set(float64(meta.max))
	IpamPendingProgramIPCount.WithLabelValues(labels...).Set(float64(state.pendingProgramming))
	IpamPendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.pendingRelease))
	IpamPrimaryIPCount.WithLabelValues(labels...).Set(float64(primaryIPs))
	IpamRequestedIPConfigCount.WithLabelValues(labels...).Set(float64(state.requestedIPs))
	IpamSecondaryIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs))
	IpamTotalIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs + primaryIPs))
	if meta.exhausted {
		IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPExhausted))
	} else {
		IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPNotExhausted))
	}

	if err != nil {
		return errors.Wrap(err, "failed to collect all metrics")
	}
	return nil
}