2024-09-04 19:01:25 +03:00
|
|
|
package metrics
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"net/netip"
|
|
|
|
|
|
|
|
"github.com/Azure/azure-container-networking/cns"
|
|
|
|
"github.com/Azure/azure-container-networking/cns/types"
|
|
|
|
"github.com/Azure/azure-container-networking/crd/clustersubnetstate/api/v1alpha1"
|
|
|
|
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
|
|
|
|
"github.com/pkg/errors"
|
2024-10-01 23:47:51 +03:00
|
|
|
"golang.org/x/sync/errgroup"
|
2024-09-04 19:01:25 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// subnetARMIDTemplate is the fmt layout of a Subnet ARM ID. Its four %s verbs
// take, in order: subscription, resource group, virtual network and subnet, i.e.
// /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/$(SUBNET)
const subnetARMIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/virtualNetworks/%s/subnets/%s"
|
|
|
|
|
|
|
|
// ipPoolState is a point-in-time snapshot of the IP counts in the CNS IP pool.
type ipPoolState struct {
	allocatedToPods      int64 // IPs CNS gives to Pods (state "Assigned").
	available            int64 // IPs in state "Available".
	currentAvailableIPs  int64 // IPs available right now: secondaryIPs - allocatedToPods - pendingRelease.
	expectedAvailableIPs int64 // "future" available IPs if the requested count is honored: requestedIPs - allocatedToPods.
	pendingProgramming   int64 // IPs in state "PendingProgramming".
	pendingRelease       int64 // IPs in state "PendingRelease".
	requestedIPs         int64 // IPs CNS has requested that it be allocated by DNC.
	secondaryIPs         int64 // all IPs given to CNS by DNC, not including the primary IP of the NC.
}
|
|
|
|
|
|
|
|
// metaState holds the pool configuration and subnet metadata that accompany the
// IP counts when the metrics are published.
type metaState struct {
	batch              int64               // scaler batch size reported in the NodeNetworkConfig status.
	exhausted          bool                // true when any ClusterSubnetState reports the subnet as exhausted.
	max                int64               // scaler max IP count reported in the NodeNetworkConfig status.
	primaryIPAddresses map[string]struct{} // set of primary IP addresses of the network containers.
	subnet             string              // subnet name of the first network container; used as a metric label.
	subnetARMID        string              // subnet ARM ID built from the first network container; used as a metric label.
	subnetCIDR         string              // subnet address space of the first network container; used as a metric label.
}
|
|
|
|
|
2024-10-01 23:47:51 +03:00
|
|
|
type observer struct {
|
|
|
|
ipSrc func() map[string]cns.IPConfigurationStatus
|
|
|
|
nncSrc func(context.Context) (*v1alpha.NodeNetworkConfig, error)
|
|
|
|
cssSrc func(context.Context) ([]v1alpha1.ClusterSubnetState, error)
|
|
|
|
}
|
|
|
|
|
2024-09-04 19:01:25 +03:00
|
|
|
// NewLegacyMetricsObserver creates a closed functional scope which can be invoked to
|
|
|
|
// observe the legacy IPAM pool metrics.
|
|
|
|
//
|
|
|
|
//nolint:lll // ignore line length
|
2024-10-01 23:47:51 +03:00
|
|
|
func NewLegacyMetricsObserver(ipSrc func() map[string]cns.IPConfigurationStatus, nncSrc func(context.Context) (*v1alpha.NodeNetworkConfig, error), cssSrc func(context.Context) ([]v1alpha1.ClusterSubnetState, error)) func(context.Context) error {
|
|
|
|
return (&observer{
|
|
|
|
ipSrc: ipSrc,
|
|
|
|
nncSrc: nncSrc,
|
|
|
|
cssSrc: cssSrc,
|
|
|
|
}).observeMetrics
|
2024-09-04 19:01:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// generateARMID uses the Subnet ARM ID format to populate the ARM ID with the metadata.
|
|
|
|
// If either of the metadata attributes are empty, then the ARM ID will be an empty string.
|
|
|
|
func generateARMID(nc *v1alpha.NetworkContainer) string {
|
|
|
|
subscription := nc.SubscriptionID
|
|
|
|
resourceGroup := nc.ResourceGroupID
|
|
|
|
vnetID := nc.VNETID
|
|
|
|
subnetID := nc.SubnetID
|
|
|
|
|
|
|
|
if subscription == "" || resourceGroup == "" || vnetID == "" || subnetID == "" {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
return fmt.Sprintf(subnetARMIDTemplate, subscription, resourceGroup, vnetID, subnetID)
|
|
|
|
}
|
|
|
|
|
|
|
|
// observeMetrics observes the IP pool and updates the metrics. Blocking.
|
|
|
|
//
|
|
|
|
//nolint:lll // ignore line length
|
2024-10-01 23:47:51 +03:00
|
|
|
func (o *observer) observeMetrics(ctx context.Context) error {
|
|
|
|
// The error group is used to allow individual metrics sources to fail without
|
|
|
|
// failing out the entire attempt to observe the Pool. This may happen if there is a
|
|
|
|
// transient issue with the source of the data, or if the source is not available
|
|
|
|
// (like if the CRD is not installed).
|
|
|
|
var g errgroup.Group
|
2024-09-04 19:01:25 +03:00
|
|
|
|
2024-10-01 23:47:51 +03:00
|
|
|
// Get the current state of world.
|
2024-09-04 19:01:25 +03:00
|
|
|
var meta metaState
|
2024-10-01 23:47:51 +03:00
|
|
|
g.Go(func() error {
|
|
|
|
// Try to fetch the ClusterSubnetState, if available.
|
|
|
|
if o.cssSrc != nil {
|
|
|
|
csslist, err := o.cssSrc(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for i := range csslist {
|
|
|
|
if csslist[i].Status.Exhausted {
|
|
|
|
meta.exhausted = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2024-09-04 19:01:25 +03:00
|
|
|
}
|
2024-10-01 23:47:51 +03:00
|
|
|
return nil
|
|
|
|
})
|
2024-09-04 19:01:25 +03:00
|
|
|
|
2024-10-01 23:47:51 +03:00
|
|
|
var state ipPoolState
|
|
|
|
g.Go(func() error {
|
|
|
|
// Try to fetch the NodeNetworkConfig, if available.
|
|
|
|
if o.nncSrc != nil {
|
|
|
|
nnc, err := o.nncSrc(ctx)
|
2024-09-04 19:01:25 +03:00
|
|
|
if err != nil {
|
2024-10-01 23:47:51 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
if len(nnc.Status.NetworkContainers) > 0 {
|
|
|
|
// Set SubnetName, SubnetAddressSpace and Pod Network ARM ID values to the global subnet, subnetCIDR and subnetARM variables.
|
|
|
|
meta.subnet = nnc.Status.NetworkContainers[0].SubnetName
|
|
|
|
meta.subnetCIDR = nnc.Status.NetworkContainers[0].SubnetAddressSpace
|
|
|
|
meta.subnetARMID = generateARMID(&nnc.Status.NetworkContainers[0])
|
|
|
|
}
|
|
|
|
meta.primaryIPAddresses = make(map[string]struct{})
|
|
|
|
// Add Primary IP to Map, if not present.
|
|
|
|
// This is only for Swift i.e. if NC Type is vnet.
|
|
|
|
for i := 0; i < len(nnc.Status.NetworkContainers); i++ {
|
|
|
|
nc := nnc.Status.NetworkContainers[i]
|
|
|
|
if nc.Type == "" || nc.Type == v1alpha.VNET {
|
|
|
|
meta.primaryIPAddresses[nc.PrimaryIP] = struct{}{}
|
|
|
|
}
|
|
|
|
|
|
|
|
if nc.Type == v1alpha.VNETBlock {
|
|
|
|
primaryPrefix, err := netip.ParsePrefix(nc.PrimaryIP)
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrapf(err, "unable to parse ip prefix: %s", nc.PrimaryIP)
|
|
|
|
}
|
|
|
|
meta.primaryIPAddresses[primaryPrefix.Addr().String()] = struct{}{}
|
|
|
|
}
|
2024-09-04 19:01:25 +03:00
|
|
|
}
|
2024-10-01 23:47:51 +03:00
|
|
|
state.requestedIPs = nnc.Spec.RequestedIPCount
|
|
|
|
meta.batch = nnc.Status.Scaler.BatchSize
|
|
|
|
meta.max = nnc.Status.Scaler.MaxIPCount
|
2024-09-04 19:01:25 +03:00
|
|
|
}
|
2024-10-01 23:47:51 +03:00
|
|
|
return nil
|
|
|
|
})
|
2024-09-04 19:01:25 +03:00
|
|
|
|
2024-10-01 23:47:51 +03:00
|
|
|
g.Go(func() error {
|
|
|
|
// Try to fetch the IPConfigurations, if available.
|
|
|
|
if o.ipSrc != nil {
|
|
|
|
ips := o.ipSrc()
|
|
|
|
state.secondaryIPs = int64(len(ips))
|
|
|
|
for i := range ips {
|
|
|
|
ip := ips[i]
|
|
|
|
switch ip.GetState() {
|
|
|
|
case types.Assigned:
|
|
|
|
state.allocatedToPods++
|
|
|
|
case types.Available:
|
|
|
|
state.available++
|
|
|
|
case types.PendingProgramming:
|
|
|
|
state.pendingProgramming++
|
|
|
|
case types.PendingRelease:
|
|
|
|
state.pendingRelease++
|
|
|
|
}
|
|
|
|
}
|
2024-09-04 19:01:25 +03:00
|
|
|
}
|
2024-10-01 23:47:51 +03:00
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
|
|
|
err := g.Wait()
|
|
|
|
|
2024-09-04 19:01:25 +03:00
|
|
|
state.currentAvailableIPs = state.secondaryIPs - state.allocatedToPods - state.pendingRelease
|
|
|
|
state.expectedAvailableIPs = state.requestedIPs - state.allocatedToPods
|
|
|
|
|
2024-10-01 23:47:51 +03:00
|
|
|
// Update the metrics.
|
2024-09-04 19:01:25 +03:00
|
|
|
labels := []string{meta.subnet, meta.subnetCIDR, meta.subnetARMID}
|
|
|
|
IpamAllocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedToPods))
|
|
|
|
IpamAvailableIPCount.WithLabelValues(labels...).Set(float64(state.available))
|
|
|
|
IpamBatchSize.WithLabelValues(labels...).Set(float64(meta.batch))
|
|
|
|
IpamCurrentAvailableIPcount.WithLabelValues(labels...).Set(float64(state.currentAvailableIPs))
|
|
|
|
IpamExpectedAvailableIPCount.WithLabelValues(labels...).Set(float64(state.expectedAvailableIPs))
|
|
|
|
IpamMaxIPCount.WithLabelValues(labels...).Set(float64(meta.max))
|
|
|
|
IpamPendingProgramIPCount.WithLabelValues(labels...).Set(float64(state.pendingProgramming))
|
|
|
|
IpamPendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.pendingRelease))
|
|
|
|
IpamPrimaryIPCount.WithLabelValues(labels...).Set(float64(len(meta.primaryIPAddresses)))
|
|
|
|
IpamRequestedIPConfigCount.WithLabelValues(labels...).Set(float64(state.requestedIPs))
|
|
|
|
IpamSecondaryIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs))
|
|
|
|
IpamTotalIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs + int64(len(meta.primaryIPAddresses))))
|
|
|
|
if meta.exhausted {
|
|
|
|
IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPExhausted))
|
|
|
|
} else {
|
|
|
|
IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPNotExhausted))
|
|
|
|
}
|
2024-10-01 23:47:51 +03:00
|
|
|
if err != nil {
|
|
|
|
return errors.Wrap(err, "failed to collect all metrics")
|
|
|
|
}
|
2024-09-04 19:01:25 +03:00
|
|
|
return nil
|
|
|
|
}
|