Added IP Usage metrics at Rest server. (#1932)

* Added IP Usage metrics at Rest server.
This commit is contained in:
rsagasthya 2023-05-02 13:21:55 -07:00 коммит произвёл GitHub
Родитель 66acf016c6
Коммит 41f451a1e3
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 166 добавлений и 45 удалений

Просмотреть файл

@ -408,6 +408,11 @@ func (service *HTTPRestService) reserveIPAddress(w http.ResponseWriter, r *http.
Message: returnMessage,
}
if resp.ReturnCode == 0 {
// If Response is success i.e. code 0, then publish metrics.
publishIPStateMetrics(service.buildIPState())
}
reserveResp := &cns.ReserveIPAddressResponse{Response: resp, IPAddress: address}
err = service.Listener.Encode(w, &reserveResp)
logger.Response(service.Name, reserveResp, resp.ReturnCode, err)
@ -475,6 +480,11 @@ func (service *HTTPRestService) releaseIPAddress(w http.ResponseWriter, r *http.
Message: returnMessage,
}
if resp.ReturnCode == 0 {
// If Response is success i.e. code 0, then publish metrics.
publishIPStateMetrics(service.buildIPState())
}
err = service.Listener.Encode(w, &resp)
logger.Response(service.Name, resp, resp.ReturnCode, err)
}

Просмотреть файл

@ -395,6 +395,8 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns.
// If the NC was created successfully, log NC snapshot.
if returnCode == 0 {
logNCSnapshot(*req)
publishIPStateMetrics(service.buildIPState())
} else {
logger.Errorf(returnMessage)
}

50
cns/restserver/ipusage.go Normal file
Просмотреть файл

@ -0,0 +1,50 @@
package restserver
import (
"github.com/Azure/azure-container-networking/cns/logger"
"github.com/Azure/azure-container-networking/cns/types"
)
// ipState holds per-state counts of the IP configurations tracked by CNS.
type ipState struct {
	// allocatedIPs are all the IPs given to CNS by DNC.
	allocatedIPs int64
	// assignedIPs are the IPs CNS gives to Pods.
	assignedIPs int64
	// availableIPs are the IPs in state "Available".
	availableIPs int64
	// programmingIPs are the IPs in state "PendingProgramming".
	programmingIPs int64
	// releasingIPs are the IPs in state "PendingRelease".
	releasingIPs int64
}
// buildIPState tallies the entries of service.PodIPConfigState by IP state and
// returns the totals as a newly allocated ipState.
//
// The service lock is held for the whole scan. Every entry is counted in
// allocatedIPs; an entry whose state is Assigned, Available,
// PendingProgramming, or PendingRelease is additionally counted in the
// matching field. Entries in any other state contribute to allocatedIPs only.
// All five totals are written to the CNS log before returning.
func (service *HTTPRestService) buildIPState() *ipState {
	service.Lock()
	defer service.Unlock()

	// The zero value already has every counter at 0.
	state := ipState{}

	//nolint:gocritic // This has to iterate over the IP Config state to get the counts.
	for _, ipConfig := range service.PodIPConfigState {
		state.allocatedIPs++
		// Query the state once per entry; a single value can match at most
		// one of the cases below.
		switch ipConfig.GetState() {
		case types.Assigned:
			state.assignedIPs++
		case types.Available:
			state.availableIPs++
		case types.PendingProgramming:
			state.programmingIPs++
		case types.PendingRelease:
			state.releasingIPs++
		}
	}

	// Report every counter that was computed, not just the first three.
	logger.Printf(
		"[IP Usage] allocated IPs: %d, assigned IPs: %d, available IPs: %d, programming IPs: %d, releasing IPs: %d",
		state.allocatedIPs, state.assignedIPs, state.availableIPs, state.programmingIPs, state.releasingIPs,
	)
	return &state
}

Просмотреть файл

@ -10,51 +10,98 @@ import (
"sigs.k8s.io/controller-runtime/pkg/metrics"
)
var httpRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_latency_seconds",
Help: "Request latency in seconds by endpoint, verb, and response code.",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"url", "verb", "cns_return_code"},
// Names and values for Prometheus metric labels.
// customerMetricLabel/customerMetricLabelValue form the constant label pair
// attached to the cx_* IP usage gauges declared in this file.
// NOTE(review): the remaining label names are not referenced in the visible
// portion of this file — confirm their usage sites.
const (
	subnetLabel              = "subnet"
	subnetCIDRLabel          = "subnet_cidr"
	podnetARMIDLabel         = "podnet_arm_id"
	cnsReturnCode            = "cns_return_code"
	customerMetricLabel      = "customer_metric"
	customerMetricLabelValue = "customer metric"
)
var ipAssignmentLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "ip_assignment_latency_seconds",
Help: "Pod IP assignment latency in seconds",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
)
var ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "ipconfigstatus_state_transition_seconds",
Help: "Time spent by the IP Configuration Status in each state transition",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"previous_state", "next_state"},
)
var syncHostNCVersionCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sync_host_nc_version_total",
Help: "Count of Sync Host NC by success or failure",
},
[]string{"ok"},
)
var syncHostNCVersionLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "sync_host_nc_version_latency_seconds",
Help: "Sync Host NC Latency",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"ok"},
// Prometheus collectors exported by the CNS REST server.
var (
	// httpRequestLatency is a histogram of request latency in seconds,
	// partitioned by the "url", "verb", and "cns_return_code" labels.
	httpRequestLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "http_request_latency_seconds",
			Help: "Request latency in seconds by endpoint, verb, and response code.",
			//nolint:gomnd // default bucket consts
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
		},
		[]string{"url", "verb", "cns_return_code"},
	)
	// ipAssignmentLatency is an unlabeled histogram of Pod IP assignment
	// latency in seconds.
	ipAssignmentLatency = prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Name: "ip_assignment_latency_seconds",
			Help: "Pod IP assignment latency in seconds",
			//nolint:gomnd // default bucket consts
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
		},
	)
	// ipConfigStatusStateTransitionTime is a histogram of the seconds spent in
	// a state before a transition, labeled by the previous and next state.
	ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "ipconfigstatus_state_transition_seconds",
			Help: "Time spent by the IP Configuration Status in each state transition",
			//nolint:gomnd // default bucket consts
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
		},
		[]string{"previous_state", "next_state"},
	)
	// syncHostNCVersionCount counts Sync Host NC runs, labeled by "ok".
	syncHostNCVersionCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "sync_host_nc_version_total",
			Help: "Count of Sync Host NC by success or failure",
		},
		[]string{"ok"},
	)
	// syncHostNCVersionLatency is a histogram of Sync Host NC latency in
	// seconds, labeled by "ok".
	syncHostNCVersionLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "sync_host_nc_version_latency_seconds",
			Help: "Sync Host NC Latency",
			//nolint:gomnd // default bucket consts
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
		},
		[]string{"ok"},
	)
	// allocatedIPCount is a gauge of the IPs CNS has allocated. It has no
	// variable labels, only the constant customer-metric label.
	allocatedIPCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name:        "cx_allocated_ips_v2",
			Help:        "Count of IPs CNS has Allocated",
			ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
		},
		[]string{},
	)
	// assignedIPCount is a gauge of the IPs CNS has assigned to Pods. It has
	// no variable labels, only the constant customer-metric label.
	assignedIPCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name:        "cx_assigned_ips_v2",
			Help:        "Count of IPs CNS has Assigned to Pods",
			ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
		},
		[]string{},
	)
	// availableIPCount is a gauge of the IPs that are available. It has no
	// variable labels, only the constant customer-metric label.
	availableIPCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name:        "cx_available_ips_v2",
			Help:        "Count of IPs Available",
			ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
		},
		[]string{},
	)
	// pendingProgrammingIPCount is a gauge of the IPs in the Pending
	// Programming state. It has no variable labels, only the constant
	// customer-metric label.
	pendingProgrammingIPCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name:        "cx_pending_programming_ips_v2",
			Help:        "Count of IPs in Pending Programming State",
			ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
		},
		[]string{},
	)
	// pendingReleaseIPCount is a gauge of the IPs in the Pending Release
	// state. It has no variable labels, only the constant customer-metric
	// label.
	pendingReleaseIPCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name:        "cx_pending_release_ips_v2",
			Help:        "Count of IPs in Pending Release State",
			ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
		},
		[]string{},
	)
)
func init() {
@ -64,11 +111,14 @@ func init() {
ipConfigStatusStateTransitionTime,
syncHostNCVersionCount,
syncHostNCVersionLatency,
allocatedIPCount,
assignedIPCount,
availableIPCount,
pendingProgrammingIPCount,
pendingReleaseIPCount,
)
}
const cnsReturnCode = "Cns-Return-Code"
// Every http response is 200 so we really want cns response code.
// Hard to do with middleware unless we deserialize the responses, but making it an explicit header works around it.
// if that doesn't work we could have a separate countervec just for response codes.
@ -91,3 +141,12 @@ func stateTransitionMiddleware(i *cns.IPConfigurationStatus, s types.IPState) {
}
ipConfigStatusStateTransitionTime.WithLabelValues(string(i.GetState()), string(s)).Observe(time.Since(i.LastStateTransition).Seconds())
}
// publishIPStateMetrics copies each counter in state onto its corresponding
// IP usage gauge. state must be non-nil; it is dereferenced unconditionally.
func publishIPStateMetrics(state *ipState) {
	// TODO. ragasthya Add dimensions to the IP Usage metrics.
	// The gauges currently have no variable labels, so no label values are passed.
	var dims []string

	// Gauges take float64 samples; convert the integer tallies up front.
	allocated := float64(state.allocatedIPs)
	assigned := float64(state.assignedIPs)
	available := float64(state.availableIPs)
	programming := float64(state.programmingIPs)
	releasing := float64(state.releasingIPs)

	allocatedIPCount.WithLabelValues(dims...).Set(allocated)
	assignedIPCount.WithLabelValues(dims...).Set(assigned)
	availableIPCount.WithLabelValues(dims...).Set(available)
	pendingProgrammingIPCount.WithLabelValues(dims...).Set(programming)
	pendingReleaseIPCount.WithLabelValues(dims...).Set(releasing)
}