Added IP Usage metrics at Rest server. (#1932)
* Added IP Usage metrics at Rest server.
This commit is contained in:
Родитель
66acf016c6
Коммит
41f451a1e3
|
@ -408,6 +408,11 @@ func (service *HTTPRestService) reserveIPAddress(w http.ResponseWriter, r *http.
|
|||
Message: returnMessage,
|
||||
}
|
||||
|
||||
if resp.ReturnCode == 0 {
|
||||
// If Response is success i.e. code 0, then publish metrics.
|
||||
publishIPStateMetrics(service.buildIPState())
|
||||
}
|
||||
|
||||
reserveResp := &cns.ReserveIPAddressResponse{Response: resp, IPAddress: address}
|
||||
err = service.Listener.Encode(w, &reserveResp)
|
||||
logger.Response(service.Name, reserveResp, resp.ReturnCode, err)
|
||||
|
@ -475,6 +480,11 @@ func (service *HTTPRestService) releaseIPAddress(w http.ResponseWriter, r *http.
|
|||
Message: returnMessage,
|
||||
}
|
||||
|
||||
if resp.ReturnCode == 0 {
|
||||
// If Response is success i.e. code 0, then publish metrics.
|
||||
publishIPStateMetrics(service.buildIPState())
|
||||
}
|
||||
|
||||
err = service.Listener.Encode(w, &resp)
|
||||
logger.Response(service.Name, resp, resp.ReturnCode, err)
|
||||
}
|
||||
|
|
|
@ -395,6 +395,8 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns.
|
|||
// If the NC was created successfully, log NC snapshot.
|
||||
if returnCode == 0 {
|
||||
logNCSnapshot(*req)
|
||||
|
||||
publishIPStateMetrics(service.buildIPState())
|
||||
} else {
|
||||
logger.Errorf(returnMessage)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
package restserver
|
||||
|
||||
import (
|
||||
"github.com/Azure/azure-container-networking/cns/logger"
|
||||
"github.com/Azure/azure-container-networking/cns/types"
|
||||
)
|
||||
|
||||
// ipState holds a snapshot of IP counts: a total plus a count for each IP config state.
type ipState struct {
|
||||
// allocatedIPs are all the IPs given to CNS by DNC.
|
||||
allocatedIPs int64
|
||||
// assignedIPs are the IPs CNS gives to Pods.
|
||||
assignedIPs int64
|
||||
// availableIPs are the IPs in state "Available".
|
||||
availableIPs int64
|
||||
// programmingIPs are the IPs in state "PendingProgramming".
|
||||
programmingIPs int64
|
||||
// releasingIPs are the IPs in state "PendingRelease".
|
||||
releasingIPs int64
|
||||
}
|
||||
|
||||
func (service *HTTPRestService) buildIPState() *ipState {
|
||||
service.Lock()
|
||||
defer service.Unlock()
|
||||
|
||||
state := ipState{
|
||||
allocatedIPs: 0,
|
||||
assignedIPs: 0,
|
||||
availableIPs: 0,
|
||||
}
|
||||
|
||||
//nolint:gocritic // This has to iterate over the IP Config state to get the counts.
|
||||
for _, ipConfig := range service.PodIPConfigState {
|
||||
state.allocatedIPs++
|
||||
if ipConfig.GetState() == types.Assigned {
|
||||
state.assignedIPs++
|
||||
}
|
||||
if ipConfig.GetState() == types.Available {
|
||||
state.availableIPs++
|
||||
}
|
||||
if ipConfig.GetState() == types.PendingProgramming {
|
||||
state.programmingIPs++
|
||||
}
|
||||
if ipConfig.GetState() == types.PendingRelease {
|
||||
state.releasingIPs++
|
||||
}
|
||||
}
|
||||
|
||||
logger.Printf("[IP Usage] allocated IPs: %d, assigned IPs: %d, available IPs: %d", state.allocatedIPs, state.assignedIPs, state.availableIPs)
|
||||
return &state
|
||||
}
|
|
@ -10,51 +10,98 @@ import (
|
|||
"sigs.k8s.io/controller-runtime/pkg/metrics"
|
||||
)
|
||||
|
||||
var httpRequestLatency = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "http_request_latency_seconds",
|
||||
Help: "Request latency in seconds by endpoint, verb, and response code.",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
[]string{"url", "verb", "cns_return_code"},
|
||||
const (
|
||||
subnetLabel = "subnet"
|
||||
subnetCIDRLabel = "subnet_cidr"
|
||||
podnetARMIDLabel = "podnet_arm_id"
|
||||
cnsReturnCode = "cns_return_code"
|
||||
customerMetricLabel = "customer_metric"
|
||||
customerMetricLabelValue = "customer metric"
|
||||
)
|
||||
|
||||
var ipAssignmentLatency = prometheus.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "ip_assignment_latency_seconds",
|
||||
Help: "Pod IP assignment latency in seconds",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
)
|
||||
|
||||
var ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "ipconfigstatus_state_transition_seconds",
|
||||
Help: "Time spent by the IP Configuration Status in each state transition",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
[]string{"previous_state", "next_state"},
|
||||
)
|
||||
|
||||
var syncHostNCVersionCount = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "sync_host_nc_version_total",
|
||||
Help: "Count of Sync Host NC by success or failure",
|
||||
},
|
||||
[]string{"ok"},
|
||||
)
|
||||
|
||||
var syncHostNCVersionLatency = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "sync_host_nc_version_latency_seconds",
|
||||
Help: "Sync Host NC Latency",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
[]string{"ok"},
|
||||
var (
|
||||
httpRequestLatency = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "http_request_latency_seconds",
|
||||
Help: "Request latency in seconds by endpoint, verb, and response code.",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
[]string{"url", "verb", "cns_return_code"},
|
||||
)
|
||||
ipAssignmentLatency = prometheus.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "ip_assignment_latency_seconds",
|
||||
Help: "Pod IP assignment latency in seconds",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
)
|
||||
ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "ipconfigstatus_state_transition_seconds",
|
||||
Help: "Time spent by the IP Configuration Status in each state transition",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
[]string{"previous_state", "next_state"},
|
||||
)
|
||||
syncHostNCVersionCount = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "sync_host_nc_version_total",
|
||||
Help: "Count of Sync Host NC by success or failure",
|
||||
},
|
||||
[]string{"ok"},
|
||||
)
|
||||
syncHostNCVersionLatency = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "sync_host_nc_version_latency_seconds",
|
||||
Help: "Sync Host NC Latency",
|
||||
//nolint:gomnd // default bucket consts
|
||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
|
||||
},
|
||||
[]string{"ok"},
|
||||
)
|
||||
allocatedIPCount = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "cx_allocated_ips_v2",
|
||||
Help: "Count of IPs CNS has Allocated",
|
||||
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
|
||||
},
|
||||
[]string{},
|
||||
)
|
||||
assignedIPCount = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "cx_assigned_ips_v2",
|
||||
Help: "Count of IPs CNS has Assigned to Pods",
|
||||
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
|
||||
},
|
||||
[]string{},
|
||||
)
|
||||
availableIPCount = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "cx_available_ips_v2",
|
||||
Help: "Count of IPs Available",
|
||||
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
|
||||
},
|
||||
[]string{},
|
||||
)
|
||||
pendingProgrammingIPCount = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "cx_pending_programming_ips_v2",
|
||||
Help: "Count of IPs in Pending Programming State",
|
||||
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
|
||||
},
|
||||
[]string{},
|
||||
)
|
||||
pendingReleaseIPCount = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "cx_pending_release_ips_v2",
|
||||
Help: "Count of IPs in Pending Release State",
|
||||
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
|
||||
},
|
||||
[]string{},
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -64,11 +111,14 @@ func init() {
|
|||
ipConfigStatusStateTransitionTime,
|
||||
syncHostNCVersionCount,
|
||||
syncHostNCVersionLatency,
|
||||
allocatedIPCount,
|
||||
assignedIPCount,
|
||||
availableIPCount,
|
||||
pendingProgrammingIPCount,
|
||||
pendingReleaseIPCount,
|
||||
)
|
||||
}
|
||||
|
||||
const cnsReturnCode = "Cns-Return-Code"
|
||||
|
||||
// Every http response is 200 so we really want cns response code.
|
||||
// Hard to do with middleware unless we deserialize the responses, but making it an explicit header works around it.
|
||||
// if that doesn't work we could have a separate countervec just for response codes.
|
||||
|
@ -91,3 +141,12 @@ func stateTransitionMiddleware(i *cns.IPConfigurationStatus, s types.IPState) {
|
|||
}
|
||||
ipConfigStatusStateTransitionTime.WithLabelValues(string(i.GetState()), string(s)).Observe(time.Since(i.LastStateTransition).Seconds())
|
||||
}
|
||||
|
||||
func publishIPStateMetrics(state *ipState) {
|
||||
labels := []string{} // TODO. ragasthya Add dimensions to the IP Usage metrics.
|
||||
allocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedIPs))
|
||||
assignedIPCount.WithLabelValues(labels...).Set(float64(state.assignedIPs))
|
||||
availableIPCount.WithLabelValues(labels...).Set(float64(state.availableIPs))
|
||||
pendingProgrammingIPCount.WithLabelValues(labels...).Set(float64(state.programmingIPs))
|
||||
pendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.releasingIPs))
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче