azure-container-networking/cns/metric/pool.go

package metric

import (
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// IP pool scaling requests go through the NodeNetworkConfig CRD,
// meaning they are asynchronous and eventually consistent, and have
// no guarantees on the order they are processed in. It's currently
// impractical to include an identifier with these requests that could
// be used to correlate a request with its result.
//
// The public functions in this file are a way to get some (dirty, incomplete)
// data on the amount of time it takes from when we request more or fewer IPs
// to when we receive an updated NNC CRD with more or fewer IPs.
//
// We get this data by recording a start time when we push an alloc/dealloc
// request via the NNC, and then popping any alloc/dealloc start times that
// we have saved as soon as we receive an updated NNC.
// If we have not yet received a new NNC, repeated requests to start an
// alloc/dealloc timer will no-op - we only record the longest span between
// pushing an NNC spec and receiving an updated NNC status.
//
// This data will be skewed toward short durations: if there is no in-flight
// alloc or dealloc, and we queue one right before receiving an NNC status
// update, we have no way to tell that the update is unrelated to the
// just-queued request, and will record a spuriously short response latency.
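//
// A minimal sketch of the intended call pattern, for illustration only - the
// poolScaler type and the pushNNCSpec/onNNCStatusUpdate helpers below are
// hypothetical, not part of CNS:
//
//	func (s *poolScaler) requestMoreIPs(batch int64) {
//		metric.StartPoolIncreaseTimer(batch) // record a start time as we push a larger spec
//		s.pushNNCSpec()
//	}
//
//	func (s *poolScaler) onNNCStatusUpdate() {
//		metric.ObserverPoolScaleLatency() // pop and observe any pending start times
//	}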
var incLatency prometheus.ObserverVec = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Buckets: prometheus.ExponentialBuckets(0.05, 2, 15), //nolint:gomnd // 50 ms to ~800 seconds
		Help:    "IP pool size increase latency in seconds by batch size",
		Name:    "ip_pool_inc_latency_seconds",
	},
	[]string{"batch"},
)

var decLatency prometheus.ObserverVec = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Buckets: prometheus.ExponentialBuckets(0.05, 2, 15), //nolint:gomnd // 50 ms to ~800 seconds
		Help:    "IP pool size decrease latency in seconds by batch size",
		Name:    "ip_pool_dec_latency_seconds",
	},
	[]string{"batch"},
)
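
// For reference, prometheus.ExponentialBuckets(0.05, 2, 15) produces upper
// bounds of 0.05, 0.1, 0.2, 0.4, ... 409.6, 819.2 seconds, so both histograms
// cover roughly 50 ms up to ~13.7 minutes of observed scale latency.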

type scaleEvent struct {
	start time.Time
	batch int64
}
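
// incEvents and decEvents each hold at most one pending scale event; the
// buffer size of 1 is what makes repeated Start* calls no-op while a
// request is already in flight.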
var (
	incEvents = make(chan scaleEvent, 1)
	decEvents = make(chan scaleEvent, 1)
)

// StartPoolIncreaseTimer records the start of an IP allocation request.
// If an IP allocation request is already in flight, this function is a no-op.
func StartPoolIncreaseTimer(batch int64) {
	e := scaleEvent{
		start: time.Now(),
		batch: batch,
	}
	// Non-blocking send: if a start time is already pending, keep the older one.
	select {
	case incEvents <- e:
	default:
	}
}

// StartPoolDecreaseTimer records the start of an IP deallocation request.
// If an IP deallocation request is already in flight, this function is a no-op.
func StartPoolDecreaseTimer(batch int64) {
	e := scaleEvent{
		start: time.Now(),
		batch: batch,
	}
	// Non-blocking send: if a start time is already pending, keep the older one.
	select {
	case decEvents <- e:
	default:
	}
}

// ObserverPoolScaleLatency observes the elapsed time since the oldest
// unobserved allocation and deallocation request starts. If no request
// starts have been recorded, this function is a no-op.
func ObserverPoolScaleLatency() {
	// Non-blocking receives: observe and clear any pending start times.
	select {
	case e := <-incEvents:
		incLatency.WithLabelValues(strconv.FormatInt(e.batch, 10)).Observe(time.Since(e.start).Seconds()) //nolint:gomnd // it's decimal
	default:
	}
	select {
	case e := <-decEvents:
		decLatency.WithLabelValues(strconv.FormatInt(e.batch, 10)).Observe(time.Since(e.start).Seconds()) //nolint:gomnd // it's decimal
	default:
	}
}

func init() {
	metrics.Registry.MustRegister(
		incLatency,
		decLatency,
	)
}