add metric for tracking failure to start the controller-runtime manager (#1860)
Signed-off-by: Evan Baker <rbtr@users.noreply.github.com>
This commit is contained in:
Родитель
b77f715274
Коммит
ae8a11c7c8
|
@ -1260,7 +1260,8 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
|
||||||
if err := manager.Start(ctx); err != nil {
|
if err := manager.Start(ctx); err != nil {
|
||||||
logger.Errorf("[Azure CNS] Failed to start request controller: %v", err)
|
logger.Errorf("[Azure CNS] Failed to start request controller: %v", err)
|
||||||
// retry to start the request controller
|
// retry to start the request controller
|
||||||
// todo: add a CNS metric to count # of failures
|
// inc the managerStartFailures metric for failure tracking
|
||||||
|
managerStartFailures.Inc()
|
||||||
} else {
|
} else {
|
||||||
logger.Printf("exiting NodeNetworkConfig reconciler")
|
logger.Printf("exiting NodeNetworkConfig reconciler")
|
||||||
return
|
return
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
// managerStartFailures is a monotonic counter which tracks the number of times the controller-runtime
|
||||||
|
// manager failed to start. To drive alerting based on this metric, it is recommended to use the rate
|
||||||
|
// of increase over a period of time. A positive rate of change indicates that the CNS is actively
|
||||||
|
// failing and retrying.
|
||||||
|
var managerStartFailures = prometheus.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "manager_start_failures_total",
|
||||||
|
Help: "Number of times the controller-runtime manager failed to start.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
metrics.Registry.MustRegister(
|
||||||
|
managerStartFailures,
|
||||||
|
)
|
||||||
|
}
|
Загрузка…
Ссылка в новой задаче