зеркало из https://github.com/Azure/ARO-RP.git
add summary metric and dummy cache for reporting
This commit is contained in:
Родитель
01f13f2565
Коммит
32dfd1aaa2
|
@ -119,6 +119,10 @@ func (t ProvisioningState) IsTerminal() bool {
|
|||
return ProvisioningStateFailed == t || ProvisioningStateSucceeded == t
|
||||
}
|
||||
|
||||
func (t ProvisioningState) String() string {
|
||||
return string(t)
|
||||
}
|
||||
|
||||
// ClusterProfile represents a cluster profile.
|
||||
type ClusterProfile struct {
|
||||
MissingFields
|
||||
|
|
|
@ -35,7 +35,7 @@ func (mon *Monitor) emitAroOperatorConditions(ctx context.Context) error {
|
|||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
if mon.hourlyRun {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "arooperator.conditions",
|
||||
"status": c.Status,
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func (mon *Monitor) getClusterVersion() (*configv1.ClusterVersion, error) {
|
||||
if mon.cache.cv != nil {
|
||||
return mon.cache.cv, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
mon.cache.cv, err = mon.configcli.ConfigV1().ClusterVersions().Get("version", metav1.GetOptions{})
|
||||
return mon.cache.cv, err
|
||||
}
|
||||
|
||||
func (mon *Monitor) listClusterOperators() (*configv1.ClusterOperatorList, error) {
|
||||
if mon.cache.cos != nil {
|
||||
return mon.cache.cos, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
mon.cache.cos, err = mon.configcli.ConfigV1().ClusterOperators().List(metav1.ListOptions{})
|
||||
return mon.cache.cos, err
|
||||
}
|
||||
|
||||
func (mon *Monitor) listNodes() (*v1.NodeList, error) {
|
||||
if mon.cache.ns != nil {
|
||||
return mon.cache.ns, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
mon.cache.ns, err = mon.cli.CoreV1().Nodes().List(metav1.ListOptions{})
|
||||
return mon.cache.ns, err
|
||||
}
|
|
@ -10,9 +10,11 @@ import (
|
|||
"runtime"
|
||||
|
||||
"github.com/Azure/go-autorest/autorest/azure"
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
configclient "github.com/openshift/client-go/config/clientset/versioned"
|
||||
mcoclient "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned"
|
||||
"github.com/sirupsen/logrus"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/api"
|
||||
|
@ -23,9 +25,9 @@ import (
|
|||
)
|
||||
|
||||
type Monitor struct {
|
||||
env env.Interface
|
||||
log *logrus.Entry
|
||||
logMessages bool
|
||||
env env.Interface
|
||||
log *logrus.Entry
|
||||
hourlyRun bool
|
||||
|
||||
oc *api.OpenShiftCluster
|
||||
dims map[string]string
|
||||
|
@ -35,9 +37,16 @@ type Monitor struct {
|
|||
mcocli mcoclient.Interface
|
||||
m metrics.Interface
|
||||
arocli aroclient.AroV1alpha1Interface
|
||||
|
||||
// access below only via the helper functions in cache.go
|
||||
cache struct {
|
||||
cos *configv1.ClusterOperatorList
|
||||
cv *configv1.ClusterVersion
|
||||
ns *v1.NodeList
|
||||
}
|
||||
}
|
||||
|
||||
func NewMonitor(ctx context.Context, env env.Interface, log *logrus.Entry, oc *api.OpenShiftCluster, m metrics.Interface, logMessages bool) (*Monitor, error) {
|
||||
func NewMonitor(ctx context.Context, env env.Interface, log *logrus.Entry, oc *api.OpenShiftCluster, m metrics.Interface, hourlyRun bool) (*Monitor, error) {
|
||||
r, err := azure.ParseResourceID(oc.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -84,9 +93,9 @@ func NewMonitor(ctx context.Context, env env.Interface, log *logrus.Entry, oc *a
|
|||
}
|
||||
|
||||
return &Monitor{
|
||||
env: env,
|
||||
log: log,
|
||||
logMessages: logMessages,
|
||||
env: env,
|
||||
log: log,
|
||||
hourlyRun: hourlyRun,
|
||||
|
||||
oc: oc,
|
||||
dims: dims,
|
||||
|
@ -126,6 +135,7 @@ func (mon *Monitor) Monitor(ctx context.Context) {
|
|||
mon.emitPodConditions,
|
||||
mon.emitReplicasetStatuses,
|
||||
mon.emitStatefulsetStatuses,
|
||||
mon.emitSummary,
|
||||
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
|
||||
} {
|
||||
err = f(ctx)
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
"github.com/sirupsen/logrus"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
type clusterOperatorConditionsIgnoreStruct struct {
|
||||
|
@ -35,11 +34,10 @@ var clusterOperatorConditionsExpected = map[configv1.ClusterStatusConditionType]
|
|||
}
|
||||
|
||||
func (mon *Monitor) emitClusterOperatorConditions(ctx context.Context) error {
|
||||
cos, err := mon.configcli.ConfigV1().ClusterOperators().List(metav1.ListOptions{})
|
||||
cos, err := mon.listClusterOperators()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mon.emitGauge("clusteroperator.count", int64(len(cos.Items)), nil)
|
||||
|
||||
for _, co := range cos.Items {
|
||||
|
@ -54,7 +52,7 @@ func (mon *Monitor) emitClusterOperatorConditions(ctx context.Context) error {
|
|||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
if mon.hourlyRun {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "clusteroperator.conditions",
|
||||
"name": co.Name,
|
||||
|
|
|
@ -5,17 +5,15 @@ package cluster
|
|||
|
||||
import (
|
||||
"context"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitClusterOperatorVersions(ctx context.Context) error {
|
||||
cv, err := mon.configcli.ConfigV1().ClusterVersions().Get("version", metav1.GetOptions{})
|
||||
cv, err := mon.getClusterVersion()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cos, err := mon.configcli.ConfigV1().ClusterOperators().List(metav1.ListOptions{})
|
||||
cos, err := mon.listClusterOperators()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
"github.com/sirupsen/logrus"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
var clusterVersionConditionsExpected = map[configv1.ClusterStatusConditionType]configv1.ConditionStatus{
|
||||
|
@ -19,7 +18,7 @@ var clusterVersionConditionsExpected = map[configv1.ClusterStatusConditionType]c
|
|||
}
|
||||
|
||||
func (mon *Monitor) emitClusterVersionConditions(ctx context.Context) error {
|
||||
cv, err := mon.configcli.ConfigV1().ClusterVersions().Get("version", metav1.GetOptions{})
|
||||
cv, err := mon.getClusterVersion()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -28,13 +27,12 @@ func (mon *Monitor) emitClusterVersionConditions(ctx context.Context) error {
|
|||
if c.Status == clusterVersionConditionsExpected[c.Type] {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("clusterversion.conditions", 1, map[string]string{
|
||||
"status": string(c.Status),
|
||||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
if mon.hourlyRun {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "clusterversion.conditions",
|
||||
"status": c.Status,
|
||||
|
|
|
@ -7,28 +7,15 @@ import (
|
|||
"context"
|
||||
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitClusterVersions(ctx context.Context) error {
|
||||
cv, err := mon.configcli.ConfigV1().ClusterVersions().Get("version", metav1.GetOptions{})
|
||||
cv, err := mon.getClusterVersion()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Find the actual current cluster state. The history is ordered by most
|
||||
// recent first, so find the latest "Completed" status to get current
|
||||
// cluster version
|
||||
var actualVersion string
|
||||
for _, history := range cv.Status.History {
|
||||
if history.State == configv1.CompletedUpdate {
|
||||
actualVersion = history.Version
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
mon.emitGauge("cluster.versions", 1, map[string]string{
|
||||
"actualVersion": actualVersion,
|
||||
"actualVersion": actualVersion(cv),
|
||||
"desiredVersion": desiredVersion(cv),
|
||||
"resourceProviderVersion": mon.oc.Properties.ProvisionedBy,
|
||||
})
|
||||
|
@ -36,6 +23,18 @@ func (mon *Monitor) emitClusterVersions(ctx context.Context) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// actualVersion finds the actual current cluster state. The history is ordered by most
|
||||
// recent first, so find the latest "Completed" status to get current
|
||||
// cluster version
|
||||
func actualVersion(cv *configv1.ClusterVersion) string {
|
||||
for _, history := range cv.Status.History {
|
||||
if history.State == configv1.CompletedUpdate {
|
||||
return history.Version
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func desiredVersion(cv *configv1.ClusterVersion) string {
|
||||
if cv.Spec.DesiredUpdate != nil &&
|
||||
cv.Spec.DesiredUpdate.Version != "" {
|
||||
|
|
|
@ -38,7 +38,7 @@ func (mon *Monitor) emitMachineConfigPoolConditions(ctx context.Context) error {
|
|||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
if mon.hourlyRun {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "machineconfigpool.conditions",
|
||||
"name": mcp.Name,
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
"github.com/sirupsen/logrus"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
var nodeConditionsExpected = map[v1.NodeConditionType]v1.ConditionStatus{
|
||||
|
@ -19,7 +18,7 @@ var nodeConditionsExpected = map[v1.NodeConditionType]v1.ConditionStatus{
|
|||
}
|
||||
|
||||
func (mon *Monitor) emitNodeConditions(ctx context.Context) error {
|
||||
ns, err := mon.cli.CoreV1().Nodes().List(metav1.ListOptions{})
|
||||
ns, err := mon.listNodes()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -38,7 +37,7 @@ func (mon *Monitor) emitNodeConditions(ctx context.Context) error {
|
|||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
if mon.hourlyRun {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "node.conditions",
|
||||
"name": n.Name,
|
||||
|
@ -48,6 +47,12 @@ func (mon *Monitor) emitNodeConditions(ctx context.Context) error {
|
|||
}).Print()
|
||||
}
|
||||
}
|
||||
|
||||
mon.emitGauge("node.kubelet.version", 1, map[string]string{
|
||||
"name": n.Name,
|
||||
"kubeletVersion": n.Status.NodeInfo.KubeletVersion,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -29,6 +29,9 @@ func TestEmitNodeConditions(t *testing.T) {
|
|||
Status: corev1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
NodeInfo: corev1.NodeSystemInfo{
|
||||
KubeletVersion: "v1.17.1+9d33dd3",
|
||||
},
|
||||
},
|
||||
}, &corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
|
@ -41,6 +44,9 @@ func TestEmitNodeConditions(t *testing.T) {
|
|||
Status: corev1.ConditionFalse,
|
||||
},
|
||||
},
|
||||
NodeInfo: corev1.NodeSystemInfo{
|
||||
KubeletVersion: "v1.17.1+9d33dd3",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
|
@ -66,6 +72,15 @@ func TestEmitNodeConditions(t *testing.T) {
|
|||
"type": "Ready",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("node.kubelet.version", int64(1), map[string]string{
|
||||
"name": "aro-master-0",
|
||||
"kubeletVersion": "v1.17.1+9d33dd3",
|
||||
})
|
||||
m.EXPECT().EmitGauge("node.kubelet.version", int64(1), map[string]string{
|
||||
"name": "aro-master-1",
|
||||
"kubeletVersion": "v1.17.1+9d33dd3",
|
||||
})
|
||||
|
||||
err := mon.emitNodeConditions(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
|
|
@ -55,7 +55,7 @@ func (mon *Monitor) _emitPodConditions(ps *v1.PodList) {
|
|||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
if mon.hourlyRun {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "pod.conditions",
|
||||
"name": p.Name,
|
||||
|
@ -91,7 +91,7 @@ func (mon *Monitor) _emitPodContainerStatuses(ps *v1.PodList) {
|
|||
"reason": cs.State.Waiting.Reason,
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
if mon.hourlyRun {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "pod.containerstatuses",
|
||||
"name": p.Name,
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// masterRoleLabel is the node label key whose presence marks a node as a
// master when counting nodes for the cluster summary.
const masterRoleLabel = "node-role.kubernetes.io/master"

// workerRoleLabel is the node label key whose presence marks a node as a
// worker when counting nodes for the cluster summary.
const workerRoleLabel = "node-role.kubernetes.io/worker"
|
||||
|
||||
// emitSummary emits joined metric to be able to report better on all clusters
|
||||
// state in single dashboard
|
||||
func (mon *Monitor) emitSummary(ctx context.Context) error {
|
||||
if !mon.hourlyRun {
|
||||
return nil
|
||||
}
|
||||
|
||||
cv, err := mon.getClusterVersion()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ns, err := mon.listNodes()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var masterCount, workerCount int
|
||||
for _, node := range ns.Items {
|
||||
if _, ok := node.Labels[masterRoleLabel]; ok {
|
||||
masterCount++
|
||||
}
|
||||
if _, ok := node.Labels[workerRoleLabel]; ok {
|
||||
workerCount++
|
||||
}
|
||||
}
|
||||
|
||||
mon.emitGauge("cluster.summary", 1, map[string]string{
|
||||
"actualVersion": actualVersion(cv),
|
||||
"desiredVersion": desiredVersion(cv),
|
||||
"masterCount": strconv.Itoa(masterCount),
|
||||
"workerCount": strconv.Itoa(workerCount),
|
||||
"provisioningState": mon.oc.Properties.ProvisioningState.String(),
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
configfake "github.com/openshift/client-go/config/clientset/versioned/fake"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/api"
|
||||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitSummary(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
configcli := configfake.NewSimpleClientset(&configv1.ClusterVersion{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "version",
|
||||
},
|
||||
Status: configv1.ClusterVersionStatus{
|
||||
Desired: configv1.Update{
|
||||
Version: "4.3.3",
|
||||
},
|
||||
History: []configv1.UpdateHistory{
|
||||
{
|
||||
State: configv1.CompletedUpdate,
|
||||
Version: "4.3.0",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
cli := fake.NewSimpleClientset(&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "aro-master-0",
|
||||
Labels: map[string]string{
|
||||
masterRoleLabel: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "aro-node-1",
|
||||
Labels: map[string]string{
|
||||
workerRoleLabel: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "aro-node-2",
|
||||
Labels: map[string]string{
|
||||
workerRoleLabel: "",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
configcli: configcli,
|
||||
cli: cli,
|
||||
m: m,
|
||||
oc: &api.OpenShiftCluster{
|
||||
Properties: api.OpenShiftClusterProperties{
|
||||
ProvisioningState: api.ProvisioningStateSucceeded,
|
||||
},
|
||||
},
|
||||
hourlyRun: true,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("cluster.summary", int64(1), map[string]string{
|
||||
"actualVersion": "4.3.0",
|
||||
"desiredVersion": "4.3.3",
|
||||
"masterCount": "1",
|
||||
"workerCount": "2",
|
||||
"provisioningState": mon.oc.Properties.ProvisioningState.String(),
|
||||
})
|
||||
|
||||
err := mon.emitSummary(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
|
@ -187,11 +187,11 @@ out:
|
|||
}
|
||||
|
||||
// workOne checks the API server health of a cluster
|
||||
func (mon *monitor) workOne(ctx context.Context, log *logrus.Entry, doc *api.OpenShiftClusterDocument, logMessages bool) {
|
||||
func (mon *monitor) workOne(ctx context.Context, log *logrus.Entry, doc *api.OpenShiftClusterDocument, hourlyRun bool) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 50*time.Second)
|
||||
defer cancel()
|
||||
|
||||
c, err := cluster.NewMonitor(ctx, mon.env, log, doc.OpenShiftCluster, mon.clusterm, logMessages)
|
||||
c, err := cluster.NewMonitor(ctx, mon.env, log, doc.OpenShiftCluster, mon.clusterm, hourlyRun)
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
return
|
||||
|
|
Загрузка…
Ссылка в новой задаче