зеркало из https://github.com/Azure/ARO-RP.git
Merge pull request #533 from mjudeikis/cluster.monitor
add more cluster monitoring
This commit is contained in:
Коммит
b3976e064c
|
@ -297,35 +297,12 @@ func TestValidateAdminKubernetesObjectsNonCustomer(t *testing.T) {
|
|||
name string
|
||||
wantErr string
|
||||
}{
|
||||
{
|
||||
test: "valid openshift-ns namespace",
|
||||
groupKind: "Valid-kind.openshift.io",
|
||||
namespace: "openshift-ns",
|
||||
name: "Valid-NAME-01",
|
||||
},
|
||||
{
|
||||
test: "valid openshift namespace",
|
||||
groupKind: "Valid-kind.openshift.io",
|
||||
namespace: "openshift",
|
||||
name: "Valid-NAME-01",
|
||||
},
|
||||
{
|
||||
test: "valid kube-ns namespace",
|
||||
groupKind: "Valid-kind.openshift.io",
|
||||
namespace: "kube-ns",
|
||||
name: "Valid-NAME-01",
|
||||
},
|
||||
{
|
||||
test: "valid default namespace",
|
||||
groupKind: "Valid-kind.openshift.io",
|
||||
namespace: "default",
|
||||
name: "Valid-NAME-01",
|
||||
},
|
||||
{
|
||||
test: "valid empty namespace",
|
||||
groupKind: "Valid-kind.openshift.io",
|
||||
name: "Valid-NAME-01",
|
||||
},
|
||||
{
|
||||
test: "invalid customer namespace",
|
||||
groupKind: "Valid-kind.openshift.io",
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
|
||||
"github.com/Azure/ARO-RP/pkg/api"
|
||||
"github.com/Azure/ARO-RP/pkg/database/cosmosdb"
|
||||
pkgnamespace "github.com/Azure/ARO-RP/pkg/util/namespace"
|
||||
)
|
||||
|
||||
func validateTerminalProvisioningState(state api.ProvisioningState) error {
|
||||
|
@ -84,11 +85,7 @@ func validateAdminJmespathFilter(filter string) (*jmespath.JMESPath, error) {
|
|||
var rxKubernetesString = regexp.MustCompile(`(?i)^[-a-z0-9.]{0,255}$`)
|
||||
|
||||
func validateAdminKubernetesObjectsNonCustomer(method, groupKind, namespace, name string) error {
|
||||
if namespace != "" &&
|
||||
namespace != "default" &&
|
||||
namespace != "openshift" &&
|
||||
!strings.HasPrefix(string(namespace), "kube-") &&
|
||||
!strings.HasPrefix(string(namespace), "openshift-") {
|
||||
if !pkgnamespace.IsOpenShift(namespace) {
|
||||
return api.NewCloudError(http.StatusForbidden, api.CloudErrorCodeForbidden, "", "Access to the provided namespace '%s' is forbidden.", namespace)
|
||||
}
|
||||
|
||||
|
|
|
@ -22,8 +22,9 @@ import (
|
|||
)
|
||||
|
||||
type Monitor struct {
|
||||
env env.Interface
|
||||
log *logrus.Entry
|
||||
env env.Interface
|
||||
log *logrus.Entry
|
||||
logMessages bool
|
||||
|
||||
oc *api.OpenShiftCluster
|
||||
dims map[string]string
|
||||
|
@ -34,7 +35,7 @@ type Monitor struct {
|
|||
m metrics.Interface
|
||||
}
|
||||
|
||||
func NewMonitor(ctx context.Context, env env.Interface, log *logrus.Entry, oc *api.OpenShiftCluster, m metrics.Interface) (*Monitor, error) {
|
||||
func NewMonitor(ctx context.Context, env env.Interface, log *logrus.Entry, oc *api.OpenShiftCluster, m metrics.Interface, logMessages bool) (*Monitor, error) {
|
||||
r, err := azure.ParseResourceID(oc.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -76,8 +77,9 @@ func NewMonitor(ctx context.Context, env env.Interface, log *logrus.Entry, oc *a
|
|||
}
|
||||
|
||||
return &Monitor{
|
||||
env: env,
|
||||
log: log,
|
||||
env: env,
|
||||
log: log,
|
||||
logMessages: logMessages,
|
||||
|
||||
oc: oc,
|
||||
dims: dims,
|
||||
|
@ -103,12 +105,18 @@ func (mon *Monitor) Monitor(ctx context.Context) {
|
|||
return
|
||||
}
|
||||
|
||||
for _, f := range []func(ctx context.Context) error{
|
||||
mon.emitClusterOperatorsMetrics,
|
||||
mon.emitClusterVersionMetrics,
|
||||
mon.emitNodesMetrics,
|
||||
for _, f := range []func(context.Context) error{
|
||||
mon.emitClusterOperatorConditions,
|
||||
mon.emitClusterOperatorVersions,
|
||||
mon.emitClusterVersions,
|
||||
mon.emitDaemonsetStatuses,
|
||||
mon.emitDeploymentStatuses,
|
||||
mon.emitMachineConfigPoolConditions,
|
||||
mon.emitNodeConditions,
|
||||
mon.emitPodConditions,
|
||||
mon.emitPrometheusAlerts,
|
||||
mon.emitMachineConfigPoolMetrics,
|
||||
mon.emitReplicasetStatuses,
|
||||
mon.emitStatefulsetStatuses,
|
||||
} {
|
||||
err = f(ctx)
|
||||
if err != nil {
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
"github.com/sirupsen/logrus"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
type clusterOperatorConditionsIgnoreStruct struct {
|
||||
Name string
|
||||
Type configv1.ClusterStatusConditionType
|
||||
Status configv1.ConditionStatus
|
||||
}
|
||||
|
||||
// clusterOperatorConditionsIgnore contains list of failures we know we can
|
||||
// ignore for now
|
||||
var clusterOperatorConditionsIgnore = map[clusterOperatorConditionsIgnoreStruct]struct{}{
|
||||
{"insights", "Disabled", configv1.ConditionFalse}: {},
|
||||
{"insights", "Disabled", configv1.ConditionTrue}: {},
|
||||
{"openshift-controller-manager", configv1.OperatorUpgradeable, configv1.ConditionUnknown}: {},
|
||||
{"service-ca", configv1.OperatorUpgradeable, configv1.ConditionUnknown}: {},
|
||||
{"service-catalog-apiserver", configv1.OperatorUpgradeable, configv1.ConditionUnknown}: {},
|
||||
}
|
||||
|
||||
var clusterOperatorConditionsExpected = map[configv1.ClusterStatusConditionType]configv1.ConditionStatus{
|
||||
configv1.OperatorAvailable: configv1.ConditionTrue,
|
||||
configv1.OperatorDegraded: configv1.ConditionFalse,
|
||||
configv1.OperatorProgressing: configv1.ConditionFalse,
|
||||
configv1.OperatorUpgradeable: configv1.ConditionTrue,
|
||||
}
|
||||
|
||||
func (mon *Monitor) emitClusterOperatorConditions(ctx context.Context) error {
|
||||
cos, err := mon.configcli.ConfigV1().ClusterOperators().List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, co := range cos.Items {
|
||||
for _, c := range co.Status.Conditions {
|
||||
if clusterOperatorConditionIsExpected(&co, &c) {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("clusteroperator.conditions", 1, map[string]string{
|
||||
"name": co.Name,
|
||||
"status": string(c.Status),
|
||||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "clusteroperator.conditions",
|
||||
"name": co.Name,
|
||||
"status": c.Status,
|
||||
"type": c.Type,
|
||||
"message": c.Message,
|
||||
}).Print()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func clusterOperatorConditionIsExpected(co *configv1.ClusterOperator, c *configv1.ClusterOperatorStatusCondition) bool {
|
||||
if _, ok := clusterOperatorConditionsIgnore[clusterOperatorConditionsIgnoreStruct{
|
||||
Name: co.Name,
|
||||
Type: c.Type,
|
||||
Status: c.Status,
|
||||
}]; ok {
|
||||
return true
|
||||
}
|
||||
|
||||
return c.Status == clusterOperatorConditionsExpected[c.Type]
|
||||
}
|
|
@ -15,7 +15,7 @@ import (
|
|||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitClusterOperatorsMetrics(t *testing.T) {
|
||||
func TestEmitClusterOperatorConditions(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
configcli := fake.NewSimpleClientset(&configv1.ClusterOperator{
|
||||
|
@ -28,10 +28,22 @@ func TestEmitClusterOperatorsMetrics(t *testing.T) {
|
|||
Type: configv1.OperatorAvailable,
|
||||
Status: configv1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: configv1.OperatorAvailable,
|
||||
Status: configv1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: configv1.OperatorDegraded,
|
||||
Status: configv1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: configv1.OperatorDegraded,
|
||||
Status: configv1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: configv1.OperatorProgressing,
|
||||
Status: configv1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: configv1.OperatorProgressing,
|
||||
Status: configv1.ConditionTrue,
|
||||
|
@ -41,22 +53,12 @@ func TestEmitClusterOperatorsMetrics(t *testing.T) {
|
|||
Status: configv1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: "dummy",
|
||||
Type: configv1.OperatorUpgradeable,
|
||||
Status: configv1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
Versions: []configv1.OperandVersion{
|
||||
{
|
||||
Name: "dummy",
|
||||
Version: "4.3.2",
|
||||
},
|
||||
{
|
||||
Name: "operator",
|
||||
Version: "4.3.1",
|
||||
},
|
||||
{
|
||||
Name: "operator",
|
||||
Version: "4.3.0",
|
||||
Type: "dummy",
|
||||
Status: configv1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -72,32 +74,37 @@ func TestEmitClusterOperatorsMetrics(t *testing.T) {
|
|||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("clusteroperators.conditions.count", int64(1), map[string]string{
|
||||
"clusteroperator": "console",
|
||||
"condition": "NotAvailable",
|
||||
m.EXPECT().EmitGauge("clusteroperator.conditions", int64(1), map[string]string{
|
||||
"name": "console",
|
||||
"type": "Available",
|
||||
"status": "False",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("clusteroperators.conditions.count", int64(1), map[string]string{
|
||||
"clusteroperator": "console",
|
||||
"condition": "Degraded",
|
||||
m.EXPECT().EmitGauge("clusteroperator.conditions", int64(1), map[string]string{
|
||||
"name": "console",
|
||||
"type": "Degraded",
|
||||
"status": "True",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("clusteroperators.conditions.count", int64(1), map[string]string{
|
||||
"clusteroperator": "console",
|
||||
"condition": "Progressing",
|
||||
m.EXPECT().EmitGauge("clusteroperator.conditions", int64(1), map[string]string{
|
||||
"name": "console",
|
||||
"type": "Progressing",
|
||||
"status": "True",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("clusteroperators.conditions.count", int64(1), map[string]string{
|
||||
"clusteroperator": "console",
|
||||
"condition": "NotUpgradeable",
|
||||
m.EXPECT().EmitGauge("clusteroperator.conditions", int64(1), map[string]string{
|
||||
"name": "console",
|
||||
"type": "Upgradeable",
|
||||
"status": "False",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("clusteroperators.version", int64(1), map[string]string{
|
||||
"clusteroperator": "console",
|
||||
"version": "4.3.1",
|
||||
m.EXPECT().EmitGauge("clusteroperator.conditions", int64(1), map[string]string{
|
||||
"name": "console",
|
||||
"type": "dummy",
|
||||
"status": "True",
|
||||
})
|
||||
|
||||
err := mon.emitClusterOperatorsMetrics(ctx)
|
||||
err := mon.emitClusterOperatorConditions(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
var clusterOperatorsConditionsWhitelist = map[configv1.ClusterStatusConditionType]struct{}{
|
||||
configv1.OperatorAvailable: {},
|
||||
configv1.OperatorDegraded: {},
|
||||
configv1.OperatorProgressing: {},
|
||||
configv1.OperatorUpgradeable: {},
|
||||
}
|
||||
|
||||
var clusterOperatorsNotConditions = map[configv1.ClusterStatusConditionType]struct{}{
|
||||
configv1.OperatorAvailable: {},
|
||||
configv1.OperatorUpgradeable: {},
|
||||
}
|
||||
|
||||
func (mon *Monitor) emitClusterOperatorsMetrics(ctx context.Context) error {
|
||||
cos, err := mon.configcli.ConfigV1().ClusterOperators().List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, co := range cos.Items {
|
||||
for _, c := range co.Status.Conditions {
|
||||
if _, ok := clusterOperatorsConditionsWhitelist[c.Type]; !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := clusterOperatorsNotConditions[c.Type]; ok {
|
||||
if c.Status == configv1.ConditionFalse {
|
||||
mon.emitGauge("clusteroperators.conditions.count", 1, map[string]string{
|
||||
"clusteroperator": co.Name,
|
||||
"condition": "Not" + string(c.Type),
|
||||
})
|
||||
}
|
||||
} else {
|
||||
if c.Status == configv1.ConditionTrue {
|
||||
mon.emitGauge("clusteroperators.conditions.count", 1, map[string]string{
|
||||
"clusteroperator": co.Name,
|
||||
"condition": string(c.Type),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
for _, v := range co.Status.Versions {
|
||||
if v.Name == "operator" {
|
||||
mon.emitGauge("clusteroperators.version", 1, map[string]string{
|
||||
"clusteroperator": co.Name,
|
||||
"version": v.Version,
|
||||
})
|
||||
break out
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitClusterOperatorVersions(ctx context.Context) error {
|
||||
cv, err := mon.configcli.ConfigV1().ClusterVersions().Get("version", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cos, err := mon.configcli.ConfigV1().ClusterOperators().List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, co := range cos.Items {
|
||||
for _, v := range co.Status.Versions {
|
||||
if v.Name != "operator" {
|
||||
continue
|
||||
}
|
||||
|
||||
if v.Version == desiredVersion(cv) {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("clusteroperator.versions", 1, map[string]string{
|
||||
"name": co.Name,
|
||||
"version": v.Version,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
configv1 "github.com/openshift/api/config/v1"
|
||||
"github.com/openshift/client-go/config/clientset/versioned/fake"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitClusterOperatorVersion(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
configcli := fake.NewSimpleClientset(
|
||||
&configv1.ClusterOperator{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "console",
|
||||
},
|
||||
Status: configv1.ClusterOperatorStatus{
|
||||
Versions: []configv1.OperandVersion{
|
||||
{
|
||||
Name: "operator",
|
||||
Version: "4.3.0",
|
||||
},
|
||||
{
|
||||
Name: "operator-good", // no metrics exected
|
||||
Version: "4.3.1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
&configv1.ClusterVersion{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "version",
|
||||
},
|
||||
Status: configv1.ClusterVersionStatus{
|
||||
Desired: configv1.Update{
|
||||
Version: "4.3.1",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
configcli: configcli,
|
||||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("clusteroperator.versions", int64(1), map[string]string{
|
||||
"name": "console",
|
||||
"version": "4.3.0",
|
||||
})
|
||||
|
||||
err := mon.emitClusterOperatorVersions(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
|
@ -10,18 +10,12 @@ import (
|
|||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitClusterVersionMetrics(ctx context.Context) error {
|
||||
func (mon *Monitor) emitClusterVersions(ctx context.Context) error {
|
||||
cv, err := mon.configcli.ConfigV1().ClusterVersions().Get("version", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
desiredVersion := cv.Status.Desired.Version
|
||||
if cv.Spec.DesiredUpdate != nil &&
|
||||
cv.Spec.DesiredUpdate.Version != "" {
|
||||
desiredVersion = cv.Spec.DesiredUpdate.Version
|
||||
}
|
||||
|
||||
// Find the actual current cluster state. The history is ordered by most
|
||||
// recent first, so find the latest "Completed" status to get current
|
||||
// cluster version
|
||||
|
@ -33,10 +27,19 @@ func (mon *Monitor) emitClusterVersionMetrics(ctx context.Context) error {
|
|||
}
|
||||
}
|
||||
|
||||
mon.emitGauge("cluster.version", 1, map[string]string{
|
||||
mon.emitGauge("cluster.versions", 1, map[string]string{
|
||||
"actualVersion": actualVersion,
|
||||
"desiredVersion": desiredVersion,
|
||||
"desiredVersion": desiredVersion(cv),
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func desiredVersion(cv *configv1.ClusterVersion) string {
|
||||
if cv.Spec.DesiredUpdate != nil &&
|
||||
cv.Spec.DesiredUpdate.Version != "" {
|
||||
return cv.Spec.DesiredUpdate.Version
|
||||
}
|
||||
|
||||
return cv.Status.Desired.Version
|
||||
}
|
|
@ -15,7 +15,7 @@ import (
|
|||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitClusterVersionMetrics(t *testing.T) {
|
||||
func TestEmitClusterVersion(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
for _, tt := range []struct {
|
||||
|
@ -86,12 +86,12 @@ func TestEmitClusterVersionMetrics(t *testing.T) {
|
|||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("cluster.version", int64(1), map[string]string{
|
||||
m.EXPECT().EmitGauge("cluster.versions", int64(1), map[string]string{
|
||||
"actualVersion": tt.wantActualVersion,
|
||||
"desiredVersion": tt.wantDesiredVersion,
|
||||
})
|
||||
|
||||
err := mon.emitClusterVersionMetrics(ctx)
|
||||
err := mon.emitClusterVersions(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/util/namespace"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitDaemonsetStatuses(ctx context.Context) error {
|
||||
dss, err := mon.cli.AppsV1().DaemonSets("").List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, ds := range dss.Items {
|
||||
if !namespace.IsOpenShift(ds.Namespace) {
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status.DesiredNumberScheduled == ds.Status.NumberAvailable {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("daemonset.statuses", 1, map[string]string{
|
||||
"desiredNumberScheduled": strconv.Itoa(int(ds.Status.DesiredNumberScheduled)),
|
||||
"name": ds.Name,
|
||||
"namespace": ds.Namespace,
|
||||
"numberAvailable": strconv.Itoa(int(ds.Status.NumberAvailable)),
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitDaemonsetStatuses(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cli := fake.NewSimpleClientset(
|
||||
&appsv1.DaemonSet{ // metrics expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name1",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.DaemonSetStatus{
|
||||
DesiredNumberScheduled: 2,
|
||||
NumberAvailable: 1,
|
||||
},
|
||||
}, &appsv1.DaemonSet{ // no metric expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name2",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.DaemonSetStatus{
|
||||
DesiredNumberScheduled: 2,
|
||||
NumberAvailable: 2,
|
||||
},
|
||||
}, &appsv1.DaemonSet{
|
||||
ObjectMeta: metav1.ObjectMeta{ // no metric expected -customer
|
||||
Name: "name2",
|
||||
Namespace: "customer",
|
||||
},
|
||||
Status: appsv1.DaemonSetStatus{
|
||||
DesiredNumberScheduled: 2,
|
||||
NumberAvailable: 1,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
cli: cli,
|
||||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("daemonset.statuses", int64(1), map[string]string{
|
||||
"desiredNumberScheduled": strconv.Itoa(2),
|
||||
"name": "name1",
|
||||
"namespace": "openshift",
|
||||
"numberAvailable": strconv.Itoa(1),
|
||||
})
|
||||
|
||||
err := mon.emitDaemonsetStatuses(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/util/namespace"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitDeploymentStatuses(ctx context.Context) error {
|
||||
ds, err := mon.cli.AppsV1().Deployments("").List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, d := range ds.Items {
|
||||
if !namespace.IsOpenShift(d.Namespace) {
|
||||
continue
|
||||
}
|
||||
|
||||
if d.Status.Replicas == d.Status.AvailableReplicas {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("deployment.statuses", 1, map[string]string{
|
||||
"availableReplicas": strconv.Itoa(int(d.Status.AvailableReplicas)),
|
||||
"name": d.Name,
|
||||
"namespace": d.Namespace,
|
||||
"replicas": strconv.Itoa(int(d.Status.Replicas)),
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitDeploymentStatuses(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cli := fake.NewSimpleClientset(
|
||||
&appsv1.Deployment{ // metrics expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name1",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.DeploymentStatus{
|
||||
Replicas: 2,
|
||||
AvailableReplicas: 1,
|
||||
},
|
||||
}, &appsv1.Deployment{ // no metric expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name2",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.DeploymentStatus{
|
||||
Replicas: 2,
|
||||
AvailableReplicas: 2,
|
||||
},
|
||||
}, &appsv1.Deployment{
|
||||
ObjectMeta: metav1.ObjectMeta{ // no metric expected -customer
|
||||
Name: "name2",
|
||||
Namespace: "customer",
|
||||
},
|
||||
Status: appsv1.DeploymentStatus{
|
||||
Replicas: 2,
|
||||
AvailableReplicas: 1,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
cli: cli,
|
||||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("deployment.statuses", int64(1), map[string]string{
|
||||
"availableReplicas": strconv.Itoa(1),
|
||||
"name": "name1",
|
||||
"namespace": "openshift",
|
||||
"replicas": strconv.Itoa(2),
|
||||
})
|
||||
|
||||
err := mon.emitDeploymentStatuses(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
}
|
|
@ -7,6 +7,7 @@ import (
|
|||
"context"
|
||||
|
||||
v1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
|
||||
"github.com/sirupsen/logrus"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
@ -19,7 +20,7 @@ var machineConfigPoolConditionsExpected = map[v1.MachineConfigPoolConditionType]
|
|||
v1.MachineConfigPoolUpdating: corev1.ConditionFalse,
|
||||
}
|
||||
|
||||
func (mon *Monitor) emitMachineConfigPoolMetrics(ctx context.Context) error {
|
||||
func (mon *Monitor) emitMachineConfigPoolConditions(ctx context.Context) error {
|
||||
mcps, err := mon.mcocli.MachineconfigurationV1().MachineConfigPools().List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -31,11 +32,21 @@ func (mon *Monitor) emitMachineConfigPoolMetrics(ctx context.Context) error {
|
|||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("machineconfigpools.conditions", 1, map[string]string{
|
||||
mon.emitGauge("machineconfigpool.conditions", 1, map[string]string{
|
||||
"name": mcp.Name,
|
||||
"type": string(c.Type),
|
||||
"status": string(c.Status),
|
||||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "machineconfigpool.conditions",
|
||||
"name": mcp.Name,
|
||||
"status": c.Status,
|
||||
"type": c.Type,
|
||||
"message": c.Message,
|
||||
}).Print()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -16,7 +16,7 @@ import (
|
|||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitMachineConfigPoolMetrics(t *testing.T) {
|
||||
func TestEmitMachineConfigPoolConditions(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
mcocli := fake.NewSimpleClientset(&v1.MachineConfigPool{
|
||||
|
@ -59,37 +59,37 @@ func TestEmitMachineConfigPoolMetrics(t *testing.T) {
|
|||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("machineconfigpools.conditions", int64(1), map[string]string{
|
||||
m.EXPECT().EmitGauge("machineconfigpool.conditions", int64(1), map[string]string{
|
||||
"name": "machine-config-pool",
|
||||
"type": "Degraded",
|
||||
"status": "True",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("machineconfigpools.conditions", int64(1), map[string]string{
|
||||
m.EXPECT().EmitGauge("machineconfigpool.conditions", int64(1), map[string]string{
|
||||
"name": "machine-config-pool",
|
||||
"type": "NodeDegraded",
|
||||
"status": "True",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("machineconfigpools.conditions", int64(1), map[string]string{
|
||||
m.EXPECT().EmitGauge("machineconfigpool.conditions", int64(1), map[string]string{
|
||||
"name": "machine-config-pool",
|
||||
"type": "RenderDegraded",
|
||||
"status": "True",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("machineconfigpools.conditions", int64(1), map[string]string{
|
||||
m.EXPECT().EmitGauge("machineconfigpool.conditions", int64(1), map[string]string{
|
||||
"name": "machine-config-pool",
|
||||
"type": "Updated",
|
||||
"status": "False",
|
||||
})
|
||||
|
||||
m.EXPECT().EmitGauge("machineconfigpools.conditions", int64(1), map[string]string{
|
||||
m.EXPECT().EmitGauge("machineconfigpool.conditions", int64(1), map[string]string{
|
||||
"name": "machine-config-pool",
|
||||
"type": "Updating",
|
||||
"status": "True",
|
||||
})
|
||||
|
||||
err := mon.emitMachineConfigPoolMetrics(ctx)
|
||||
err := mon.emitMachineConfigPoolConditions(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
var nodeConditionsExpected = map[v1.NodeConditionType]v1.ConditionStatus{
|
||||
v1.NodeDiskPressure: v1.ConditionFalse,
|
||||
v1.NodeMemoryPressure: v1.ConditionFalse,
|
||||
v1.NodePIDPressure: v1.ConditionFalse,
|
||||
v1.NodeReady: v1.ConditionTrue,
|
||||
}
|
||||
|
||||
func (mon *Monitor) emitNodeConditions(ctx context.Context) error {
|
||||
ns, err := mon.cli.CoreV1().Nodes().List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mon.emitGauge("nodes.count", int64(len(ns.Items)), nil)
|
||||
|
||||
for _, n := range ns.Items {
|
||||
for _, c := range n.Status.Conditions {
|
||||
if c.Status == nodeConditionsExpected[c.Type] {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("node.conditions", 1, map[string]string{
|
||||
"name": n.Name,
|
||||
"status": string(c.Status),
|
||||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "node.conditions",
|
||||
"name": n.Name,
|
||||
"status": c.Status,
|
||||
"type": c.Type,
|
||||
"message": c.Message,
|
||||
}).Print()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -15,7 +15,7 @@ import (
|
|||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitNodesMetrics(t *testing.T) {
|
||||
func TestEmitNodeConditions(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cli := fake.NewSimpleClientset(&corev1.Node{
|
||||
|
@ -55,14 +55,18 @@ func TestEmitNodesMetrics(t *testing.T) {
|
|||
}
|
||||
|
||||
m.EXPECT().EmitGauge("nodes.count", int64(2), map[string]string{})
|
||||
m.EXPECT().EmitGauge("nodes.conditions.count", int64(1), map[string]string{
|
||||
"condition": "NotReady",
|
||||
m.EXPECT().EmitGauge("node.conditions", int64(1), map[string]string{
|
||||
"name": "aro-master-0",
|
||||
"status": "True",
|
||||
"type": "MemoryPressure",
|
||||
})
|
||||
m.EXPECT().EmitGauge("nodes.conditions.count", int64(1), map[string]string{
|
||||
"condition": "MemoryPressure",
|
||||
m.EXPECT().EmitGauge("node.conditions", int64(1), map[string]string{
|
||||
"name": "aro-master-1",
|
||||
"status": "False",
|
||||
"type": "Ready",
|
||||
})
|
||||
|
||||
err := mon.emitNodesMetrics(ctx)
|
||||
err := mon.emitNodeConditions(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
|
@ -1,51 +0,0 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
var nodesNotConditions = map[corev1.NodeConditionType]struct{}{
|
||||
corev1.NodeReady: {},
|
||||
}
|
||||
|
||||
func (mon *Monitor) emitNodesMetrics(ctx context.Context) error {
|
||||
nodes, err := mon.cli.CoreV1().Nodes().List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mon.emitGauge("nodes.count", int64(len(nodes.Items)), nil)
|
||||
|
||||
counters := map[string]int64{}
|
||||
for _, node := range nodes.Items {
|
||||
for _, c := range node.Status.Conditions {
|
||||
// count 'Unknown' status as unhealthy state for each condition. In this way
|
||||
// we can flag issues without creating additional timeseries for each condition.
|
||||
// for NodeReady count a node when the status is False (not ready) or Unknown
|
||||
// for other conditions count when the status is True or Unknown
|
||||
if _, ok := nodesNotConditions[c.Type]; ok {
|
||||
if c.Status != corev1.ConditionTrue {
|
||||
counters["Not"+string(c.Type)]++
|
||||
}
|
||||
} else {
|
||||
if c.Status != corev1.ConditionFalse {
|
||||
counters[string(c.Type)]++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for condition, count := range counters {
|
||||
mon.emitGauge("nodes.conditions.count", count, map[string]string{
|
||||
"condition": condition,
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/util/namespace"
|
||||
)
|
||||
|
||||
var podConditionsExpected = map[v1.PodConditionType]v1.ConditionStatus{
|
||||
v1.ContainersReady: v1.ConditionTrue,
|
||||
v1.PodInitialized: v1.ConditionTrue,
|
||||
v1.PodScheduled: v1.ConditionTrue,
|
||||
v1.PodReady: v1.ConditionTrue,
|
||||
}
|
||||
|
||||
func (mon *Monitor) emitPodConditions(ctx context.Context) error {
|
||||
// to list pods once
|
||||
ps, err := mon.cli.CoreV1().Pods("").List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mon._emitPodConditions(ps)
|
||||
mon._emitPodContainerStatuses(ps)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mon *Monitor) _emitPodConditions(ps *v1.PodList) {
|
||||
for _, p := range ps.Items {
|
||||
if !namespace.IsOpenShift(p.Namespace) {
|
||||
continue
|
||||
}
|
||||
|
||||
if p.Status.Phase == v1.PodSucceeded {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, c := range p.Status.Conditions {
|
||||
if c.Status == podConditionsExpected[c.Type] {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("pod.conditions", 1, map[string]string{
|
||||
"name": p.Name,
|
||||
"namespace": p.Namespace,
|
||||
"status": string(c.Status),
|
||||
"type": string(c.Type),
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "pod.conditions",
|
||||
"name": p.Name,
|
||||
"namespace": p.Namespace,
|
||||
"status": c.Status,
|
||||
"type": c.Type,
|
||||
"message": c.Message,
|
||||
}).Print()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (mon *Monitor) _emitPodContainerStatuses(ps *v1.PodList) {
|
||||
for _, p := range ps.Items {
|
||||
if !namespace.IsOpenShift(p.Namespace) {
|
||||
continue
|
||||
}
|
||||
|
||||
if p.Status.Phase == v1.PodSucceeded {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, cs := range p.Status.ContainerStatuses {
|
||||
if cs.State.Waiting == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("pod.containerstatuses", 1, map[string]string{
|
||||
"name": p.Name,
|
||||
"namespace": p.Namespace,
|
||||
"containername": cs.Name,
|
||||
"reason": cs.State.Waiting.Reason,
|
||||
})
|
||||
|
||||
if mon.logMessages {
|
||||
mon.log.WithFields(logrus.Fields{
|
||||
"metric": "pod.containerstatuses",
|
||||
"name": p.Name,
|
||||
"namespace": p.Namespace,
|
||||
"containername": cs.Name,
|
||||
"reason": cs.State.Waiting.Reason,
|
||||
"message": cs.State.Waiting.Message,
|
||||
}).Print()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitPodConditions(t *testing.T) {
|
||||
cli := fake.NewSimpleClientset(
|
||||
&corev1.Pod{ // metrics expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: corev1.PodStatus{
|
||||
Conditions: []corev1.PodCondition{
|
||||
{
|
||||
Type: corev1.PodReady,
|
||||
Status: corev1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: corev1.PodInitialized,
|
||||
Status: corev1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: corev1.PodScheduled,
|
||||
Status: corev1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: corev1.ContainersReady,
|
||||
Status: corev1.ConditionFalse,
|
||||
},
|
||||
{
|
||||
Type: corev1.PodReady,
|
||||
Status: corev1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
cli: cli,
|
||||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("pod.conditions", int64(1), map[string]string{
|
||||
"name": "name",
|
||||
"namespace": "openshift",
|
||||
"status": "False",
|
||||
"type": "ContainersReady",
|
||||
})
|
||||
m.EXPECT().EmitGauge("pod.conditions", int64(1), map[string]string{
|
||||
"name": "name",
|
||||
"namespace": "openshift",
|
||||
"status": "False",
|
||||
"type": "Initialized",
|
||||
})
|
||||
m.EXPECT().EmitGauge("pod.conditions", int64(1), map[string]string{
|
||||
"name": "name",
|
||||
"namespace": "openshift",
|
||||
"status": "False",
|
||||
"type": "PodScheduled",
|
||||
})
|
||||
m.EXPECT().EmitGauge("pod.conditions", int64(1), map[string]string{
|
||||
"name": "name",
|
||||
"namespace": "openshift",
|
||||
"status": "False",
|
||||
"type": "Ready",
|
||||
})
|
||||
|
||||
ps, _ := cli.CoreV1().Pods("").List(metav1.ListOptions{})
|
||||
mon._emitPodConditions(ps)
|
||||
}
|
||||
|
||||
func TestEmitPodContainerStatuses(t *testing.T) {
|
||||
cli := fake.NewSimpleClientset(
|
||||
&corev1.Pod{ // metrics expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: corev1.PodStatus{
|
||||
ContainerStatuses: []corev1.ContainerStatus{
|
||||
{
|
||||
Name: "containername",
|
||||
State: corev1.ContainerState{
|
||||
Waiting: &corev1.ContainerStateWaiting{
|
||||
Reason: "ImagePullBackOff",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
cli: cli,
|
||||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("pod.containerstatuses", int64(1), map[string]string{
|
||||
"name": "name",
|
||||
"namespace": "openshift",
|
||||
"containername": "containername",
|
||||
"reason": "ImagePullBackOff",
|
||||
})
|
||||
|
||||
ps, _ := cli.CoreV1().Pods("").List(metav1.ListOptions{})
|
||||
mon._emitPodContainerStatuses(ps)
|
||||
}
|
|
@ -13,6 +13,7 @@ import (
|
|||
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/util/namespace"
|
||||
"github.com/Azure/ARO-RP/pkg/util/portforward"
|
||||
)
|
||||
|
||||
|
@ -66,6 +67,10 @@ func (mon *Monitor) emitPrometheusAlerts(ctx context.Context) error {
|
|||
}{}
|
||||
|
||||
for _, alert := range alerts {
|
||||
if !namespace.IsOpenShift(string(alert.Labels["namespace"])) {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(alert.Name(), "UsingDeprecatedAPI") {
|
||||
continue
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/util/namespace"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitReplicasetStatuses(ctx context.Context) error {
|
||||
rss, err := mon.cli.AppsV1().ReplicaSets("").List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, rs := range rss.Items {
|
||||
if !namespace.IsOpenShift(rs.Namespace) {
|
||||
continue
|
||||
}
|
||||
|
||||
if rs.Status.Replicas == rs.Status.AvailableReplicas {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("replicaset.statuses", 1, map[string]string{
|
||||
"availableReplicas": strconv.Itoa(int(rs.Status.AvailableReplicas)),
|
||||
"name": rs.Name,
|
||||
"namespace": rs.Namespace,
|
||||
"replicas": strconv.Itoa(int(rs.Status.Replicas)),
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitReplicasetStatuses(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cli := fake.NewSimpleClientset(
|
||||
&appsv1.ReplicaSet{ // metrics expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name1",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.ReplicaSetStatus{
|
||||
Replicas: 2,
|
||||
AvailableReplicas: 1,
|
||||
},
|
||||
}, &appsv1.ReplicaSet{ // no metric expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name2",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.ReplicaSetStatus{
|
||||
Replicas: 2,
|
||||
AvailableReplicas: 2,
|
||||
},
|
||||
}, &appsv1.ReplicaSet{
|
||||
ObjectMeta: metav1.ObjectMeta{ // no metric expected -customer
|
||||
Name: "name2",
|
||||
Namespace: "customer",
|
||||
},
|
||||
Status: appsv1.ReplicaSetStatus{
|
||||
Replicas: 2,
|
||||
AvailableReplicas: 1,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
cli: cli,
|
||||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("replicaset.statuses", int64(1), map[string]string{
|
||||
"availableReplicas": strconv.Itoa(1),
|
||||
"name": "name1",
|
||||
"namespace": "openshift",
|
||||
"replicas": strconv.Itoa(2),
|
||||
})
|
||||
|
||||
err := mon.emitReplicasetStatuses(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/Azure/ARO-RP/pkg/util/namespace"
|
||||
)
|
||||
|
||||
func (mon *Monitor) emitStatefulsetStatuses(ctx context.Context) error {
|
||||
sss, err := mon.cli.AppsV1().StatefulSets("").List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, ss := range sss.Items {
|
||||
if !namespace.IsOpenShift(ss.Namespace) {
|
||||
continue
|
||||
}
|
||||
|
||||
if ss.Status.Replicas == ss.Status.ReadyReplicas {
|
||||
continue
|
||||
}
|
||||
|
||||
mon.emitGauge("statefulset.statuses", 1, map[string]string{
|
||||
"name": ss.Name,
|
||||
"namespace": ss.Namespace,
|
||||
"replicas": strconv.Itoa(int(ss.Status.Replicas)),
|
||||
"readyReplicas": strconv.Itoa(int(ss.Status.ReadyReplicas)),
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package cluster
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
|
||||
)
|
||||
|
||||
func TestEmitStatefulsetStatuses(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cli := fake.NewSimpleClientset(
|
||||
&appsv1.StatefulSet{ // metrics expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name1",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.StatefulSetStatus{
|
||||
Replicas: 2,
|
||||
ReadyReplicas: 1,
|
||||
},
|
||||
}, &appsv1.StatefulSet{ // no metric expected
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "name2",
|
||||
Namespace: "openshift",
|
||||
},
|
||||
Status: appsv1.StatefulSetStatus{
|
||||
Replicas: 2,
|
||||
ReadyReplicas: 2,
|
||||
},
|
||||
}, &appsv1.StatefulSet{
|
||||
ObjectMeta: metav1.ObjectMeta{ // no metric expected -customer
|
||||
Name: "name2",
|
||||
Namespace: "customer",
|
||||
},
|
||||
Status: appsv1.StatefulSetStatus{
|
||||
Replicas: 2,
|
||||
ReadyReplicas: 1,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
controller := gomock.NewController(t)
|
||||
defer controller.Finish()
|
||||
|
||||
m := mock_metrics.NewMockInterface(controller)
|
||||
|
||||
mon := &Monitor{
|
||||
cli: cli,
|
||||
m: m,
|
||||
}
|
||||
|
||||
m.EXPECT().EmitGauge("statefulset.statuses", int64(1), map[string]string{
|
||||
"name": "name1",
|
||||
"namespace": "openshift",
|
||||
"replicas": strconv.Itoa(2),
|
||||
"readyReplicas": strconv.Itoa(1),
|
||||
})
|
||||
|
||||
err := mon.emitStatefulsetStatuses(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
}
|
|
@ -111,6 +111,8 @@ func (mon *monitor) worker(stop <-chan struct{}, delay time.Duration, id string)
|
|||
t := time.NewTicker(time.Minute)
|
||||
defer t.Stop()
|
||||
|
||||
h := time.Now().Hour()
|
||||
|
||||
out:
|
||||
for {
|
||||
mon.mu.RLock()
|
||||
|
@ -121,27 +123,31 @@ out:
|
|||
break
|
||||
}
|
||||
|
||||
newh := time.Now().Hour()
|
||||
|
||||
// TODO: later can modify here to poll once per N minutes and re-issue
|
||||
// cached metrics in the remaining minutes
|
||||
|
||||
mon.workOne(context.Background(), log, v.doc)
|
||||
mon.workOne(context.Background(), log, v.doc, newh != h)
|
||||
|
||||
select {
|
||||
case <-t.C:
|
||||
case <-stop:
|
||||
break out
|
||||
}
|
||||
|
||||
h = newh
|
||||
}
|
||||
|
||||
log.Debug("stopping monitoring")
|
||||
}
|
||||
|
||||
// workOne checks the API server health of a cluster
|
||||
func (mon *monitor) workOne(ctx context.Context, log *logrus.Entry, doc *api.OpenShiftClusterDocument) {
|
||||
func (mon *monitor) workOne(ctx context.Context, log *logrus.Entry, doc *api.OpenShiftClusterDocument, logMessages bool) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
c, err := cluster.NewMonitor(ctx, mon.env, log, doc.OpenShiftCluster, mon.clusterm)
|
||||
c, err := cluster.NewMonitor(ctx, mon.env, log, doc.OpenShiftCluster, mon.clusterm, logMessages)
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
return
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
package namespace
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// IsOpenShift reports whether ns is treated as an OpenShift managed
// namespace: the empty namespace, "default", "openshift", or any namespace
// whose name starts with "kube-" or "openshift-".
func IsOpenShift(ns string) bool {
	switch ns {
	case "", "default", "openshift":
		return true
	}

	return strings.HasPrefix(ns, "kube-") || strings.HasPrefix(ns, "openshift-")
}
|
|
@ -0,0 +1,45 @@
|
|||
package namespace
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the Apache License 2.0.
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIsOpenShift(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
namespace string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
namespace: "openshift-ns",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
namespace: "openshift",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
namespace: "kube-ns",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
namespace: "default",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
namespace: "customer",
|
||||
},
|
||||
} {
|
||||
t.Run(tt.namespace, func(t *testing.T) {
|
||||
got := IsOpenShift(tt.namespace)
|
||||
if tt.want != got {
|
||||
t.Error(got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Загрузка…
Ссылка в новой задаче