Merge pull request #1562 from bennerv/bugfix-monitoring-cm

Operator Bugfix: Clean out empty volumeClaimTemplate struct if set
This commit is contained in:
Mangirdas Judeikis 2021-06-30 08:10:02 +01:00 committed by GitHub
Parents c3bc45b3a4 77d7b9f9e9
Commit 357fa65f03
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 202 additions and 339 deletions
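The core of the fix is a type change in the monitoring controller's Config struct: an embedded struct field tagged omitempty still serializes as {} once its contents are cleared, whereas a *json.RawMessage set to nil is dropped from the output entirely. A minimal sketch of that distinction, using the standard encoding/json package for brevity (the operator itself encodes through ugorji/go/codec; the types below are illustrative, not the real Config):

package main

import (
	"encoding/json"
	"fmt"
)

// structField mirrors the old shape: omitempty never omits a struct
// value, so an empty volumeClaimTemplate is still emitted as {}.
type structField struct {
	VolumeClaimTemplate struct{} `json:"volumeClaimTemplate,omitempty"`
}

// pointerField mirrors the new shape: a nil pointer is omitted.
type pointerField struct {
	VolumeClaimTemplate *json.RawMessage `json:"volumeClaimTemplate,omitempty"`
}

func main() {
	a, _ := json.Marshal(structField{})
	b, _ := json.Marshal(pointerField{})
	fmt.Println(string(a)) // {"volumeClaimTemplate":{}}
	fmt.Println(string(b)) // {}
}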

View file

@@ -5,7 +5,7 @@ package monitoring
import (
"context"
"reflect"
"encoding/json"
"github.com/ghodss/yaml"
"github.com/sirupsen/logrus"
@@ -29,7 +29,10 @@ import (
"github.com/Azure/ARO-RP/pkg/operator/controllers"
)
var monitoringName = types.NamespacedName{Name: "cluster-monitoring-config", Namespace: "openshift-monitoring"}
var (
monitoringName = types.NamespacedName{Name: "cluster-monitoring-config", Namespace: "openshift-monitoring"}
prometheusLabels = "app=prometheus,prometheus=k8s"
)
// Config represents cluster monitoring stack configuration.
// Reconciler reconciles retention and storage settings,
@@ -39,20 +42,14 @@ type Config struct {
PrometheusK8s struct {
api.MissingFields
Retention string `json:"retention,omitempty"`
VolumeClaimTemplate struct {
api.MissingFields
} `json:"volumeClaimTemplate,omitempty"`
VolumeClaimTemplate *json.RawMessage `json:"volumeClaimTemplate,omitempty"`
} `json:"prometheusK8s,omitempty"`
AlertManagerMain struct {
api.MissingFields
VolumeClaimTemplate struct {
api.MissingFields
} `json:"volumeClaimTemplate,omitempty"`
VolumeClaimTemplate *json.RawMessage `json:"volumeClaimTemplate,omitempty"`
} `json:"alertmanagerMain,omitempty"`
}
var defaultConfig = `prometheusK8s: {}`
type Reconciler struct {
arocli aroclient.Interface
kubernetescli kubernetes.Interface
@@ -70,10 +67,39 @@ func NewReconciler(log *logrus.Entry, kubernetescli kubernetes.Interface, arocli
}
func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
for _, f := range []func(context.Context, ctrl.Request) (ctrl.Result, error){
r.reconcileConfiguration,
r.reconcilePVC, // TODO(mj): This should be removed once we don't have PVC anymore
} {
result, err := f(ctx, request)
if err != nil {
return result, err
}
}
return reconcile.Result{}, nil
}
func (r *Reconciler) reconcilePVC(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
pvcList, err := r.kubernetescli.CoreV1().PersistentVolumeClaims(monitoringName.Namespace).List(ctx, metav1.ListOptions{LabelSelector: prometheusLabels})
if err != nil {
return reconcile.Result{}, err
}
for _, pvc := range pvcList.Items {
err = r.kubernetescli.CoreV1().PersistentVolumeClaims(monitoringName.Namespace).Delete(ctx, pvc.Name, metav1.DeleteOptions{})
if err != nil {
return reconcile.Result{}, err
}
}
return reconcile.Result{}, nil
}
func (r *Reconciler) reconcileConfiguration(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
cm, isCreate, err := r.monitoringConfigMap(ctx)
if err != nil {
return reconcile.Result{}, err
}
if cm.Data == nil {
cm.Data = map[string]string{}
}
@@ -90,19 +116,20 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.
}
changed := false
// we are disabling persistence. We use omitempty on the struct to
// clean the fields
// Nil out the fields we don't want set
if configData.AlertManagerMain.VolumeClaimTemplate != nil {
configData.AlertManagerMain.VolumeClaimTemplate = nil
changed = true
}
if configData.PrometheusK8s.Retention != "" {
configData.PrometheusK8s.Retention = ""
changed = true
}
if !reflect.DeepEqual(configData.PrometheusK8s.VolumeClaimTemplate, struct{ api.MissingFields }{}) {
configData.PrometheusK8s.VolumeClaimTemplate = struct{ api.MissingFields }{}
changed = true
}
if !reflect.DeepEqual(configData.AlertManagerMain.VolumeClaimTemplate, struct{ api.MissingFields }{}) {
configData.AlertManagerMain.VolumeClaimTemplate = struct{ api.MissingFields }{}
if configData.PrometheusK8s.VolumeClaimTemplate != nil {
configData.PrometheusK8s.VolumeClaimTemplate = nil
changed = true
}
@@ -140,9 +167,7 @@ func (r *Reconciler) monitoringConfigMap(ctx context.Context) (*corev1.ConfigMap
Name: monitoringName.Name,
Namespace: monitoringName.Namespace,
},
Data: map[string]string{
"config.yaml": defaultConfig,
},
Data: nil,
}, true, nil
}
if err != nil {
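The new reconcilePVC above lists the prometheus PVCs by label selector and deletes them one at a time, surfacing each per-PVC error as it occurs. For reference, client-go can express the same cleanup as a single DeleteCollection call; a sketch under the assumption that per-item error granularity is not needed (deletePrometheusPVCs is a hypothetical helper, not part of this PR):

package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
)

// deletePrometheusPVCs removes every PVC matching the prometheus label
// selector in one API round trip instead of a list-then-delete loop.
func deletePrometheusPVCs(ctx context.Context, cli kubernetes.Interface) error {
	return cli.CoreV1().
		PersistentVolumeClaims("openshift-monitoring").
		DeleteCollection(ctx, metav1.DeleteOptions{},
			metav1.ListOptions{LabelSelector: "app=prometheus,prometheus=k8s"})
}

func main() {
	// Exercise the helper against a fake clientset.
	_ = deletePrometheusPVCs(context.Background(), fake.NewSimpleClientset())
}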

View file

@@ -5,6 +5,7 @@ package monitoring
import (
"context"
"reflect"
"strings"
"testing"
@@ -12,8 +13,11 @@ import (
"github.com/ugorji/go/codec"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
ctrl "sigs.k8s.io/controller-runtime"
"github.com/Azure/ARO-RP/pkg/util/cmp"
)
var cmMetadata = metav1.ObjectMeta{Name: "cluster-monitoring-config", Namespace: "openshift-monitoring"}
@@ -22,56 +26,28 @@ func TestReconcileMonitoringConfig(t *testing.T) {
log := logrus.NewEntry(logrus.StandardLogger())
type test struct {
name string
setConfigMap func() *Reconciler
kubernetescli kubernetes.Interface
wantConfig string
}
for _, tt := range []*test{
{
name: "ConfigMap does not exist - enable",
setConfigMap: func() *Reconciler {
return &Reconciler{
kubernetescli: fake.NewSimpleClientset(&corev1.ConfigMap{}),
log: log,
jsonHandle: new(codec.JsonHandle),
}
},
wantConfig: `
{}`,
},
{
name: "ConfigMap does not have data",
setConfigMap: func() *Reconciler {
return &Reconciler{
kubernetescli: fake.NewSimpleClientset(&corev1.ConfigMap{
ObjectMeta: cmMetadata,
}),
log: log,
jsonHandle: new(codec.JsonHandle),
}
},
wantConfig: ``,
kubernetescli: fake.NewSimpleClientset(),
wantConfig: `{}`,
},
{
name: "empty config.yaml",
setConfigMap: func() *Reconciler {
return &Reconciler{
kubernetescli: fake.NewSimpleClientset(&corev1.ConfigMap{
ObjectMeta: cmMetadata,
Data: map[string]string{
"config.yaml": ``,
},
}),
log: log,
jsonHandle: new(codec.JsonHandle),
}
},
wantConfig: ``,
},
{
name: "settings restored to default and extra fields are preserved",
setConfigMap: func() *Reconciler {
return &Reconciler{
kubernetescli: fake.NewSimpleClientset(&corev1.ConfigMap{
ObjectMeta: cmMetadata,
Data: map[string]string{
@@ -102,21 +78,37 @@ alertmanagerMain:
`,
},
}),
log: log,
jsonHandle: new(codec.JsonHandle),
}
},
wantConfig: `
alertmanagerMain:
extraField: yeet
prometheusK8s:
extraField: prometheus
`,
},
{
name: "empty volumeClaimTemplate struct is cleared out",
kubernetescli: fake.NewSimpleClientset(&corev1.ConfigMap{
ObjectMeta: cmMetadata,
Data: map[string]string{
"config.yaml": `
alertmanagerMain:
volumeClaimTemplate: {}
extraField: alertmanager
prometheusK8s:
volumeClaimTemplate: {}
bugs: not-here
`,
},
}),
wantConfig: `
alertmanagerMain:
extraField: alertmanager
prometheusK8s:
bugs: not-here
`,
},
{
name: "other monitoring components are configured",
setConfigMap: func() *Reconciler {
return &Reconciler{
kubernetescli: fake.NewSimpleClientset(&corev1.ConfigMap{
ObjectMeta: cmMetadata,
Data: map[string]string{
@@ -129,10 +121,6 @@ somethingElse:
`,
},
}),
log: log,
jsonHandle: new(codec.JsonHandle),
}
},
wantConfig: `
alertmanagerMain:
nodeSelector:
@@ -143,8 +131,12 @@ somethingElse:
},
} {
t.Run(tt.name, func(t *testing.T) {
r := &Reconciler{
kubernetescli: tt.kubernetescli,
log: log,
jsonHandle: new(codec.JsonHandle),
}
ctx := context.Background()
r := tt.setConfigMap()
request := ctrl.Request{}
request.Name = "cluster-monitoring-config"
request.Namespace = "openshift-monitoring"
@@ -165,3 +157,98 @@ somethingElse:
})
}
}
func TestReconcilePVC(t *testing.T) {
log := logrus.NewEntry(logrus.StandardLogger())
tests := []struct {
name string
kubernetescli kubernetes.Interface
want []corev1.PersistentVolumeClaim
}{
{
name: "Should delete the prometheus PVCs",
kubernetescli: fake.NewSimpleClientset(&corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "prometheus-k8s-db-prometheus-k8s-0",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "prometheus",
"prometheus": "k8s",
},
},
},
&corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "prometheus-k8s-db-prometheus-k8s-1",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "prometheus",
"prometheus": "k8s",
},
},
}),
want: nil,
},
{
name: "Should preserve 1 pvc",
kubernetescli: fake.NewSimpleClientset(&corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "prometheus-k8s-db-prometheus-k8s-0",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "prometheus",
"prometheus": "k8s",
},
},
},
&corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "random-pvc",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "random",
},
},
}),
want: []corev1.PersistentVolumeClaim{
{
ObjectMeta: metav1.ObjectMeta{
Name: "random-pvc",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "random",
},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()
r := &Reconciler{
log: log,
kubernetescli: tt.kubernetescli,
jsonHandle: new(codec.JsonHandle),
}
request := ctrl.Request{}
request.Name = "cluster-monitoring-config"
request.Namespace = "openshift-monitoring"
_, err := r.Reconcile(ctx, request)
if err != nil {
t.Fatal(err)
}
pvcList, err := r.kubernetescli.CoreV1().PersistentVolumeClaims(monitoringName.Namespace).List(context.Background(), metav1.ListOptions{})
if err != nil {
t.Fatalf("Unexpected error during list of PVCs: %v", err)
}
if !reflect.DeepEqual(pvcList.Items, tt.want) {
t.Error(cmp.Diff(pvcList.Items, tt.want))
}
})
}
}
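The "settings restored to default and extra fields are preserved" case passes because Config embeds api.MissingFields, which, as used with ugorji/go/codec throughout this controller, captures JSON keys that match no struct field and re-emits them on encode. A standalone sketch of that round trip with a hypothetical stand-in type implementing codec's MissingFielder interface (extras below is illustrative; the real helper is api.MissingFields in pkg/api):

package main

import (
	"fmt"

	"github.com/ugorji/go/codec"
)

// extras is a hypothetical stand-in for api.MissingFields: it satisfies
// codec.MissingFielder, so unknown keys survive decode/modify/encode.
type extras struct {
	m map[string]interface{}
}

func (e *extras) CodecMissingField(field []byte, value interface{}) bool {
	if e.m == nil {
		e.m = map[string]interface{}{}
	}
	e.m[string(field)] = value
	return true
}

func (e *extras) CodecMissingFields() map[string]interface{} { return e.m }

type config struct {
	extras
	Retention string `json:"retention,omitempty"`
}

func main() {
	h := new(codec.JsonHandle)

	var c config
	if err := codec.NewDecoderBytes([]byte(`{"retention":"15d","extraField":"kept"}`), h).Decode(&c); err != nil {
		panic(err)
	}

	c.Retention = "" // clear the managed field, as the reconciler does

	var out []byte
	if err := codec.NewEncoderBytes(&out, h).Encode(&c); err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // {"extraField":"kept"}
}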

View file

@@ -1,100 +0,0 @@
package workaround
// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.
// Clean the remaining PVCs in the openshift-monitoring namespace.
// These PVCs, with labels app=prometheus,prometheus=k8s, are left
// behind after switching prometheus back to emptyDir storage by
// disabling the feature flag in the monitoring controller.
// This workaround is in effect for all clusters set to
// have non-persistent prometheus.
// The cleanup loop removes only up to 2 PVCs as this is the
// production configuration at the time of the workaround release.
import (
"context"
"reflect"
"github.com/ghodss/yaml"
"github.com/sirupsen/logrus"
"github.com/ugorji/go/codec"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/retry"
"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/operator/controllers/monitoring"
"github.com/Azure/ARO-RP/pkg/util/version"
)
const (
prometheusLabels = "app=prometheus,prometheus=k8s"
monitoringName = "cluster-monitoring-config"
monitoringNamespace = "openshift-monitoring"
)
type cleanPromPVC struct {
log *logrus.Entry
cli kubernetes.Interface
}
func NewCleanFromPVCWorkaround(log *logrus.Entry, cli kubernetes.Interface) Workaround {
return &cleanPromPVC{
log: log,
cli: cli,
}
}
func (*cleanPromPVC) Name() string {
return "Clean prometheus PVC after disabling persistency"
}
func (c *cleanPromPVC) IsRequired(clusterVersion *version.Version) bool {
cm, err := c.cli.CoreV1().ConfigMaps(monitoringNamespace).Get(context.Background(), monitoringName, metav1.GetOptions{})
if err != nil {
return false
}
configDataJSON, err := yaml.YAMLToJSON([]byte(cm.Data["config.yaml"]))
if err != nil {
return false
}
var configData monitoring.Config
handle := new(codec.JsonHandle)
err = codec.NewDecoderBytes(configDataJSON, handle).Decode(&configData)
if err != nil {
return false
}
if configData.PrometheusK8s.Retention == "" && reflect.DeepEqual(configData.PrometheusK8s.VolumeClaimTemplate, struct{ api.MissingFields }{}) {
return true
}
return false
}
func (c *cleanPromPVC) Ensure(ctx context.Context) error {
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
pvcList, err := c.cli.CoreV1().PersistentVolumeClaims(monitoringNamespace).List(ctx, metav1.ListOptions{LabelSelector: prometheusLabels})
if err != nil {
return err
}
for _, pvc := range pvcList.Items {
err = c.cli.CoreV1().PersistentVolumeClaims(monitoringNamespace).Delete(ctx, pvc.Name, metav1.DeleteOptions{})
if err != nil {
return err
}
}
return nil
})
return err
}
func (c *cleanPromPVC) Remove(ctx context.Context) error {
return nil
}
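IsRequired above parses cm.Data["config.yaml"] by converting the YAML to JSON with ghodss/yaml and then decoding the bytes with the codec handle. A minimal sketch of just the conversion step:

package main

import (
	"fmt"

	"github.com/ghodss/yaml"
)

func main() {
	// Same conversion IsRequired performs before decoding into Config.
	j, err := yaml.YAMLToJSON([]byte("prometheusK8s:\n  retention: 15d\n"))
	if err != nil {
		panic(err)
	}
	fmt.Println(string(j)) // {"prometheusK8s":{"retention":"15d"}}
}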

View file

@@ -1,150 +0,0 @@
package workaround
// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.
import (
"context"
"testing"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
utillog "github.com/Azure/ARO-RP/pkg/util/log"
)
func TestCleanPromPVCEnsure(t *testing.T) {
pvc0 := corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "prometheus-k8s-db-prometheus-k8s-00",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "prometheus",
"prometheus": "k8s",
},
},
}
pvc1 := corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "prometheus-k8s-db-prometheus-k8s-0",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "prometheus",
"prometheus": "k8s",
},
},
}
pvc2 := corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "prometheus-k8s-db-prometheus-k8s-1",
Namespace: "openshift-monitoring",
Labels: map[string]string{
"app": "prometheus",
"prometheus": "k8s",
},
},
}
pvc3 := corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "dummy",
Namespace: "openshift-monitoring",
},
}
tests := []struct {
name string
cli *fake.Clientset
wantPVCNum int
wantErr error
}{
{
name: "Should delete the prometheus PVCs",
cli: fake.NewSimpleClientset(&pvc1, &pvc2, &pvc3),
wantPVCNum: 1,
wantErr: nil,
},
{
name: "Should not delete the prometheus PVCs, too many items",
cli: fake.NewSimpleClientset(&pvc1, &pvc2, &pvc3, &pvc0),
wantPVCNum: 1,
wantErr: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
w := NewCleanFromPVCWorkaround(utillog.GetLogger(), tt.cli)
err := w.Ensure(context.Background())
if err != tt.wantErr {
t.Fatalf("Unexpected error\nwant: %v\ngot: %v", tt.wantErr, err)
}
pvcList, err := tt.cli.CoreV1().PersistentVolumeClaims(monitoringNamespace).List(context.Background(), metav1.ListOptions{})
if err != nil {
t.Fatalf("Unexpected error during list of PVCs: %v", err)
}
if len(pvcList.Items) != tt.wantPVCNum {
t.Fatalf("Unexpected number of PVCs\nwant: %d\ngot: %d", tt.wantPVCNum, len(pvcList.Items))
}
})
}
}
func TestCleanPromPVCIsRequired(t *testing.T) {
newKubernetesCli := func(config string) *fake.Clientset {
configMap := corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: monitoringName,
Namespace: monitoringNamespace,
},
Data: make(map[string]string),
}
configMap.Data["config.yaml"] = config
return fake.NewSimpleClientset(&configMap)
}
tests := []struct {
name string
kcli *fake.Clientset
wantRequired bool
}{
{
name: "Should not be required, persistent set true",
kcli: newKubernetesCli(`prometheusK8s:
retention: 15d
volumeClaimTemplate:
spec:
resources:
requests:
storage: 100Gi
`),
wantRequired: false,
},
{
name: "Should be required, persistent set to false",
kcli: newKubernetesCli(`prometheusK8s:
retention: ""
volumeClaimTemplate: {}
`),
wantRequired: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
w := NewCleanFromPVCWorkaround(utillog.GetLogger(), tt.kcli)
required := w.IsRequired(nil)
if required != tt.wantRequired {
t.Fatalf("Unexpected workaroud required result\nwant: %t\ngot: %t", tt.wantRequired, required)
}
})
}
}

View file

@@ -44,7 +44,7 @@ func NewReconciler(log *logrus.Entry, kubernetescli kubernetes.Interface, config
configcli: configcli,
arocli: arocli,
restConfig: restConfig,
workarounds: []Workaround{NewSystemReserved(log, mcocli, dh), NewIfReload(log, kubernetescli), NewCleanFromPVCWorkaround(log, kubernetescli)},
workarounds: []Workaround{NewSystemReserved(log, mcocli, dh), NewIfReload(log, kubernetescli)},
log: log,
}
}

View file

@@ -21,7 +21,6 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"
"github.com/Azure/ARO-RP/pkg/api"
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
"github.com/Azure/ARO-RP/pkg/operator/controllers/monitoring"
"github.com/Azure/ARO-RP/pkg/util/ready"
@@ -269,8 +268,10 @@ var _ = Describe("ARO Operator - Cluster Monitoring ConfigMap", func() {
log.Warn(err)
}
Expect(configData.PrometheusK8s.Retention).To(Equal(""))
Expect(configData.PrometheusK8s.VolumeClaimTemplate).To(Equal(struct{ api.MissingFields }{}))
Expect(configData.PrometheusK8s.Retention).To(BeEmpty())
Expect(configData.PrometheusK8s.VolumeClaimTemplate).To(BeNil())
Expect(configData.AlertManagerMain.VolumeClaimTemplate).To(BeNil())
})
Specify("cluster monitoring configmap should be restored if deleted", func() {