make ensureAROOperatorRunningDesiredVersion a condition (#2319)

A race condition is present in the current code, this PR
will fix it my modifying the existing action to a condition
with a timeout of 20 mins.
This commit is contained in:
Srinivas Atmakuri 2022-08-30 03:09:57 +05:30 коммит произвёл GitHub
Родитель e0c9352eb3
Коммит fa5ef9d222
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 301 добавлений и 50 удалений

Просмотреть файл

@ -50,7 +50,7 @@ func TestAdminUpdateSteps(t *testing.T) {
"[Action initializeOperatorDeployer-fm]",
"[Action ensureAROOperator-fm]",
"[Condition aroDeploymentReady-fm, timeout 20m0s]",
"[Action ensureAROOperatorRunningDesiredVersion-fm]",
"[Condition ensureAROOperatorRunningDesiredVersion-fm, timeout 5m0s]",
},
},
{
@ -89,7 +89,7 @@ func TestAdminUpdateSteps(t *testing.T) {
"[Action initializeOperatorDeployer-fm]",
"[Action ensureAROOperator-fm]",
"[Condition aroDeploymentReady-fm, timeout 20m0s]",
"[Action ensureAROOperatorRunningDesiredVersion-fm]",
"[Condition ensureAROOperatorRunningDesiredVersion-fm, timeout 5m0s]",
"[Action hiveCreateNamespace-fm]",
"[Action hiveEnsureResources-fm]",
"[Condition hiveClusterDeploymentReady-fm, timeout 5m0s]",
@ -133,7 +133,7 @@ func TestAdminUpdateSteps(t *testing.T) {
"[Action initializeOperatorDeployer-fm]",
"[Action ensureAROOperator-fm]",
"[Condition aroDeploymentReady-fm, timeout 20m0s]",
"[Action ensureAROOperatorRunningDesiredVersion-fm]",
"[Condition ensureAROOperatorRunningDesiredVersion-fm, timeout 5m0s]",
"[Action hiveCreateNamespace-fm]",
"[Action hiveEnsureResources-fm]",
"[Condition hiveClusterDeploymentReady-fm, timeout 5m0s]",

Просмотреть файл

@ -16,6 +16,7 @@ func (m *manager) isIngressProfileAvailable() bool {
func (m *manager) ensureAROOperator(ctx context.Context) error {
if !m.isIngressProfileAvailable() {
// If the ingress profile is not available, ARO operator update/deploy will fail.
m.log.Error("skip ensureAROOperator")
return nil
}
@ -29,23 +30,22 @@ func (m *manager) ensureAROOperator(ctx context.Context) error {
func (m *manager) aroDeploymentReady(ctx context.Context) (bool, error) {
if !m.isIngressProfileAvailable() {
// If the ingress profile is not available, ARO operator update/deploy will fail.
m.log.Error("skip aroDeploymentReady")
// skip and don't retry
return true, nil
}
return m.aroOperatorDeployer.IsReady(ctx)
}
func (m *manager) ensureAROOperatorRunningDesiredVersion(ctx context.Context) error {
func (m *manager) ensureAROOperatorRunningDesiredVersion(ctx context.Context) (bool, error) {
if !m.isIngressProfileAvailable() {
// If the ingress profile is not available, ARO operator update/deploy will fail.
m.log.Error("skip ensureAROOperatorRunningDesiredVersion")
return nil
return true, nil
}
err := m.aroOperatorDeployer.IsRunningDesiredVersion(ctx)
if err != nil {
m.log.Errorf("cannot ensureAROOperatorRunningDesiredVersion.IsRunningDesiredVersion: %s", err.Error())
ok, err := m.aroOperatorDeployer.IsRunningDesiredVersion(ctx)
if !ok || err != nil {
return false, err
}
return err
return true, nil
}

Просмотреть файл

@ -224,3 +224,99 @@ func TestAroDeploymentReady(t *testing.T) {
})
}
}
func TestEnsureAROOperatorRunningDesiredVersion(t *testing.T) {
ctx := context.Background()
const (
key = "/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/resourceGroup/providers/Microsoft.RedHatOpenShift/openShiftClusters/resourceName1"
)
for _, tt := range []struct {
name string
doc *api.OpenShiftClusterDocument
mocks func(*mock_deploy.MockOperator)
wantRes bool
}{
{
name: "operator is runningDesiredVersion",
doc: &api.OpenShiftClusterDocument{
Key: strings.ToLower(key),
OpenShiftCluster: &api.OpenShiftCluster{
ID: key,
Properties: api.OpenShiftClusterProperties{
IngressProfiles: []api.IngressProfile{
{
Visibility: api.VisibilityPublic,
Name: "default",
},
},
},
},
},
mocks: func(dep *mock_deploy.MockOperator) {
dep.EXPECT().
IsRunningDesiredVersion(gomock.Any()).
Return(true, nil)
},
wantRes: true,
},
{
name: "operator is not runningDesiredVersion",
doc: &api.OpenShiftClusterDocument{
Key: strings.ToLower(key),
OpenShiftCluster: &api.OpenShiftCluster{
ID: key,
Properties: api.OpenShiftClusterProperties{
IngressProfiles: []api.IngressProfile{
{
Visibility: api.VisibilityPublic,
Name: "default",
},
},
},
},
},
mocks: func(dep *mock_deploy.MockOperator) {
dep.EXPECT().
IsRunningDesiredVersion(gomock.Any()).
Return(false, nil)
},
wantRes: false,
},
{
name: "enriched data not available - skip",
doc: &api.OpenShiftClusterDocument{
Key: strings.ToLower(key),
OpenShiftCluster: &api.OpenShiftCluster{
ID: key,
Properties: api.OpenShiftClusterProperties{
IngressProfiles: nil,
},
},
},
wantRes: true,
},
} {
t.Run(tt.name, func(t *testing.T) {
controller := gomock.NewController(t)
defer controller.Finish()
dep := mock_deploy.NewMockOperator(controller)
if tt.mocks != nil {
tt.mocks(dep)
}
m := &manager{
log: logrus.NewEntry(logrus.StandardLogger()),
doc: tt.doc,
aroOperatorDeployer: dep,
}
ok, err := m.ensureAROOperatorRunningDesiredVersion(ctx)
if err != nil || ok != tt.wantRes {
t.Error(err)
}
})
}
}

Просмотреть файл

@ -89,7 +89,7 @@ func (m *manager) adminUpdate() []steps.Step {
steps.Action(m.initializeOperatorDeployer), // depends on kube clients
steps.Action(m.ensureAROOperator),
steps.Condition(m.aroDeploymentReady, 20*time.Minute, true),
steps.Action(m.ensureAROOperatorRunningDesiredVersion),
steps.Condition(m.ensureAROOperatorRunningDesiredVersion, 5*time.Minute, true),
)
}

Просмотреть файл

@ -41,7 +41,6 @@ import (
"github.com/Azure/ARO-RP/pkg/util/restconfig"
"github.com/Azure/ARO-RP/pkg/util/subnet"
utiltls "github.com/Azure/ARO-RP/pkg/util/tls"
"github.com/Azure/ARO-RP/pkg/util/version"
)
//go:embed staticresources
@ -50,7 +49,7 @@ var embeddedFiles embed.FS
type Operator interface {
CreateOrUpdate(context.Context) error
IsReady(context.Context) (bool, error)
IsRunningDesiredVersion(context.Context) error
IsRunningDesiredVersion(context.Context) (bool, error)
}
type operator struct {
@ -378,53 +377,72 @@ func (o *operator) IsReady(ctx context.Context) (bool, error) {
return true, nil
}
func checkOperatorDeploymentVersion(ctx context.Context, cli appsv1client.DeploymentInterface, name string, gitCommit string) error {
func checkOperatorDeploymentVersion(ctx context.Context, cli appsv1client.DeploymentInterface, name string, desiredVersion string) (bool, error) {
d, err := cli.Get(ctx, name, metav1.GetOptions{})
if err != nil {
return err
switch {
case kerrors.IsNotFound(err):
return false, nil
case err != nil:
return false, err
}
if d.Labels["version"] != gitCommit {
return errors.New(name + " is not running the desired version: " + gitCommit)
if d.Labels["version"] != desiredVersion {
return false, nil
}
return nil
return true, nil
}
func checkPodImageVersion(ctx context.Context, cli corev1client.PodInterface, namespace string, gitCommit string) error {
podList, err := cli.List(ctx, metav1.ListOptions{LabelSelector: "app=" + namespace})
if err != nil {
return err
func checkPodImageVersion(ctx context.Context, cli corev1client.PodInterface, role string, desiredVersion string) (bool, error) {
podList, err := cli.List(ctx, metav1.ListOptions{LabelSelector: "app=" + role})
switch {
case kerrors.IsNotFound(err):
return false, nil
case err != nil:
return false, err
}
imageTag := "latest"
for _, pod := range podList.Items {
imageTag := strings.Split(pod.Spec.Containers[0].Image, ":")
if imageTag[len(imageTag)-1] != gitCommit {
return errors.New(pod.Name + " pod of namespace " + pod.Namespace + " is not running the desired version: " + gitCommit)
if strings.Contains(pod.Spec.Containers[0].Image, ":") {
str := strings.Split(pod.Spec.Containers[0].Image, ":")
imageTag = str[len(str)-1]
}
}
return nil
if imageTag != desiredVersion {
return false, nil
}
return true, nil
}
func (o *operator) IsRunningDesiredVersion(ctx context.Context) error {
// check if aro-operator-master is running desired version
err := checkOperatorDeploymentVersion(ctx, o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace), "aro-operator-master", version.GitCommit)
if err != nil {
return err
func (o *operator) IsRunningDesiredVersion(ctx context.Context) (bool, error) {
// Get the desired Version
image := o.env.AROOperatorImage()
desiredVersion := "latest"
if strings.Contains(image, ":") {
str := strings.Split(image, ":")
desiredVersion = str[len(str)-1]
}
err = checkPodImageVersion(ctx, o.kubernetescli.CoreV1().Pods(pkgoperator.Namespace), "aro-operator-master", version.GitCommit)
if err != nil {
return err
if o.oc.Properties.OperatorVersion != "" {
desiredVersion = o.oc.Properties.OperatorVersion
}
// check if aro-operator-worker is running desired version
err = checkOperatorDeploymentVersion(ctx, o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace), "aro-operator-worker", version.GitCommit)
if err != nil {
return err
// Check if aro-operator-master is running desired version
ok, err := checkOperatorDeploymentVersion(ctx, o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace), "aro-operator-master", desiredVersion)
if !ok || err != nil {
return ok, err
}
err = checkPodImageVersion(ctx, o.kubernetescli.CoreV1().Pods(pkgoperator.Namespace), "aro-operator-worker", version.GitCommit)
if err != nil {
return err
ok, err = checkPodImageVersion(ctx, o.kubernetescli.CoreV1().Pods(pkgoperator.Namespace), "aro-operator-master", desiredVersion)
if !ok || err != nil {
return ok, err
}
return nil
// Check if aro-operator-worker is running desired version
ok, err = checkOperatorDeploymentVersion(ctx, o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace), "aro-operator-worker", desiredVersion)
if !ok || err != nil {
return ok, err
}
ok, err = checkPodImageVersion(ctx, o.kubernetescli.CoreV1().Pods(pkgoperator.Namespace), "aro-operator-worker", desiredVersion)
if !ok || err != nil {
return ok, err
}
return true, nil
}
func checkIngressIP(ingressProfiles []api.IngressProfile) (string, error) {

Просмотреть файл

@ -4,12 +4,16 @@ package deploy
// Licensed under the Apache License 2.0.
import (
"context"
"errors"
"reflect"
"testing"
"github.com/golang/mock/gomock"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/util/cmp"
@ -280,3 +284,135 @@ func TestOperatorVersion(t *testing.T) {
})
}
}
func TestCheckOperatorDeploymentVersion(t *testing.T) {
ctx := context.Background()
for _, tt := range []struct {
name string
deployment *appsv1.Deployment
desiredVersion string
want bool
wantErr error
}{
{
name: "arooperator deployment has correct version",
deployment: &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "arooperator-deploy",
Namespace: "openshift-azure-operator",
Labels: map[string]string{
"version": "abcde",
},
},
},
desiredVersion: "abcde",
want: true,
wantErr: nil,
},
{
name: "arooperator deployment has incorrect version",
deployment: &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "arooperator-deploy",
Namespace: "openshift-azure-operator",
Labels: map[string]string{
"version": "unknown",
},
},
},
desiredVersion: "abcde",
want: false,
wantErr: nil,
},
} {
t.Run(tt.name, func(t *testing.T) {
clientset := fake.NewSimpleClientset()
_, err := clientset.AppsV1().Deployments("openshift-azure-operator").Create(ctx, tt.deployment, metav1.CreateOptions{})
if err != nil {
t.Fatalf("error creating deployment: %v", err)
}
got, err := checkOperatorDeploymentVersion(ctx, clientset.AppsV1().Deployments("openshift-azure-operator"), tt.deployment.Name, tt.desiredVersion)
if err != nil && err.Error() != tt.wantErr.Error() ||
err == nil && tt.wantErr != nil {
t.Error(err)
}
if tt.want != got {
t.Fatalf("error with CheckOperatorDeploymentVersion test %s: got %v wanted %v", tt.name, got, tt.want)
}
})
}
}
func TestCheckPodImageVersion(t *testing.T) {
ctx := context.Background()
for _, tt := range []struct {
name string
pod *corev1.Pod
desiredVersion string
want bool
wantErr error
}{
{
name: "arooperator pod has correct image version",
pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "arooperator-pod",
Namespace: "openshift-azure-operator",
Labels: map[string]string{
"app": "arooperator-pod",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Image: "random-image:abcde",
},
},
},
},
desiredVersion: "abcde",
want: true,
wantErr: nil,
},
{
name: "arooperator pod has incorrect image version",
pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "arooperator-pod",
Namespace: "openshift-azure-operator",
Labels: map[string]string{
"app": "arooperator-pod",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Image: "random-image:unknown",
},
},
},
},
desiredVersion: "abcde",
want: false,
wantErr: nil,
},
} {
t.Run(tt.name, func(t *testing.T) {
clientset := fake.NewSimpleClientset()
_, err := clientset.CoreV1().Pods("openshift-azure-operator").Create(ctx, tt.pod, metav1.CreateOptions{})
if err != nil {
t.Fatalf("error creating pod: %v", err)
}
got, err := checkPodImageVersion(ctx, clientset.CoreV1().Pods("openshift-azure-operator"), tt.pod.Name, tt.desiredVersion)
if err != nil && err.Error() != tt.wantErr.Error() ||
err == nil && tt.wantErr != nil {
t.Error(err)
}
if tt.want != got {
t.Fatalf("error with CheckPodImageVersion test %s: got %v wanted %v", tt.name, got, tt.want)
}
})
}
}

Просмотреть файл

@ -64,11 +64,12 @@ func (mr *MockOperatorMockRecorder) IsReady(arg0 interface{}) *gomock.Call {
}
// IsRunningDesiredVersion mocks base method.
func (m *MockOperator) IsRunningDesiredVersion(arg0 context.Context) error {
func (m *MockOperator) IsRunningDesiredVersion(arg0 context.Context) (bool, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "IsRunningDesiredVersion", arg0)
ret0, _ := ret[0].(error)
return ret0
ret0, _ := ret[0].(bool)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// IsRunningDesiredVersion indicates an expected call of IsRunningDesiredVersion.