fix the nil crash and ignore ordinal in compare resources

Ryan Zhang 2022-09-06 15:30:35 -07:00
Parent 0e82fabab7
Commit 549847da7e
8 changed files with 297 additions and 76 deletions
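For context, a minimal sketch (function name and wiring are assumed, not taken from the repo) of the nil crash this commit fixes in deleteStaleManifest: the error returned by the dynamic client's Get was previously unchecked, so when a stale manifest had already been removed the returned object was nil and the subsequent GetOwnerReferences call panicked. The corrected pattern checks the error first and treats NotFound as "already deleted":

package sketch

import (
	"context"
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/dynamic"
)

// ownerRefsOf is a hypothetical helper illustrating the fixed pattern:
// inspect err before dereferencing the object returned by Get.
func ownerRefsOf(ctx context.Context, c dynamic.Interface, gvr schema.GroupVersionResource, ns, name string) ([]metav1.OwnerReference, error) {
	uObj, err := c.Resource(gvr).Namespace(ns).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		if apierrors.IsNotFound(err) {
			// the manifest is already gone; the old code fell through here and
			// dereferenced a nil uObj, which is the crash being fixed
			return nil, nil
		}
		return nil, fmt.Errorf("failed to get manifest %s/%s: %w", ns, name, err)
	}
	return uObj.GetOwnerReferences(), nil
}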

View file

@@ -31,6 +31,7 @@ import (
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/work-api/pkg/apis/v1alpha1"
"sigs.k8s.io/work-api/pkg/controllers"
"sigs.k8s.io/work-api/version"

View file

@@ -37,10 +37,12 @@ import (
func (r *ApplyWorkReconciler) generateDiff(ctx context.Context, work *workapi.Work, appliedWork *workapi.AppliedWork) ([]workapi.AppliedResourceMeta, []workapi.AppliedResourceMeta, error) {
var staleRes, newRes []workapi.AppliedResourceMeta
// for every resource applied in cluster, check if it's still in the work's manifest condition
// we keep the applied resource in the appliedWork status even if it is not applied successfully
// to make sure that it is safe to delete the resource from the member cluster.
for _, resourceMeta := range appliedWork.Status.AppliedResources {
resStillExist := false
for _, manifestCond := range work.Status.ManifestConditions {
if resourceMeta.ResourceIdentifier == manifestCond.Identifier {
if isSameResourceIdentifier(resourceMeta.ResourceIdentifier, manifestCond.Identifier) {
resStillExist = true
break
}
@@ -62,17 +64,21 @@ func (r *ApplyWorkReconciler) generateDiff(ctx context.Context, work *workapi.Wo
// we only add the applied one to the appliedWork status
if ac.Status == metav1.ConditionTrue {
resRecorded := false
// we keep the existing resourceMeta since it has the UID
// we update the identifier
// TODO: this UID may not be the current one if the resource is deleted and recreated
for _, resourceMeta := range appliedWork.Status.AppliedResources {
if resourceMeta.ResourceIdentifier == manifestCond.Identifier {
if isSameResourceIdentifier(resourceMeta.ResourceIdentifier, manifestCond.Identifier) {
resRecorded = true
newRes = append(newRes, resourceMeta)
newRes = append(newRes, workapi.AppliedResourceMeta{
ResourceIdentifier: manifestCond.Identifier,
UID: resourceMeta.UID,
})
break
}
}
if !resRecorded {
klog.V(5).InfoS("discovered a new resource",
"parent Work", work.GetName(), "discovered resource", manifestCond.Identifier)
klog.V(5).InfoS("discovered a new manifest resource",
"parent Work", work.GetName(), "manifest", manifestCond.Identifier)
obj, err := r.spokeDynamicClient.Resource(schema.GroupVersionResource{
Group: manifestCond.Identifier.Group,
Version: manifestCond.Identifier.Version,
@@ -80,10 +86,10 @@ func (r *ApplyWorkReconciler) generateDiff(ctx context.Context, work *workapi.Wo
}).Namespace(manifestCond.Identifier.Namespace).Get(ctx, manifestCond.Identifier.Name, metav1.GetOptions{})
switch {
case apierrors.IsNotFound(err):
klog.V(4).InfoS("the manifest resource is deleted", "manifest", manifestCond.Identifier)
klog.V(4).InfoS("the new manifest resource is already deleted", "parent Work", work.GetName(), "manifest", manifestCond.Identifier)
continue
case err != nil:
klog.ErrorS(err, "failed to retrieve the manifest", "manifest", manifestCond.Identifier)
klog.ErrorS(err, "failed to retrieve the manifest", "parent Work", work.GetName(), "manifest", manifestCond.Identifier)
return nil, nil, err
}
newRes = append(newRes, workapi.AppliedResourceMeta{
@@ -107,6 +113,16 @@ func (r *ApplyWorkReconciler) deleteStaleManifest(ctx context.Context, staleMani
}
uObj, err := r.spokeDynamicClient.Resource(gvr).Namespace(staleManifest.Namespace).
Get(ctx, staleManifest.Name, metav1.GetOptions{})
if err != nil {
// It is possible that the staled manifest was already deleted but the status wasn't updated to reflect that yet.
if apierrors.IsNotFound(err) {
klog.V(2).InfoS("the staled manifest already deleted", "manifest", staleManifest, "owner", owner)
continue
}
klog.ErrorS(err, "failed to get the staled manifest", "manifest", staleManifest, "owner", owner)
errs = append(errs, err)
continue
}
existingOwners := uObj.GetOwnerReferences()
newOwners := make([]metav1.OwnerReference, 0)
found := false
@@ -118,12 +134,12 @@ func (r *ApplyWorkReconciler) deleteStaleManifest(ctx context.Context, staleMani
}
}
if !found {
klog.ErrorS(err, "the stale manifest is not owned by this work, skip", "manifest", staleManifest, "owner", owner)
klog.V(4).InfoS("the stale manifest is not owned by this work, skip", "manifest", staleManifest, "owner", owner)
continue
}
if len(newOwners) == 0 {
klog.V(2).InfoS("delete the staled manifest", "manifest", staleManifest, "owner", owner)
err := r.spokeDynamicClient.Resource(gvr).Namespace(staleManifest.Namespace).
err = r.spokeDynamicClient.Resource(gvr).Namespace(staleManifest.Namespace).
Delete(ctx, staleManifest.Name, metav1.DeleteOptions{})
if err != nil && !apierrors.IsNotFound(err) {
klog.ErrorS(err, "failed to delete the staled manifest", "manifest", staleManifest, "owner", owner)
@@ -132,7 +148,7 @@ func (r *ApplyWorkReconciler) deleteStaleManifest(ctx context.Context, staleMani
} else {
klog.V(2).InfoS("remove the owner reference from the staled manifest", "manifest", staleManifest, "owner", owner)
uObj.SetOwnerReferences(newOwners)
_, err := r.spokeDynamicClient.Resource(gvr).Namespace(staleManifest.Namespace).Update(ctx, uObj, metav1.UpdateOptions{FieldManager: workFieldManagerName})
_, err = r.spokeDynamicClient.Resource(gvr).Namespace(staleManifest.Namespace).Update(ctx, uObj, metav1.UpdateOptions{FieldManager: workFieldManagerName})
if err != nil {
klog.ErrorS(err, "failed to remove the owner reference from manifest", "manifest", staleManifest, "owner", owner)
errs = append(errs, err)
@@ -141,3 +157,9 @@ func (r *ApplyWorkReconciler) deleteStaleManifest(ctx context.Context, staleMani
}
return utilerrors.NewAggregate(errs)
}
// isSameResourceIdentifier returns true if a and b identify the same object.
func isSameResourceIdentifier(a, b workapi.ResourceIdentifier) bool {
// compare GVKNN but ignore the Ordinal and Resource
return a.Group == b.Group && a.Version == b.Version && a.Kind == b.Kind && a.Namespace == b.Namespace && a.Name == b.Name
}
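A small illustrative example (not part of the commit) of why the new helper ignores Ordinal: the same object can appear at a different position in a later Work revision, so plain struct equality on ResourceIdentifier would wrongly treat it as a new resource.

package main

import (
	"fmt"

	"sigs.k8s.io/work-api/pkg/apis/v1alpha1"
)

// sameObject mirrors isSameResourceIdentifier: compare group/version/kind and
// namespace/name, ignoring Ordinal (manifest position) and Resource.
func sameObject(a, b v1alpha1.ResourceIdentifier) bool {
	return a.Group == b.Group && a.Version == b.Version && a.Kind == b.Kind &&
		a.Namespace == b.Namespace && a.Name == b.Name
}

func main() {
	a := v1alpha1.ResourceIdentifier{Ordinal: 0, Group: "apps", Version: "v1", Kind: "Deployment", Namespace: "default", Name: "web"}
	b := a
	b.Ordinal = 3 // same object, listed at a different position in the Work

	fmt.Println(a == b)           // false: struct equality trips over the ordinal
	fmt.Println(sameObject(a, b)) // true: it is still the same applied resource
}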

View file

@@ -18,12 +18,21 @@ package controllers
import (
"context"
"fmt"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/dynamic/fake"
testingclient "k8s.io/client-go/testing"
"sigs.k8s.io/work-api/pkg/apis/v1alpha1"
)
@@ -32,57 +41,162 @@ import (
// The tests pass back a collection of resources that should either
// be applied to the member cluster or removed.
func TestCalculateNewAppliedWork(t *testing.T) {
identifier := generateResourceIdentifier()
inputWork := generateWorkObj(nil)
inputWorkWithResourceIdentifier := generateWorkObj(&identifier)
inputAppliedWork := generateAppliedWorkObj(nil)
inputAppliedWorkWithResourceIdentifier := generateAppliedWorkObj(&identifier)
workIdentifier := generateResourceIdentifier()
diffOrdinalIdentifier := workIdentifier
diffOrdinalIdentifier.Ordinal = rand.Int()
tests := map[string]struct {
r ApplyWorkReconciler
inputWork v1alpha1.Work
inputAppliedWork v1alpha1.AppliedWork
expectedNewRes []v1alpha1.AppliedResourceMeta
expectedStaleRes []v1alpha1.AppliedResourceMeta
hasErr bool
spokeDynamicClient dynamic.Interface
inputWork v1alpha1.Work
inputAppliedWork v1alpha1.AppliedWork
expectedNewRes []v1alpha1.AppliedResourceMeta
expectedStaleRes []v1alpha1.AppliedResourceMeta
hasErr bool
}{
"AppliedWork and Work has been garbage collected; AppliedWork and Work of a resource both does not exist": {
r: ApplyWorkReconciler{},
inputWork: inputWork,
inputAppliedWork: inputAppliedWork,
expectedNewRes: []v1alpha1.AppliedResourceMeta(nil),
expectedStaleRes: []v1alpha1.AppliedResourceMeta(nil),
"Test work and appliedWork in sync with no manifest applied": {
spokeDynamicClient: nil,
inputWork: generateWorkObj(nil),
inputAppliedWork: generateAppliedWorkObj(nil),
expectedNewRes: []v1alpha1.AppliedResourceMeta(nil),
expectedStaleRes: []v1alpha1.AppliedResourceMeta(nil),
hasErr: false,
},
"AppliedWork and Work of a resource exists; there are nothing being deleted": {
r: ApplyWorkReconciler{joined: true},
inputWork: inputWorkWithResourceIdentifier,
inputAppliedWork: inputAppliedWorkWithResourceIdentifier,
"Test work and appliedWork in sync with one manifest applied": {
spokeDynamicClient: nil,
inputWork: generateWorkObj(&workIdentifier),
inputAppliedWork: generateAppliedWorkObj(&workIdentifier),
expectedNewRes: []v1alpha1.AppliedResourceMeta{
{
ResourceIdentifier: inputAppliedWorkWithResourceIdentifier.Status.AppliedResources[0].ResourceIdentifier,
UID: inputAppliedWorkWithResourceIdentifier.Status.AppliedResources[0].UID,
ResourceIdentifier: workIdentifier,
},
},
expectedStaleRes: []v1alpha1.AppliedResourceMeta(nil),
hasErr: false,
},
"Work resource has been deleted, but the corresponding AppliedWork remains": {
r: ApplyWorkReconciler{joined: true},
inputWork: inputWork,
inputAppliedWork: inputAppliedWorkWithResourceIdentifier,
expectedNewRes: []v1alpha1.AppliedResourceMeta(nil),
expectedStaleRes: []v1alpha1.AppliedResourceMeta{
"Test work and appliedWork has the same resource but with different ordinal": {
spokeDynamicClient: nil,
inputWork: generateWorkObj(&workIdentifier),
inputAppliedWork: generateAppliedWorkObj(&diffOrdinalIdentifier),
expectedNewRes: []v1alpha1.AppliedResourceMeta{
{
ResourceIdentifier: inputAppliedWorkWithResourceIdentifier.Status.AppliedResources[0].ResourceIdentifier,
UID: inputAppliedWorkWithResourceIdentifier.Status.AppliedResources[0].UID,
ResourceIdentifier: workIdentifier,
},
},
expectedStaleRes: []v1alpha1.AppliedResourceMeta(nil),
hasErr: false,
},
"Test work is missing one manifest": {
spokeDynamicClient: nil,
inputWork: generateWorkObj(nil),
inputAppliedWork: generateAppliedWorkObj(&workIdentifier),
expectedNewRes: []v1alpha1.AppliedResourceMeta(nil),
expectedStaleRes: []v1alpha1.AppliedResourceMeta{
{
ResourceIdentifier: workIdentifier,
},
},
hasErr: false,
},
"Test work has more manifest but not applied": {
spokeDynamicClient: nil,
inputWork: func() v1alpha1.Work {
return v1alpha1.Work{
Status: v1alpha1.WorkStatus{
ManifestConditions: []v1alpha1.ManifestCondition{
{
Identifier: workIdentifier,
Conditions: []metav1.Condition{
{
Type: ConditionTypeApplied,
Status: metav1.ConditionFalse,
},
},
},
},
},
}
}(),
inputAppliedWork: generateAppliedWorkObj(nil),
expectedNewRes: []v1alpha1.AppliedResourceMeta(nil),
expectedStaleRes: []v1alpha1.AppliedResourceMeta(nil),
hasErr: false,
},
"Test work is adding one manifest, happy case": {
spokeDynamicClient: func() *fake.FakeDynamicClient {
uObj := unstructured.Unstructured{}
uObj.SetUID(types.UID(rand.String(10)))
dynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
dynamicClient.PrependReactor("get", "*", func(action testingclient.Action) (handled bool, ret runtime.Object, err error) {
return true, uObj.DeepCopy(), nil
})
return dynamicClient
}(),
inputWork: generateWorkObj(&workIdentifier),
inputAppliedWork: generateAppliedWorkObj(nil),
expectedNewRes: []v1alpha1.AppliedResourceMeta{
{
ResourceIdentifier: workIdentifier,
},
},
expectedStaleRes: []v1alpha1.AppliedResourceMeta(nil),
hasErr: false,
},
"Test work is adding one manifest but not found on the member cluster": {
spokeDynamicClient: func() *fake.FakeDynamicClient {
dynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
dynamicClient.PrependReactor("get", "*", func(action testingclient.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, &apierrors.StatusError{
ErrStatus: metav1.Status{
Status: metav1.StatusFailure,
Reason: metav1.StatusReasonNotFound,
}}
})
return dynamicClient
}(),
inputWork: generateWorkObj(&workIdentifier),
inputAppliedWork: generateAppliedWorkObj(nil),
expectedNewRes: []v1alpha1.AppliedResourceMeta(nil),
expectedStaleRes: []v1alpha1.AppliedResourceMeta(nil),
hasErr: false,
},
"Test work is adding one manifest but failed to get it on the member cluster": {
spokeDynamicClient: func() *fake.FakeDynamicClient {
dynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
dynamicClient.PrependReactor("get", "*", func(action testingclient.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, fmt.Errorf("get failed")
})
return dynamicClient
}(),
inputWork: generateWorkObj(&workIdentifier),
inputAppliedWork: generateAppliedWorkObj(nil),
expectedNewRes: nil,
expectedStaleRes: nil,
hasErr: true,
},
}
for testName, tt := range tests {
t.Run(testName, func(t *testing.T) {
newRes, staleRes, err := tt.r.generateDiff(context.Background(), &tt.inputWork, &tt.inputAppliedWork)
assert.Equalf(t, tt.expectedNewRes, newRes, "Testcase %s: NewRes is different from what it should be.", testName)
assert.Equalf(t, tt.expectedStaleRes, staleRes, "Testcase %s: StaleRes is different from what it should be.", testName)
r := &ApplyWorkReconciler{
spokeDynamicClient: tt.spokeDynamicClient,
}
newRes, staleRes, err := r.generateDiff(context.Background(), &tt.inputWork, &tt.inputAppliedWork)
if len(tt.expectedNewRes) != len(newRes) {
t.Errorf("Testcase %s: get newRes contains different number of elements than the expected newRes.", testName)
}
for i := 0; i < len(newRes); i++ {
diff := cmp.Diff(tt.expectedNewRes[i].ResourceIdentifier, newRes[i].ResourceIdentifier)
if len(diff) != 0 {
t.Errorf("Testcase %s: get newRes is different from the expected newRes, diff = %s", testName, diff)
}
}
if len(tt.expectedStaleRes) != len(staleRes) {
t.Errorf("Testcase %s: get staleRes contains different number of elements than the expected staleRes.", testName)
}
for i := 0; i < len(staleRes); i++ {
diff := cmp.Diff(tt.expectedStaleRes[i].ResourceIdentifier, staleRes[i].ResourceIdentifier)
if len(diff) != 0 {
t.Errorf("Testcase %s: get staleRes is different from the expected staleRes, diff = %s", testName, diff)
}
}
if tt.hasErr {
assert.Truef(t, err != nil, "Testcase %s: Should get an err.", testName)
}
@@ -90,6 +204,109 @@ func TestCalculateNewAppliedWork(t *testing.T) {
}
}
func TestDeleteStaleManifest(t *testing.T) {
tests := map[string]struct {
spokeDynamicClient dynamic.Interface
staleManifests []v1alpha1.AppliedResourceMeta
owner metav1.OwnerReference
wantErr error
}{
"test staled manifests already deleted": {
spokeDynamicClient: func() *fake.FakeDynamicClient {
dynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
dynamicClient.PrependReactor("get", "*", func(action testingclient.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, &apierrors.StatusError{
ErrStatus: metav1.Status{
Status: metav1.StatusFailure,
Reason: metav1.StatusReasonNotFound,
}}
})
return dynamicClient
}(),
staleManifests: []v1alpha1.AppliedResourceMeta{
{
ResourceIdentifier: v1alpha1.ResourceIdentifier{
Name: "does not matter 1",
},
},
{
ResourceIdentifier: v1alpha1.ResourceIdentifier{
Name: "does not matter 2",
},
},
},
owner: metav1.OwnerReference{
APIVersion: "does not matter",
},
wantErr: nil,
},
"test failed to get staled manifest": {
spokeDynamicClient: func() *fake.FakeDynamicClient {
dynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
dynamicClient.PrependReactor("get", "*", func(action testingclient.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, fmt.Errorf("get failed")
})
return dynamicClient
}(),
staleManifests: []v1alpha1.AppliedResourceMeta{
{
ResourceIdentifier: v1alpha1.ResourceIdentifier{
Name: "does not matter",
},
},
},
owner: metav1.OwnerReference{
APIVersion: "does not matter",
},
wantErr: utilerrors.NewAggregate([]error{fmt.Errorf("get failed")}),
},
"test not remove a staled manifest that work does not own": {
spokeDynamicClient: func() *fake.FakeDynamicClient {
uObj := unstructured.Unstructured{}
uObj.SetOwnerReferences([]metav1.OwnerReference{
{
APIVersion: "not owned by work",
},
})
dynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
dynamicClient.PrependReactor("get", "*", func(action testingclient.Action) (handled bool, ret runtime.Object, err error) {
return true, uObj.DeepCopy(), nil
})
dynamicClient.PrependReactor("delete", "*", func(action testingclient.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, fmt.Errorf("should not call")
})
return dynamicClient
}(),
staleManifests: []v1alpha1.AppliedResourceMeta{
{
ResourceIdentifier: v1alpha1.ResourceIdentifier{
Name: "does not matter",
},
},
},
owner: metav1.OwnerReference{
APIVersion: "does not match",
},
wantErr: nil,
},
}
for name, tt := range tests {
t.Run(name, func(t *testing.T) {
r := &ApplyWorkReconciler{
spokeDynamicClient: tt.spokeDynamicClient,
}
gotErr := r.deleteStaleManifest(context.Background(), tt.staleManifests, tt.owner)
if tt.wantErr == nil {
if gotErr != nil {
t.Errorf("test case `%s` didn't return the exepected error, want no error, got error = %+v ", name, gotErr)
}
} else if gotErr == nil || gotErr.Error() != tt.wantErr.Error() {
t.Errorf("test case `%s` didn't return the exepected error, want error = %+v, got error = %+v", name, tt.wantErr, gotErr)
}
})
}
}
func generateWorkObj(identifier *v1alpha1.ResourceIdentifier) v1alpha1.Work {
if identifier != nil {
return v1alpha1.Work{

View file

@@ -169,7 +169,7 @@ func (r *ApplyWorkReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
// we periodically reconcile the work to make sure the member cluster state is in sync with the work
// if the reconcile succeeds
// even if the reconciling succeeds, in case the resources on the member cluster are removed/changed.
return ctrl.Result{RequeueAfter: time.Minute * 5}, err
}

View file

@@ -66,5 +66,5 @@ func isReferSameObject(a, b metav1.OwnerReference) bool {
return false
}
return aGV.Group == bGV.Group && a.Kind == b.Kind && a.Name == b.Name
return aGV.Group == bGV.Group && aGV.Version == bGV.Version && a.Kind == b.Kind && a.Name == b.Name
}
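A brief sketch (re-implemented here for illustration, with hypothetical APIVersion values) of the behavioral change to isReferSameObject: owner references that agree on group, kind, and name but differ in version no longer count as the same owner.

package sketch

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// refersSameObject mirrors the updated comparison.
func refersSameObject(a, b metav1.OwnerReference) bool {
	aGV, err := schema.ParseGroupVersion(a.APIVersion)
	if err != nil {
		return false
	}
	bGV, err := schema.ParseGroupVersion(b.APIVersion)
	if err != nil {
		return false
	}
	// version is now part of the comparison
	return aGV.Group == bGV.Group && aGV.Version == bGV.Version && a.Kind == b.Kind && a.Name == b.Name
}

// Example (assumed values): same group/kind/name, different version.
//   a := metav1.OwnerReference{APIVersion: "multicluster.x-k8s.io/v1alpha1", Kind: "AppliedWork", Name: "work-1"}
//   b := metav1.OwnerReference{APIVersion: "multicluster.x-k8s.io/v1beta1", Kind: "AppliedWork", Name: "work-1"}
//   refersSameObject(a, b) // false after this change; previously true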

View file

@@ -17,8 +17,6 @@ limitations under the License.
package utils
import (
"github.com/onsi/gomega/format"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/tools/record"
)
@@ -28,26 +26,3 @@ func NewFakeRecorder(bufferSize int) *record.FakeRecorder {
recorder.IncludeObject = true
return recorder
}
// AlreadyExistMatcher matches the error to be already exist
type AlreadyExistMatcher struct {
}
// Match matches error.
func (matcher AlreadyExistMatcher) Match(actual interface{}) (success bool, err error) {
if actual == nil {
return false, nil
}
actualError := actual.(error)
return apierrors.IsAlreadyExists(actualError), nil
}
// FailureMessage builds an error message.
func (matcher AlreadyExistMatcher) FailureMessage(actual interface{}) (message string) {
return format.Message(actual, "to be already exist")
}
// NegatedFailureMessage builds an error message.
func (matcher AlreadyExistMatcher) NegatedFailureMessage(actual interface{}) (message string) {
return format.Message(actual, "not to be already exist")
}

View file

@@ -1 +1,5 @@
work_creation.py creates an example work 'n' number of times. The correct usage is: python3 work_creation.py n where n is the number of works to be created.
work_creation.py creates an example work 'n' number of times. The correct usage is: python3 work_creation.py n where n is the number of works to be created.
../fleet/hack/tools/bin/goimports-latest -local sigs.k8s.io/work-api -w $(go list -f {{.Dir}} ./...)
../fleet/hack/tools/bin/staticcheck ./...

View file

@@ -18,6 +18,9 @@ package e2e
import (
"embed"
"os"
"testing"
"github.com/onsi/ginkgo"
"github.com/onsi/gomega"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
@@ -31,13 +34,12 @@ import (
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"os"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/work-api/pkg/apis/v1alpha1"
"testing"
)
var (