From 7b7e99759706754550adb19e6b19a7afda1b7eca Mon Sep 17 00:00:00 2001
From: Jack Francis
Date: Tue, 9 Oct 2018 12:57:59 -0700
Subject: [PATCH] E2E: retry kubectl delete job (#3981)

---
 test/e2e/kubernetes/job/job.go         | 24 +++++++++++++++++------
 test/e2e/kubernetes/kubernetes_test.go |  6 +++---
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/test/e2e/kubernetes/job/job.go b/test/e2e/kubernetes/job/job.go
index 9a234bdaa..0f177c087 100644
--- a/test/e2e/kubernetes/job/job.go
+++ b/test/e2e/kubernetes/job/job.go
@@ -161,13 +161,23 @@ func (j *Job) WaitOnReady(sleep, duration time.Duration) (bool, error) {
 }
 
 // Delete will delete a Job in a given namespace
-func (j *Job) Delete() error {
-	cmd := exec.Command("kubectl", "delete", "job", "-n", j.Metadata.Namespace, j.Metadata.Name)
-	util.PrintCommand(cmd)
-	out, err := cmd.CombinedOutput()
-	if err != nil {
-		log.Printf("Error while trying to delete Job %s in namespace %s:%s\n", j.Metadata.Namespace, j.Metadata.Name, string(out))
-		return err
+func (j *Job) Delete(retries int) error {
+	var kubectlOutput []byte
+	var kubectlError error
+	for i := 0; i < retries; i++ {
+		cmd := exec.Command("kubectl", "delete", "job", "-n", j.Metadata.Namespace, j.Metadata.Name)
+		util.PrintCommand(cmd)
+		kubectlOutput, kubectlError = cmd.CombinedOutput()
+		if kubectlError != nil {
+			log.Printf("Error while trying to delete Job %s in namespace %s:%s\n", j.Metadata.Name, j.Metadata.Namespace, string(kubectlOutput))
+			continue
+		}
+		break
 	}
+
+	if kubectlError != nil {
+		return kubectlError
+	}
+
 	return nil
 }
diff --git a/test/e2e/kubernetes/kubernetes_test.go b/test/e2e/kubernetes/kubernetes_test.go
index 33a62e5a8..e9bb46c2e 100644
--- a/test/e2e/kubernetes/kubernetes_test.go
+++ b/test/e2e/kubernetes/kubernetes_test.go
@@ -265,7 +265,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
 				j, err := job.CreateJobFromFile(filepath.Join(WorkloadDir, "validate-dns.yaml"), "validate-dns", "default")
 				Expect(err).NotTo(HaveOccurred())
 				ready, err := j.WaitOnReady(5*time.Second, cfg.Timeout)
-				delErr := j.Delete()
+				delErr := j.Delete(10)
 				if delErr != nil {
 					fmt.Printf("could not delete job %s\n", j.Metadata.Name)
 					fmt.Println(delErr)
@@ -824,7 +824,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
 				j, err := job.CreateJobFromFile(filepath.Join(WorkloadDir, "cuda-vector-add.yaml"), "cuda-vector-add", "default")
 				Expect(err).NotTo(HaveOccurred())
 				ready, err := j.WaitOnReady(30*time.Second, cfg.Timeout)
-				delErr := j.Delete()
+				delErr := j.Delete(10)
 				if delErr != nil {
 					fmt.Printf("could not delete job %s\n", j.Metadata.Name)
 					fmt.Println(delErr)
@@ -835,7 +835,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
 				j, err := job.CreateJobFromFile(filepath.Join(WorkloadDir, "nvidia-smi.yaml"), "nvidia-smi", "default")
 				Expect(err).NotTo(HaveOccurred())
 				ready, err := j.WaitOnReady(30*time.Second, cfg.Timeout)
-				delErr := j.Delete()
+				delErr := j.Delete(10)
 				if delErr != nil {
 					fmt.Printf("could not delete job %s\n", j.Metadata.Name)
 					fmt.Println(delErr)