Skip to content

Commit 5a6a4df

Browse files
authored
[Test] Fixing SubmittedFinishedTimeout e2e test for K8s v1.33+ clusters by setting JobSuccessCriteriaMet in addition to JobComplete (#4428)
* Fixing SubmittedFinishedTimeout e2e test for K8s v1.33+ clusters by setting JobSuccessCriteriaMet in addition to JobComplete Signed-off-by: Mark Rossett <marosset@microsoft.com> * Also set JobSuccessCriteriaMet when setting JobComplete in controller unit tests Signed-off-by: Mark Rossett <marosset@microsoft.com> --------- Signed-off-by: Mark Rossett <marosset@microsoft.com>
1 parent 9bf0ac6 commit 5a6a4df

File tree

3 files changed

+60
-153
lines changed

3 files changed

+60
-153
lines changed

ray-operator/controllers/ray/raycluster_controller_unit_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2854,7 +2854,7 @@ func Test_RedisCleanup(t *testing.T) {
28542854
// Simulate the Job succeeded.
28552855
job := jobList.Items[0]
28562856
job.Status.Succeeded = 1
2857-
job.Status.Conditions = []batchv1.JobCondition{{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}}
2857+
job.Status.Conditions = []batchv1.JobCondition{{Type: batchv1.JobSuccessCriteriaMet, Status: corev1.ConditionTrue}, {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}}
28582858
err = fakeClient.Status().Update(ctx, &job)
28592859
require.NoError(t, err, "Fail to update Job status")
28602860

ray-operator/controllers/ray/rayjob_controller_test.go

Lines changed: 39 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,21 @@ func rayJobTemplate(name string, namespace string) *rayv1.RayJob {
113113
}
114114
}
115115

116+
// updateK8sJobToComplete updates a Kubernetes Job status to mark it as complete.
117+
// This sets conditions (JobSuccessCriteriaMet and JobComplete) and
118+
// timestamps (StartTime, CompletionTime) required by Kubernetes 1.33+.
119+
func updateK8sJobToComplete(ctx context.Context, job *batchv1.Job) {
120+
startTime := metav1.Now()
121+
completionTime := metav1.NewTime(startTime.Add(time.Second))
122+
job.Status.Conditions = []batchv1.JobCondition{
123+
{Type: batchv1.JobSuccessCriteriaMet, Status: corev1.ConditionTrue, LastTransitionTime: completionTime},
124+
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: completionTime},
125+
}
126+
job.Status.StartTime = &startTime
127+
job.Status.CompletionTime = &completionTime
128+
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
129+
}
130+
116131
var _ = Context("RayJob with different submission modes", func() {
117132
Context("RayJob in K8sJobMode", func() {
118133
Describe("RayJob SubmitterConfig BackoffLimit", Ordered, func() {
@@ -316,12 +331,7 @@ var _ = Context("RayJob with different submission modes", func() {
316331
err := k8sClient.Get(ctx, namespacedName, job)
317332
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
318333

319-
// Update the submitter Kubernetes Job to Complete.
320-
conditions := []batchv1.JobCondition{
321-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
322-
}
323-
job.Status.Conditions = conditions
324-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
334+
updateK8sJobToComplete(ctx, job)
325335

326336
// RayJob transitions to Complete.
327337
Eventually(
@@ -541,12 +551,7 @@ var _ = Context("RayJob with different submission modes", func() {
541551
err := k8sClient.Get(ctx, namespacedName, job)
542552
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
543553

544-
// Update the submitter Kubernetes Job to Complete.
545-
conditions := []batchv1.JobCondition{
546-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
547-
}
548-
job.Status.Conditions = conditions
549-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
554+
updateK8sJobToComplete(ctx, job)
550555

551556
// RayJob transitions to Complete.
552557
Eventually(
@@ -706,12 +711,7 @@ var _ = Context("RayJob with different submission modes", func() {
706711
err := k8sClient.Get(ctx, namespacedName, job)
707712
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
708713

709-
// Update the submitter Kubernetes Job to Complete.
710-
conditions := []batchv1.JobCondition{
711-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
712-
}
713-
job.Status.Conditions = conditions
714-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
714+
updateK8sJobToComplete(ctx, job)
715715

716716
// record the current cluster name
717717
oldClusterName := rayJob.Status.RayClusterName
@@ -801,12 +801,7 @@ var _ = Context("RayJob with different submission modes", func() {
801801
err := k8sClient.Get(ctx, namespacedName, job)
802802
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
803803

804-
// Update the submitter Kubernetes Job to Complete.
805-
conditions := []batchv1.JobCondition{
806-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
807-
}
808-
job.Status.Conditions = conditions
809-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
804+
updateK8sJobToComplete(ctx, job)
810805

811806
// RayJob transitions from Running -> Complete
812807
Eventually(
@@ -1028,12 +1023,7 @@ var _ = Context("RayJob with different submission modes", func() {
10281023
err := k8sClient.Get(ctx, namespacedName, job)
10291024
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
10301025

1031-
// Update the submitter Kubernetes Job to Complete.
1032-
conditions := []batchv1.JobCondition{
1033-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
1034-
}
1035-
job.Status.Conditions = conditions
1036-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1026+
updateK8sJobToComplete(ctx, job)
10371027

10381028
// RayJob transitions to Complete.
10391029
Eventually(
@@ -1167,12 +1157,7 @@ var _ = Context("RayJob with different submission modes", func() {
11671157
err := k8sClient.Get(ctx, namespacedName, job)
11681158
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
11691159

1170-
// Update the submitter Kubernetes Job to Complete.
1171-
conditions := []batchv1.JobCondition{
1172-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
1173-
}
1174-
job.Status.Conditions = conditions
1175-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1160+
updateK8sJobToComplete(ctx, job)
11761161

11771162
// RayJob transitions to Complete.
11781163
Eventually(
@@ -1306,12 +1291,7 @@ var _ = Context("RayJob with different submission modes", func() {
13061291
err := k8sClient.Get(ctx, namespacedName, job)
13071292
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
13081293

1309-
// Update the submitter Kubernetes Job to Complete.
1310-
conditions := []batchv1.JobCondition{
1311-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
1312-
}
1313-
job.Status.Conditions = conditions
1314-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1294+
updateK8sJobToComplete(ctx, job)
13151295

13161296
// RayJob transitions to Complete.
13171297
Eventually(
@@ -1462,12 +1442,7 @@ var _ = Context("RayJob with different submission modes", func() {
14621442
err := k8sClient.Get(ctx, namespacedName, job)
14631443
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
14641444

1465-
// Update the submitter Kubernetes Job to Complete.
1466-
conditions := []batchv1.JobCondition{
1467-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
1468-
}
1469-
job.Status.Conditions = conditions
1470-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1445+
updateK8sJobToComplete(ctx, job)
14711446

14721447
// RayJob transitions to Complete.
14731448
Eventually(
@@ -1606,12 +1581,7 @@ var _ = Context("RayJob with different submission modes", func() {
16061581
err := k8sClient.Get(ctx, namespacedName, job)
16071582
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
16081583

1609-
// Update the submitter Kubernetes Job to Complete.
1610-
conditions := []batchv1.JobCondition{
1611-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
1612-
}
1613-
job.Status.Conditions = conditions
1614-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1584+
updateK8sJobToComplete(ctx, job)
16151585
})
16161586

16171587
By("If DeletionStrategy=DeleteSelf, the RayJob is deleted", func() {
@@ -1722,12 +1692,7 @@ var _ = Context("RayJob with different submission modes", func() {
17221692
err := k8sClient.Get(ctx, namespacedName, job)
17231693
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
17241694

1725-
// Update the submitter Kubernetes Job to Complete.
1726-
conditions := []batchv1.JobCondition{
1727-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
1728-
}
1729-
job.Status.Conditions = conditions
1730-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1695+
updateK8sJobToComplete(ctx, job)
17311696
})
17321697

17331698
By("If DeletionStrategy=DeleteSelf, the RayJob is deleted", func() {
@@ -1850,12 +1815,7 @@ var _ = Context("RayJob with different submission modes", func() {
18501815
err := k8sClient.Get(ctx, namespacedName, job)
18511816
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
18521817

1853-
// Update the submitter Kubernetes Job to Complete.
1854-
conditions := []batchv1.JobCondition{
1855-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
1856-
}
1857-
job.Status.Conditions = conditions
1858-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1818+
updateK8sJobToComplete(ctx, job)
18591819

18601820
// RayJob transitions to Complete.
18611821
Eventually(
@@ -2011,12 +1971,7 @@ var _ = Context("RayJob with different submission modes", func() {
20111971
err := k8sClient.Get(ctx, namespacedName, job)
20121972
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
20131973

2014-
// Update the submitter Kubernetes Job to Complete.
2015-
conditions := []batchv1.JobCondition{
2016-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
2017-
}
2018-
job.Status.Conditions = conditions
2019-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
1974+
updateK8sJobToComplete(ctx, job)
20201975

20211976
// RayJob transitions to Complete.
20221977
Eventually(
@@ -2173,12 +2128,7 @@ var _ = Context("RayJob with different submission modes", func() {
21732128
err := k8sClient.Get(ctx, namespacedName, job)
21742129
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
21752130

2176-
// Update the submitter Kubernetes Job to Complete.
2177-
conditions := []batchv1.JobCondition{
2178-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
2179-
}
2180-
job.Status.Conditions = conditions
2181-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
2131+
updateK8sJobToComplete(ctx, job)
21822132

21832133
// RayJob transitions to Complete.
21842134
Eventually(
@@ -2330,12 +2280,7 @@ var _ = Context("RayJob with different submission modes", func() {
23302280
err := k8sClient.Get(ctx, namespacedName, job)
23312281
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
23322282

2333-
// Update the submitter Kubernetes Job to Complete.
2334-
conditions := []batchv1.JobCondition{
2335-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
2336-
}
2337-
job.Status.Conditions = conditions
2338-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
2283+
updateK8sJobToComplete(ctx, job)
23392284

23402285
// RayJob transitions to Failed.
23412286
Eventually(
@@ -2487,12 +2432,7 @@ var _ = Context("RayJob with different submission modes", func() {
24872432
err := k8sClient.Get(ctx, namespacedName, job)
24882433
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
24892434

2490-
// Update the submitter Kubernetes Job to Complete.
2491-
conditions := []batchv1.JobCondition{
2492-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
2493-
}
2494-
job.Status.Conditions = conditions
2495-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
2435+
updateK8sJobToComplete(ctx, job)
24962436

24972437
// RayJob transitions to Complete.
24982438
Eventually(
@@ -2627,12 +2567,7 @@ var _ = Context("RayJob with different submission modes", func() {
26272567
err := k8sClient.Get(ctx, namespacedName, job)
26282568
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
26292569

2630-
// Update the submitter Kubernetes Job to Complete.
2631-
conditions := []batchv1.JobCondition{
2632-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
2633-
}
2634-
job.Status.Conditions = conditions
2635-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
2570+
updateK8sJobToComplete(ctx, job)
26362571

26372572
// RayJob transitions to Complete.
26382573
Eventually(
@@ -2753,12 +2688,7 @@ var _ = Context("RayJob with different submission modes", func() {
27532688
err := k8sClient.Get(ctx, namespacedName, job)
27542689
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
27552690

2756-
// Update the submitter Kubernetes Job to Complete.
2757-
conditions := []batchv1.JobCondition{
2758-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
2759-
}
2760-
job.Status.Conditions = conditions
2761-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
2691+
updateK8sJobToComplete(ctx, job)
27622692
})
27632693

27642694
By("If DeletionStrategy=DeleteSelf, the RayJob is deleted", func() {
@@ -2868,12 +2798,7 @@ var _ = Context("RayJob with different submission modes", func() {
28682798
err := k8sClient.Get(ctx, namespacedName, job)
28692799
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
28702800

2871-
// Update the submitter Kubernetes Job to Complete.
2872-
conditions := []batchv1.JobCondition{
2873-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
2874-
}
2875-
job.Status.Conditions = conditions
2876-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
2801+
updateK8sJobToComplete(ctx, job)
28772802
})
28782803

28792804
By("If DeletionStrategy=DeleteSelf, the RayJob is deleted", func() {
@@ -2997,12 +2922,7 @@ var _ = Context("RayJob with different submission modes", func() {
29972922
err := k8sClient.Get(ctx, namespacedName, job)
29982923
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
29992924

3000-
// Update the submitter Kubernetes Job to Complete.
3001-
conditions := []batchv1.JobCondition{
3002-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
3003-
}
3004-
job.Status.Conditions = conditions
3005-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
2925+
updateK8sJobToComplete(ctx, job)
30062926

30072927
// RayJob transitions to Complete.
30082928
Eventually(
@@ -3159,12 +3079,7 @@ var _ = Context("RayJob with different submission modes", func() {
31593079
err := k8sClient.Get(ctx, namespacedName, job)
31603080
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
31613081

3162-
// Update the submitter Kubernetes Job to Complete.
3163-
conditions := []batchv1.JobCondition{
3164-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
3165-
}
3166-
job.Status.Conditions = conditions
3167-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
3082+
updateK8sJobToComplete(ctx, job)
31683083

31693084
// RayJob transitions to Complete.
31703085
Eventually(
@@ -3351,12 +3266,7 @@ var _ = Context("RayJob with different submission modes", func() {
33513266
err := k8sClient.Get(ctx, namespacedName, job)
33523267
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
33533268

3354-
// Update the submitter Kubernetes Job to Complete.
3355-
conditions := []batchv1.JobCondition{
3356-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
3357-
}
3358-
job.Status.Conditions = conditions
3359-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
3269+
updateK8sJobToComplete(ctx, job)
33603270
})
33613271

33623272
By("Verify RayJob itself is deleted", func() {
@@ -3510,12 +3420,7 @@ var _ = Context("RayJob with different submission modes", func() {
35103420
err := k8sClient.Get(ctx, namespacedName, job)
35113421
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
35123422

3513-
// Update the submitter Kubernetes Job to Complete.
3514-
conditions := []batchv1.JobCondition{
3515-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
3516-
}
3517-
job.Status.Conditions = conditions
3518-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
3423+
updateK8sJobToComplete(ctx, job)
35193424
})
35203425

35213426
By("Verify RayJob itself is deleted", func() {
@@ -3670,12 +3575,7 @@ var _ = Context("RayJob with different submission modes", func() {
36703575
err := k8sClient.Get(ctx, namespacedName, job)
36713576
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
36723577

3673-
// Update the submitter Kubernetes Job to Complete.
3674-
conditions := []batchv1.JobCondition{
3675-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
3676-
}
3677-
job.Status.Conditions = conditions
3678-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
3578+
updateK8sJobToComplete(ctx, job)
36793579

36803580
// RayJob transitions to Complete.
36813581
Eventually(
@@ -3878,12 +3778,7 @@ var _ = Context("RayJob with different submission modes", func() {
38783778
err := k8sClient.Get(ctx, namespacedName, job)
38793779
Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job")
38803780

3881-
// Update the submitter Kubernetes Job to Complete.
3882-
conditions := []batchv1.JobCondition{
3883-
{Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()},
3884-
}
3885-
job.Status.Conditions = conditions
3886-
Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed())
3781+
updateK8sJobToComplete(ctx, job)
38873782

38883783
// RayJob transitions to Failed.
38893784
Eventually(

0 commit comments

Comments
 (0)