[WIP] [Feat] Re-do mk admission after eviction in worker cluster #8477
base: main
Changes from all commits

@@ -349,22 +349,53 @@ func (w *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco
        return reconcile.Result{}, workload.Finish(ctx, w.client, group.local, remoteFinishedCond.Reason, remoteFinishedCond.Message, w.clock)
    }

    // 4. Handle workload evicted on manager cluster
    // 4. Handle workload eviction
    remoteEvictCond, evictedRemote := group.bestMatchByCondition(kueue.WorkloadEvicted)
    if remoteEvictCond != nil && remoteEvictCond.Reason == workload.ReasonWithCause(kueue.WorkloadDeactivated, kueue.WorkloadEvictedOnManagerCluster) {
    if remoteEvictCond != nil {
        remoteCl := group.remoteClients[evictedRemote].client
        remoteWl := group.remotes[evictedRemote]

        log = log.WithValues("remote", evictedRemote, "remoteWorkload", klog.KObj(remoteWl))
        ctx = ctrl.LoggerInto(ctx, log)

        if err := group.jobAdapter.SyncJob(ctx, w.client, remoteCl, group.controllerKey, group.local.Name, w.origin); err != nil {
            log.Error(err, "Syncing remote controller object")
            // We'll retry this in the next reconciling.
        // workload evicted on manager cluster
        if remoteEvictCond.Reason == workload.ReasonWithCause(kueue.WorkloadDeactivated, kueue.WorkloadEvictedOnManagerCluster) {
            if err := group.jobAdapter.SyncJob(ctx, w.client, remoteCl, group.controllerKey, group.local.Name, w.origin); err != nil {
                log.Error(err, "Syncing remote controller object")
                // We'll retry this in the next reconciling.
                return reconcile.Result{}, err
            }
            return reconcile.Result{}, nil
        }

        // workload eviction on worker cluster
        log.V(5).Info("Workload gets evicted in the remote cluster", "cluster", evictedRemote)
        needsACUpdate := acs.State == kueue.CheckStateReady
        if err := workload.PatchAdmissionStatus(ctx, w.client, group.local, w.clock, func(wl *kueue.Workload) (bool, error) {

Contributor: Why would we do it when

            if needsACUpdate {
                acs.State = kueue.CheckStatePending
                acs.Message = fmt.Sprintf("Workload evicted on worker cluster: %q, resetting for re-admission", *group.local.Status.ClusterName)
                acs.LastTransitionTime = metav1.NewTime(w.clock.Now())
                workload.SetAdmissionCheckState(&wl.Status.AdmissionChecks, *acs, w.clock)
                wl.Status.ClusterName = nil
                wl.Status.NominatedClusterNames = nil

Contributor: Out of curiosity - can this have an effect, given this rule?

            }
            return true, nil
        }); err != nil {
            log.Error(err, "Failed to patch workload status")
            return reconcile.Result{}, err
        }

        // Wait for QuotaReserved=false in the local job.
        if needsACUpdate {
            w.recorder.Eventf(group.local, corev1.EventTypeNormal, "MultiKueue", acs.Message)
        }

        for cluster := range group.remotes {
            if err := client.IgnoreNotFound(group.RemoveRemoteObjects(ctx, cluster)); err != nil {

Contributor: In most cases, other calls to

| log.Error(err, "Failed to remove cluster remote objects", "cluster", cluster) | ||
| return reconcile.Result{}, err | ||
| } | ||
| } | ||
| return reconcile.Result{}, nil | ||
| } | ||
|
|
||
|
|
||
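For orientation, a minimal sketch (not part of this diff) of the worker-side condition that now takes the new branch: any Evicted=True condition whose reason differs from the manager-side deactivation reason. The tests below produce exactly this condition via preemption.

```go
// Illustrative only: an eviction recorded on the remote (worker) workload.
// Because the reason is not
// workload.ReasonWithCause(kueue.WorkloadDeactivated, kueue.WorkloadEvictedOnManagerCluster),
// the reconciler now resets the MultiKueue admission check and clears the
// cluster nomination instead of only re-syncing the remote job.
evicted := metav1.Condition{
    Type:   kueue.WorkloadEvicted,
    Status: metav1.ConditionTrue,
    Reason: kueue.WorkloadEvictedByPreemption,
}
_ = evicted
```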

@@ -34,6 +34,7 @@ import (
    batchv1 "k8s.io/api/batch/v1"
    corev1 "k8s.io/api/core/v1"
    apimeta "k8s.io/apimachinery/pkg/api/meta"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/utils/ptr"

@@ -603,6 +604,93 @@ var _ = ginkgo.Describe("MultiKueue", func() {
        })
    })

    ginkgo.It("Should re-do admission process when workload gets evicted in the worker", func() {
        job := testingjob.MakeJob("job", managerNs.Name).
            WorkloadPriorityClass(managerLowWPC.Name).
            Queue(kueue.LocalQueueName(managerLq.Name)).
            RequestAndLimit(corev1.ResourceCPU, "0.9").
            RequestAndLimit(corev1.ResourceMemory, "0.5G").
            Obj()
        util.MustCreate(ctx, k8sManagerClient, job)

        wlKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job.Name, job.UID), Namespace: managerNs.Name}
        managerWl := &kueue.Workload{}
        workerWorkload := &kueue.Workload{}

        ginkgo.By("Checking that the workload is created and admitted in the manager cluster", func() {
            gomega.Eventually(func(g gomega.Gomega) {
                g.Expect(k8sManagerClient.Get(ctx, wlKey, managerWl)).To(gomega.Succeed())
                g.Expect(workload.IsAdmitted(managerWl)).To(gomega.BeTrue())
            }, util.Timeout, util.Interval).Should(gomega.Succeed())
        })

        createdAtWorker := ""

        ginkgo.By("Checking that the workload is created in one of the workers", func() {

Contributor: Below, you'll modify worker's CQ limits to manipulate what it can fit. AFAICS the initial CPU quotas are: 2 at worker1 and 1 at worker2. Compared to what you have now, +1 CQ update but -4
(A sketch of this simplification follows this spec.)

            gomega.Eventually(func(g gomega.Gomega) {
                if err := k8sWorker1Client.Get(ctx, wlKey, workerWorkload); err == nil {
                    createdAtWorker = "worker1"
                } else {
                    g.Expect(k8sWorker2Client.Get(ctx, wlKey, workerWorkload)).To(gomega.Succeed())
                    createdAtWorker = "worker2"
                }
                g.Expect(workload.IsAdmitted(workerWorkload)).To(gomega.BeTrue())
                g.Expect(workerWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec))
            }, util.Timeout, util.Interval).Should(gomega.Succeed())
        })

        ginkgo.GinkgoLogr.Info("Workload created at", "cluster", createdAtWorker)

        ginkgo.By("Modifying worker cluster queue to not have enough resources", func() {
            if createdAtWorker == "worker1" {
                gomega.Eventually(func(g gomega.Gomega) {
                    g.Expect(k8sWorker1Client.Get(ctx, client.ObjectKeyFromObject(worker1Cq), worker1Cq)).To(gomega.Succeed())
                    worker1Cq.Spec.ResourceGroups[0].Flavors[0].Resources[0].NominalQuota = resource.MustParse("0.5")
                    g.Expect(k8sWorker1Client.Update(ctx, worker1Cq)).To(gomega.Succeed())
                }, util.Timeout, util.Interval).Should(gomega.Succeed())
            } else {
                gomega.Eventually(func(g gomega.Gomega) {
                    g.Expect(k8sWorker2Client.Get(ctx, client.ObjectKeyFromObject(worker2Cq), worker2Cq)).To(gomega.Succeed())
                    worker2Cq.Spec.ResourceGroups[0].Flavors[0].Resources[0].NominalQuota = resource.MustParse("0.5")
                    g.Expect(k8sWorker2Client.Update(ctx, worker2Cq)).To(gomega.Succeed())
                }, util.Timeout, util.Interval).Should(gomega.Succeed())
            }
        })

        ginkgo.By("Triggering eviction in worker", func() {
            if createdAtWorker == "worker1" {
                gomega.Eventually(func(g gomega.Gomega) {
                    g.Expect(k8sWorker1Client.Get(ctx, wlKey, workerWorkload)).To(gomega.Succeed())
                    g.Expect(workload.SetConditionAndUpdate(ctx, k8sWorker1Client, workerWorkload, kueue.WorkloadEvicted, metav1.ConditionTrue, kueue.WorkloadEvictedByPreemption, "By test", "evict", util.RealClock)).To(gomega.Succeed())
                }, util.Timeout, util.Interval).Should(gomega.Succeed())
            } else {
                gomega.Eventually(func(g gomega.Gomega) {
                    g.Expect(k8sWorker2Client.Get(ctx, wlKey, workerWorkload)).To(gomega.Succeed())
                    g.Expect(workload.SetConditionAndUpdate(ctx, k8sWorker2Client, workerWorkload, kueue.WorkloadEvicted, metav1.ConditionTrue, kueue.WorkloadEvictedByPreemption, "By test", "evict", util.RealClock)).To(gomega.Succeed())
                }, util.Timeout, util.Interval).Should(gomega.Succeed())
            }
        })

        ginkgo.By("Checking that the workload is re-admitted in the other worker cluster", func() {
            gomega.Eventually(func(g gomega.Gomega) {
                g.Expect(k8sManagerClient.Get(ctx, wlKey, managerWl)).To(gomega.Succeed())
                g.Expect(managerWl.Status.ClusterName).NotTo(gomega.HaveValue(gomega.Equal(createdAtWorker)))

Contributor: You could also check that it's not empty.

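A minimal way to express that extra check with the matchers already used in this spec (illustrative, not part of the diff):

```go
// Also assert that a new cluster was actually nominated, not only that it
// differs from the worker the workload was evicted from.
g.Expect(managerWl.Status.ClusterName).NotTo(gomega.BeNil())
```
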
            }, util.LongTimeout, util.Interval).Should(gomega.Succeed())
        })

        ginkgo.By("Checking that the workload is created in the other worker", func() {
            gomega.Eventually(func(g gomega.Gomega) {
                if createdAtWorker == "worker1" {
                    g.Expect(k8sWorker2Client.Get(ctx, wlKey, workerWorkload)).To(gomega.Succeed())
                } else {
                    g.Expect(k8sWorker1Client.Get(ctx, wlKey, workerWorkload)).To(gomega.Succeed())
                }
                g.Expect(workload.IsAdmitted(workerWorkload)).To(gomega.BeTrue())
                g.Expect(workerWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec))
            }, util.Timeout, util.Interval).Should(gomega.Succeed())
        })
    })
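
The comment above hints at a simpler setup. A sketch of it, assuming the quota figures quoted there (2 CPU on worker1, 1 CPU on worker2) and a 1.5 CPU request (both assumptions, not taken from this diff): the workload can then only land on worker1, so the per-worker branching disappears, at the cost of one extra ClusterQueue update.

```go
// Illustrative sketch of the reviewer's idea, not part of the PR.
// Assumed initial nominal quotas: worker1 = 2 CPU, worker2 = 1 CPU.
// Requesting 1.5 CPU fits only worker1, so placement is deterministic.
job := testingjob.MakeJob("job", managerNs.Name).
    WorkloadPriorityClass(managerLowWPC.Name).
    Queue(kueue.LocalQueueName(managerLq.Name)).
    RequestAndLimit(corev1.ResourceCPU, "1.5").
    RequestAndLimit(corev1.ResourceMemory, "0.5G").
    Obj()
util.MustCreate(ctx, k8sManagerClient, job)

// Only worker1 needs to be checked for the initial admission.
gomega.Eventually(func(g gomega.Gomega) {
    g.Expect(k8sWorker1Client.Get(ctx, wlKey, workerWorkload)).To(gomega.Succeed())
    g.Expect(workload.IsAdmitted(workerWorkload)).To(gomega.BeTrue())
}, util.Timeout, util.Interval).Should(gomega.Succeed())

// The extra ClusterQueue update: raise worker2 so it can take the workload
// after eviction, and shrink worker1 so it cannot be re-admitted there.
gomega.Eventually(func(g gomega.Gomega) {
    g.Expect(k8sWorker2Client.Get(ctx, client.ObjectKeyFromObject(worker2Cq), worker2Cq)).To(gomega.Succeed())
    worker2Cq.Spec.ResourceGroups[0].Flavors[0].Resources[0].NominalQuota = resource.MustParse("2")
    g.Expect(k8sWorker2Client.Update(ctx, worker2Cq)).To(gomega.Succeed())
}, util.Timeout, util.Interval).Should(gomega.Succeed())
gomega.Eventually(func(g gomega.Gomega) {
    g.Expect(k8sWorker1Client.Get(ctx, client.ObjectKeyFromObject(worker1Cq), worker1Cq)).To(gomega.Succeed())
    worker1Cq.Spec.ResourceGroups[0].Flavors[0].Resources[0].NominalQuota = resource.MustParse("0.5")
    g.Expect(k8sWorker1Client.Update(ctx, worker1Cq)).To(gomega.Succeed())
}, util.Timeout, util.Interval).Should(gomega.Succeed())
```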

    ginkgo.It("Should preempt a running low-priority workload when a high-priority workload is admitted (other workers)", func() {
        lowJob := testingjob.MakeJob("low-job", managerNs.Name).
            WorkloadPriorityClass(managerLowWPC.Name).

@@ -71,6 +71,7 @@ import (
    testingtfjob "sigs.k8s.io/kueue/pkg/util/testingjobs/tfjob"
    testingtrainjob "sigs.k8s.io/kueue/pkg/util/testingjobs/trainjob"
    testingxgboostjob "sigs.k8s.io/kueue/pkg/util/testingjobs/xgboostjob"
    "sigs.k8s.io/kueue/pkg/workload"
    "sigs.k8s.io/kueue/pkg/workloadslicing"
    "sigs.k8s.io/kueue/test/integration/framework"
    "sigs.k8s.io/kueue/test/util"

@@ -1885,6 +1886,91 @@ var _ = ginkgo.Describe("MultiKueue", ginkgo.Label("area:multikueue", "feature:m
            }, gomega.Equal(completedJobCondition))))
        })
    })
    ginkgo.It("Should redo the admission process once the workload loses Admission in the worker cluster", func() {
        job := testingjob.MakeJob("job", managerNs.Name).
            ManagedBy(kueue.MultiKueueControllerName).
            Queue(kueue.LocalQueueName(managerLq.Name)).
            Obj()
        util.MustCreate(managerTestCluster.ctx, managerTestCluster.client, job)

        createdWorkload := &kueue.Workload{}
        wlLookupKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job.Name, job.UID), Namespace: managerNs.Name}

        ginkgo.By("setting workload reservation in the management cluster", func() {
            admission := utiltestingapi.MakeAdmission(managerCq.Name).Obj()
            util.SetQuotaReservation(managerTestCluster.ctx, managerTestCluster.client, wlLookupKey, admission)
        })

        ginkgo.By("checking the workload creation in the worker clusters", func() {
            managerWl := &kueue.Workload{}
            gomega.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, managerWl)).To(gomega.Succeed())
            gomega.Eventually(func(g gomega.Gomega) {
                g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed())
                g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec))
                g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed())
                g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec))
            }, util.Timeout, util.Interval).Should(gomega.Succeed())
        })

        ginkgo.By("setting workload reservation in worker1, AC state is updated in manager and worker2 wl is removed", func() {
            admission := utiltestingapi.MakeAdmission(managerCq.Name).Obj()
            util.SetQuotaReservation(worker1TestCluster.ctx, worker1TestCluster.client, wlLookupKey, admission)

            gomega.Eventually(func(g gomega.Gomega) {
                g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed())
                acs := admissioncheck.FindAdmissionCheck(createdWorkload.Status.AdmissionChecks, kueue.AdmissionCheckReference(multiKueueAC.Name))
                g.Expect(acs).NotTo(gomega.BeNil())
                g.Expect(acs.State).To(gomega.Equal(kueue.CheckStateReady))
                g.Expect(acs.Message).To(gomega.Equal(`The workload got reservation on "worker1"`))
                ok, err := utiltesting.HasEventAppeared(managerTestCluster.ctx, managerTestCluster.client, corev1.Event{
                    Reason:  "MultiKueue",
                    Type:    corev1.EventTypeNormal,
                    Message: `The workload got reservation on "worker1"`,
                })
                g.Expect(err).NotTo(gomega.HaveOccurred())
                g.Expect(ok).To(gomega.BeTrue())
            }, util.Timeout, util.Interval).Should(gomega.Succeed())

            gomega.Eventually(func(g gomega.Gomega) {
                g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(utiltesting.BeNotFoundError())
            }, util.Timeout, util.Interval).Should(gomega.Succeed())
        })

        ginkgo.By("preempting workload in worker1", func() {
            gomega.Eventually(func(g gomega.Gomega) {
                createdWorkload := &kueue.Workload{}
                g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed())
                g.Expect(workload.SetConditionAndUpdate(worker1TestCluster.ctx, worker1TestCluster.client, createdWorkload, kueue.WorkloadEvicted, metav1.ConditionTrue, kueue.WorkloadEvictedByPreemption, "By test", "evict", util.RealClock)).To(gomega.Succeed())
            }, util.Timeout, util.Interval).Should(gomega.Succeed())
        })

        ginkgo.By("check manager's workload ClusterName reset", func() {
            gomega.Eventually(func(g gomega.Gomega) {
                managerWl := &kueue.Workload{}
                g.Expect(managerTestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, managerWl)).To(gomega.Succeed())

Contributor: Suggested change (or are there reasons to use the other context here?)

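Presumably the suggested change pairs the manager client with the manager cluster's own context for this read (an assumption based on the question above); as a one-line sketch:

```go
// Assumed intent of the suggestion: read the manager copy with the manager context.
g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, managerWl)).To(gomega.Succeed())
```
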
                g.Expect(managerWl.Status.NominatedClusterNames).To(gomega.ContainElements(workerCluster1.Name, workerCluster2.Name))
                g.Expect(managerWl.Status.ClusterName).To(gomega.BeNil())
                g.Expect(managerWl.Status.AdmissionChecks).To(gomega.ContainElement(gomega.BeComparableTo(
                    kueue.AdmissionCheckState{
                        Name:  kueue.AdmissionCheckReference(multiKueueAC.Name),
                        State: kueue.CheckStatePending,
                    },
                    cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime", "PodSetUpdates", "Message", "RetryCount"))))
            }, util.LongTimeout, util.Interval).Should(gomega.Succeed())
        })

        ginkgo.By("checking the workload admission process started again", func() {
            managerWl := &kueue.Workload{}
            gomega.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, managerWl)).To(gomega.Succeed())
            gomega.Eventually(func(g gomega.Gomega) {
                createdWorkload := &kueue.Workload{}
                g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed())
                g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec))
                g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed())
                g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec))
            }, util.LongTimeout, util.Interval).Should(gomega.Succeed())
        })
    })
})

func admitWorkloadAndCheckWorkerCopies(acName string, wlLookupKey types.NamespacedName, admission *utiltestingapi.AdmissionWrapper) {

Contributor: Nit: this present tense feels slightly confusing; IIUC the workload already got evicted.