@@ -23,7 +23,7 @@ import (
2323 "fmt"
2424 "strconv"
2525
26- kftrainerapi "github.com/kubeflow/trainer/v2/pkg/apis/trainer/v1alpha1"
26+ kftrainer "github.com/kubeflow/trainer/v2/pkg/apis/trainer/v1alpha1"
2727 kftrainerruntime "github.com/kubeflow/trainer/v2/pkg/runtime"
2828 kftrainerruntimecore "github.com/kubeflow/trainer/v2/pkg/runtime/core"
2929 kftrainerjobset "github.com/kubeflow/trainer/v2/pkg/runtime/framework/plugins/jobset"
@@ -42,15 +42,15 @@ import (
4242
4343 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
4444 "sigs.k8s.io/kueue/pkg/controller/jobframework"
45- kJobset "sigs.k8s.io/kueue/pkg/controller/jobs/jobset"
45+ workloadjobset "sigs.k8s.io/kueue/pkg/controller/jobs/jobset"
4646 "sigs.k8s.io/kueue/pkg/features"
4747 "sigs.k8s.io/kueue/pkg/podset"
4848 clientutil "sigs.k8s.io/kueue/pkg/util/client"
4949 "sigs.k8s.io/kueue/pkg/util/slices"
5050)
5151
5252var (
53- gvk = kftrainerapi .GroupVersion .WithKind ("TrainJob" )
53+ gvk = kftrainer .GroupVersion .WithKind ("TrainJob" )
5454 FrameworkName = "trainer.kubeflow.org/trainjob"
5555 TrainJobControllerName = "trainer.kubeflow.org/trainjob-controller"
5656)
@@ -66,8 +66,8 @@ func init() {
6666 NewJob : NewJob ,
6767 NewReconciler : NewReconciler ,
6868 SetupWebhook : SetupTrainJobWebhook ,
69- JobType : & kftrainerapi .TrainJob {},
70- AddToScheme : kftrainerapi .AddToScheme ,
69+ JobType : & kftrainer .TrainJob {},
70+ AddToScheme : kftrainer .AddToScheme ,
7171 MultiKueueAdapter : & multiKueueAdapter {},
7272 }))
7373}
@@ -108,11 +108,11 @@ func (r *trainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
108108
109109func (r * trainJobReconciler ) SetupWithManager (mgr ctrl.Manager ) error {
110110 b := ctrl .NewControllerManagedBy (mgr ).
111- For (& kftrainerapi .TrainJob {}).Owns (& kueue.Workload {}).Owns (& jobsetapi.JobSet {})
111+ For (& kftrainer .TrainJob {}).Owns (& kueue.Workload {}).Owns (& jobsetapi.JobSet {})
112112 return b .Complete (r )
113113}
114114
115- type TrainJob kftrainerapi .TrainJob
115+ type TrainJob kftrainer .TrainJob
116116
117117var _ jobframework.GenericJob = (* TrainJob )(nil )
118118var _ jobframework.JobWithCustomStop = (* TrainJob )(nil )
@@ -124,11 +124,11 @@ func NewJob() jobframework.GenericJob {
124124}
125125
126126func fromObject (obj runtime.Object ) * TrainJob {
127- return (* TrainJob )(obj .(* kftrainerapi .TrainJob ))
127+ return (* TrainJob )(obj .(* kftrainer .TrainJob ))
128128}
129129
130130func (t * TrainJob ) Object () client.Object {
131- return (* kftrainerapi .TrainJob )(t )
131+ return (* kftrainer .TrainJob )(t )
132132}
133133
134134func (t * TrainJob ) IsSuspended () bool {
@@ -167,7 +167,7 @@ func getChildJobSet(t *TrainJob) (*jobsetapi.JobSet, error) {
167167 return nil , fmt .Errorf ("unsupported runtime: %s" , runtimeRefGK )
168168 }
169169
170- trainJob := (* kftrainerapi .TrainJob )(t )
170+ trainJob := (* kftrainer .TrainJob )(t )
171171 trSpec , err := getRuntimeSpec (trainJob )
172172 if err != nil {
173173 return nil , fmt .Errorf ("runtime '%s' not found" , trainJob .Spec .RuntimeRef .Name )
@@ -212,16 +212,16 @@ func jobsetApplyToJobset(jobsetApply *jobsetapplyapi.JobSetApplyConfiguration) (
212212 return jobset , nil
213213}
214214
215- func getRuntimeSpec (trainJob * kftrainerapi .TrainJob ) (* kftrainerapi .TrainingRuntimeSpec , error ) {
216- if * trainJob .Spec .RuntimeRef .Kind == kftrainerapi .ClusterTrainingRuntimeKind {
217- var ctr kftrainerapi .ClusterTrainingRuntime
215+ func getRuntimeSpec (trainJob * kftrainer .TrainJob ) (* kftrainer .TrainingRuntimeSpec , error ) {
216+ if * trainJob .Spec .RuntimeRef .Kind == kftrainer .ClusterTrainingRuntimeKind {
217+ var ctr kftrainer .ClusterTrainingRuntime
218218 err := reconciler .client .Get (reconciler .ctx , client.ObjectKey {Name : trainJob .Spec .RuntimeRef .Name }, & ctr )
219219 if err != nil {
220220 return nil , err
221221 }
222222 return & ctr .Spec , nil
223223 } else {
224- var tr kftrainerapi .TrainingRuntime
224+ var tr kftrainer .TrainingRuntime
225225 err := reconciler .client .Get (reconciler .ctx , client.ObjectKey {Namespace : trainJob .Namespace , Name : trainJob .Spec .RuntimeRef .Name }, & tr )
226226 if err != nil {
227227 return nil , err
@@ -235,7 +235,7 @@ func (t *TrainJob) PodSets() ([]kueue.PodSet, error) {
235235 if err != nil {
236236 return nil , err
237237 }
238- return (* kJobset .JobSet )(jobset ).PodSets ()
238+ return (* workloadjobset .JobSet )(jobset ).PodSets ()
239239}
240240
241241func (t * TrainJob ) RunWithPodSetsInfo (podSetsInfo []podset.PodSetInfo ) error {
@@ -249,16 +249,16 @@ func (t *TrainJob) RunWithPodSetsInfo(podSetsInfo []podset.PodSetInfo) error {
249249 }
250250
251251 if t .Spec .PodSpecOverrides == nil {
252- t .Spec .PodSpecOverrides = []kftrainerapi .PodSpecOverride {}
252+ t .Spec .PodSpecOverrides = []kftrainer .PodSpecOverride {}
253253 }
254254 if t .Annotations == nil {
255255 t .Annotations = map [string ]string {}
256256 }
257257 t .Annotations [firstOverrideIdx ] = strconv .Itoa (len (t .Spec .PodSpecOverrides ))
258258 for _ , info := range podSetsInfo {
259259 // The trainjob controller merges each podSpecOverride sequentially, so any existing user provided override will be processed first
260- t .Spec .PodSpecOverrides = append (t .Spec .PodSpecOverrides , kftrainerapi .PodSpecOverride {
261- TargetJobs : []kftrainerapi .PodSpecOverrideTargetJob {
260+ t .Spec .PodSpecOverrides = append (t .Spec .PodSpecOverrides , kftrainer .PodSpecOverride {
261+ TargetJobs : []kftrainer .PodSpecOverrideTargetJob {
262262 {Name : string (info .Name )},
263263 },
264264 // TODO: Set the labels/annotations when supported. See https://github.com/kubeflow/trainer/pull/2785
@@ -320,10 +320,10 @@ func (t *TrainJob) RestorePodSetsInfo(_ []podset.PodSetInfo) bool {
320320}
321321
322322func (t * TrainJob ) Finished () (message string , success , finished bool ) {
323- if c := apimeta .FindStatusCondition (t .Status .Conditions , kftrainerapi .TrainJobComplete ); c != nil && c .Status == metav1 .ConditionTrue {
323+ if c := apimeta .FindStatusCondition (t .Status .Conditions , kftrainer .TrainJobComplete ); c != nil && c .Status == metav1 .ConditionTrue {
324324 return c .Message , true , true
325325 }
326- if c := apimeta .FindStatusCondition (t .Status .Conditions , kftrainerapi .TrainJobFailed ); c != nil && c .Status == metav1 .ConditionTrue {
326+ if c := apimeta .FindStatusCondition (t .Status .Conditions , kftrainer .TrainJobFailed ); c != nil && c .Status == metav1 .ConditionTrue {
327327 return c .Message , false , true
328328 }
329329 return message , success , false
@@ -356,15 +356,15 @@ func (t *TrainJob) ReclaimablePods() ([]kueue.ReclaimablePod, error) {
356356 }
357357
358358 ret := make ([]kueue.ReclaimablePod , 0 , len (jobset .Spec .ReplicatedJobs ))
359- statuses := slices .ToRefMap (t .Status .JobsStatus , func (js * kftrainerapi .JobStatus ) string { return js .Name })
359+ statuses := slices .ToRefMap (t .Status .JobsStatus , func (js * kftrainer .JobStatus ) string { return js .Name })
360360
361361 for i := range jobset .Spec .ReplicatedJobs {
362362 spec := & jobset .Spec .ReplicatedJobs [i ]
363363 if status , found := statuses [spec .Name ]; found && status .Succeeded > 0 {
364364 if status .Succeeded > 0 && status .Succeeded <= spec .Replicas {
365365 ret = append (ret , kueue.ReclaimablePod {
366366 Name : kueue .NewPodSetReference (spec .Name ),
367- Count : status .Succeeded * kJobset .PodsCountPerReplica (spec ),
367+ Count : status .Succeeded * workloadjobset .PodsCountPerReplica (spec ),
368368 })
369369 }
370370 }
0 commit comments