Skip to content

Commit e6af2cc

Browse files
authored
[Feature] Add ManagedBy field to RayJob (#2589)
* Add managedBy field to the spec * Make use of managedBy field in RayJob reconciliation * Update after code review
1 parent d05964c commit e6af2cc

File tree

13 files changed

+154
-0
lines changed

13 files changed

+154
-0
lines changed

docs/reference/api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ _Appears in:_
159159
| `metadata` _object (keys:string, values:string)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
160160
| `clusterSelector` _object (keys:string, values:string)_ | clusterSelector is used to select running rayclusters by labels | | |
161161
| `submitterConfig` _[SubmitterConfig](#submitterconfig)_ | Configurations of submitter k8s job. | | |
162+
| `managedBy` _string_ | ManagedBy is used to indicate the controller or entity that manages a RayJob.<br />The value must be either empty, 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'.<br />The kuberay-operator reconciles a RayJob which doesn't have this field at all or<br />the field value is the reserved string 'ray.io/kuberay-operator',<br />but delegates reconciling the RayJob with 'kueue.x-k8s.io/multikueue' to the Kueue.<br />The field is immutable. | | |
162163
| `entrypoint` _string_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster<br />Important: Run "make" to regenerate code after modifying this file | | |
163164
| `runtimeEnvYAML` _string_ | RuntimeEnvYAML represents the runtime environment configuration<br />provided as a multi-line YAML string. | | |
164165
| `jobId` _string_ | If jobId is not set, a new jobId will be auto-generated. | | |

helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/apis/ray/v1/rayjob_types.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@ type RayJobSpec struct {
8686
ClusterSelector map[string]string `json:"clusterSelector,omitempty"`
8787
// Configurations of submitter k8s job.
8888
SubmitterConfig *SubmitterConfig `json:"submitterConfig,omitempty"`
89+
// ManagedBy is used to indicate the controller or entity that manages a RayJob.
90+
// The value must be either empty, 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'.
91+
// The kuberay-operator reconciles a RayJob which doesn't have this field at all or
92+
// the field value is the reserved string 'ray.io/kuberay-operator',
93+
// but delegates reconciling the RayJob with 'kueue.x-k8s.io/multikueue' to the Kueue.
94+
// The field is immutable.
95+
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="the managedBy field is immutable"
96+
// +kubebuilder:validation:XValidation:rule="self in ['', 'ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue']",message="the managedBy field value must be either empty, 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'"
97+
ManagedBy *string `json:"managedBy,omitempty"`
8998
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
9099
// Important: Run "make" to regenerate code after modifying this file
91100
Entrypoint string `json:"entrypoint,omitempty"`

ray-operator/apis/ray/v1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/config/crd/bases/ray.io_rayjobs.yaml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/controllers/ray/rayjob_controller.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
9292
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
9393
}
9494

95+
if manager := utils.ManagedByExternalController(rayJobInstance.Spec.ManagedBy); manager != nil {
96+
logger.Info("Skipping RayJob managed by a custom controller", "managed-by", manager)
97+
return ctrl.Result{}, nil
98+
}
99+
95100
if !rayJobInstance.ObjectMeta.DeletionTimestamp.IsZero() {
96101
logger.Info("RayJob is being deleted", "DeletionTimestamp", rayJobInstance.ObjectMeta.DeletionTimestamp)
97102
// If the JobStatus is not terminal, it is possible that the Ray job is still running. This includes

ray-operator/controllers/ray/utils/constant.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,9 @@ const (
188188
// as well as the user-agent. This constant is updated before release.
189189
// TODO: Update KUBERAY_VERSION to be a build-time variable.
190190
KUBERAY_VERSION = "nightly"
191+
192+
// KubeRayController represents the value of the default job controller
193+
KubeRayController = "ray.io/kuberay-operator"
191194
)
192195

193196
type ServiceType string

ray-operator/controllers/ray/utils/util.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,3 +601,10 @@ type ClientProvider interface {
601601
GetDashboardClient(mgr manager.Manager) func() RayDashboardClientInterface
602602
GetHttpProxyClient(mgr manager.Manager) func() RayHttpProxyClientInterface
603603
}
604+
605+
func ManagedByExternalController(controllerName *string) *string {
606+
if controllerName != nil && *controllerName != KubeRayController {
607+
return controllerName
608+
}
609+
return nil
610+
}

ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobspec.go

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/test/e2e/rayjob_cluster_selector_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package e2e
22

33
import (
44
"testing"
5+
"time"
56

67
. "github.com/onsi/gomega"
8+
"k8s.io/apimachinery/pkg/api/errors"
79

810
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
911
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
@@ -87,4 +89,48 @@ env_vars:
8789
g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)).
8890
To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusFailed)))
8991
})
92+
93+
test.T().Run("RayJob should be created but not to be updated when managed externally", func(_ *testing.T) {
94+
t.Parallel()
95+
96+
// RayJob
97+
rayJobAC := rayv1ac.RayJob("managed-externally", namespace.Name).
98+
WithSpec(rayv1ac.RayJobSpec().
99+
WithClusterSelector(map[string]string{utils.RayClusterLabelKey: rayCluster.Name}).
100+
WithEntrypoint("python /home/ray/jobs/counter.py").
101+
WithRuntimeEnvYAML(`
102+
env_vars:
103+
counter_name: test_counter
104+
`).
105+
WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration()).
106+
WithManagedBy(MultiKueueController))
107+
108+
rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions)
109+
g.Expect(err).NotTo(HaveOccurred())
110+
test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name)
111+
112+
// Assert the Ray job status has not been updated
113+
g.Consistently(func(gg Gomega) {
114+
rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name)
115+
gg.Expect(err).ToNot(HaveOccurred())
116+
gg.Expect(rayJob.Status.JobDeploymentStatus).To(Equal(rayv1.JobDeploymentStatusNew))
117+
}, time.Second*3, time.Millisecond*500).Should(Succeed())
118+
})
119+
120+
test.T().Run("RayJob should not be created due to managedBy invalid value", func(_ *testing.T) {
121+
// RayJob
122+
rayJobAC := rayv1ac.RayJob("managed-externally-invalid", namespace.Name).
123+
WithSpec(rayv1ac.RayJobSpec().
124+
WithClusterSelector(map[string]string{utils.RayClusterLabelKey: rayCluster.Name}).
125+
WithEntrypoint("python /home/ray/jobs/counter.py").
126+
WithRuntimeEnvYAML(`
127+
env_vars:
128+
counter_name: test_counter
129+
`).
130+
WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration()).
131+
WithManagedBy("invalid.com/controller"))
132+
133+
_, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions)
134+
g.Expect(errors.IsInvalid(err)).To(BeTrue(), "error: %v", err)
135+
})
90136
}

0 commit comments

Comments
 (0)