Skip to content

Commit 4520153

Browse files
authored
Merge pull request #190 from andyatmiami/chore/cherry-pick-updated-kueue-messaging
refactor: update Kueue management state checks and messaging
2 parents 5eafa8f + 1afe44d commit 4520153

File tree

7 files changed

+65
-60
lines changed

7 files changed

+65
-60
lines changed

pkg/lint/checks/components/kueue/kueue.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@ import (
1616
)
1717

1818
const (
19-
kind = "kueue"
20-
checkTypeManagementState = "management-state"
21-
managementStateRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading"
19+
kind = "kueue"
20+
checkTypeManagementState = "management-state"
21+
22+
// Deferred: parameterize hardcoded version references using ComponentRequest.TargetVersion.
23+
msgManagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when you have migrated to the Red Hat build of Kueue Operator and the Kueue managementState is Unmanaged."
24+
msgUnmanagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when the Kueue managementState is Unmanaged."
2225
)
2326

24-
// ManagementStateCheck validates that Kueue managed option is not used before upgrading to 3.x.
25-
// In RHOAI 3.x, the Managed option for Kueue is removed — users must migrate to the standalone
26-
// Red Hat build of Kueue operator and set managementState to Removed or Unmanaged.
27+
// ManagementStateCheck validates that Kueue managementState is Removed before upgrading to 3.x.
28+
// In RHOAI 3.3.1, only the Removed state is supported. A future 3.3.x release will support
29+
// Unmanaged with the Red Hat build of Kueue Operator.
2730
type ManagementStateCheck struct {
2831
check.BaseCheck
2932
}
@@ -36,8 +39,7 @@ func NewManagementStateCheck() *ManagementStateCheck {
3639
Type: checkTypeManagementState,
3740
CheckID: "components.kueue.management-state",
3841
CheckName: "Components :: Kueue :: Management State (3.x)",
39-
CheckDescription: "Validates that Kueue managementState is compatible with RHOAI 3.x (Managed option will be removed)",
40-
CheckRemediation: managementStateRemediation,
42+
CheckDescription: "Validates that Kueue managementState is Removed before upgrading to RHOAI 3.x",
4143
},
4244
}
4345
}
@@ -63,25 +65,25 @@ func (c *ManagementStateCheck) CanApply(ctx context.Context, target check.Target
6365
func (c *ManagementStateCheck) Validate(ctx context.Context, target check.Target) (*result.DiagnosticResult, error) {
6466
return validate.Component(c, target).
6567
Run(ctx, func(_ context.Context, req *validate.ComponentRequest) error {
66-
tv := version.MajorMinorLabel(req.TargetVersion)
67-
6868
switch req.ManagementState {
6969
case constants.ManagementStateManaged:
7070
req.Result.SetCondition(check.NewCondition(
7171
check.ConditionTypeCompatible,
7272
metav1.ConditionFalse,
7373
check.WithReason(check.ReasonVersionIncompatible),
74-
check.WithMessage("Kueue is managed by OpenShift AI (state: %s) but Managed option will be removed in RHOAI %s", req.ManagementState, tv),
75-
check.WithImpact(result.ImpactBlocking),
76-
check.WithRemediation(c.CheckRemediation),
74+
check.WithMessage(msgManagedProhibited),
75+
check.WithImpact(result.ImpactProhibited),
7776
))
7877
case constants.ManagementStateUnmanaged:
7978
req.Result.SetCondition(check.NewCondition(
8079
check.ConditionTypeCompatible,
81-
metav1.ConditionTrue,
82-
check.WithReason(check.ReasonVersionCompatible),
83-
check.WithMessage("Kueue managementState is %s — compatible with RHOAI %s", req.ManagementState, tv),
80+
metav1.ConditionFalse,
81+
check.WithReason(check.ReasonVersionIncompatible),
82+
check.WithMessage(msgUnmanagedProhibited),
83+
check.WithImpact(result.ImpactProhibited),
8484
))
85+
default:
86+
return fmt.Errorf("unexpected management state %q for kueue", req.ManagementState)
8587
}
8688

8789
return nil

pkg/lint/checks/components/kueue/kueue_operator_installed.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,17 @@ import (
1616
)
1717

1818
const (
19-
checkTypeOperatorInstalled = "operator-installed"
20-
subscriptionName = "kueue-operator"
21-
annotationInstalledVersion = "operator.opendatahub.io/installed-version"
22-
msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading"
19+
checkTypeOperatorInstalled = "operator-installed"
20+
subscriptionName = "kueue-operator"
21+
annotationInstalledVersion = "operator.opendatahub.io/installed-version"
22+
msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading"
23+
operatorInstalledManagedRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading"
2324
)
2425

25-
// OperatorInstalledCheck validates the Red Hat build of Kueue operator installation status against the Kueue
26+
// OperatorInstalledCheck is currently deregistered — re-enable when a future 3.3.x release
27+
// supports Unmanaged + Red Hat build of Kueue Operator (see command.go registration).
28+
//
29+
// It validates the Red Hat build of Kueue operator installation status against the Kueue
2630
// component management state:
2731
// - Managed: prohibited — no supported upgrade path from embedded Kueue, must migrate to RHBoK first
2832
// - Unmanaged + operator absent: blocking — Unmanaged requires the Red Hat build of Kueue operator
@@ -97,7 +101,7 @@ func (c *OperatorInstalledCheck) validateManaged(
97101
check.WithReason(check.ReasonVersionIncompatible),
98102
check.WithMessage(msgManagedNotSupported),
99103
check.WithImpact(result.ImpactProhibited),
100-
check.WithRemediation(managementStateRemediation),
104+
check.WithRemediation(operatorInstalledManagedRemediation),
101105
))
102106
}
103107

pkg/lint/checks/components/kueue/kueue_test.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func TestManagementStateCheck_CanApply_NotConfigured(t *testing.T) {
5757
g.Expect(canApply).To(BeFalse())
5858
}
5959

60-
func TestManagementStateCheck_ManagedBlocking(t *testing.T) {
60+
func TestManagementStateCheck_ManagedProhibited(t *testing.T) {
6161
g := NewWithT(t)
6262
ctx := t.Context()
6363

@@ -77,16 +77,16 @@ func TestManagementStateCheck_ManagedBlocking(t *testing.T) {
7777
"Type": Equal(check.ConditionTypeCompatible),
7878
"Status": Equal(metav1.ConditionFalse),
7979
"Reason": Equal(check.ReasonVersionIncompatible),
80-
"Message": And(ContainSubstring("managed by OpenShift AI"), ContainSubstring("Managed option will be removed")),
80+
"Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), ContainSubstring("migrated to the Red Hat build of Kueue Operator")),
8181
}))
82-
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactBlocking))
82+
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited))
8383
g.Expect(result.Annotations).To(And(
8484
HaveKeyWithValue("component.opendatahub.io/management-state", "Managed"),
8585
HaveKeyWithValue("check.opendatahub.io/target-version", "3.0.0"),
8686
))
8787
}
8888

89-
func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) {
89+
func TestManagementStateCheck_UnmanagedProhibited(t *testing.T) {
9090
g := NewWithT(t)
9191
ctx := t.Context()
9292

@@ -104,10 +104,11 @@ func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) {
104104
g.Expect(result.Status.Conditions).To(HaveLen(1))
105105
g.Expect(result.Status.Conditions[0].Condition).To(MatchFields(IgnoreExtras, Fields{
106106
"Type": Equal(check.ConditionTypeCompatible),
107-
"Status": Equal(metav1.ConditionTrue),
108-
"Reason": Equal(check.ReasonVersionCompatible),
109-
"Message": ContainSubstring("compatible with RHOAI 3.1"),
107+
"Status": Equal(metav1.ConditionFalse),
108+
"Reason": Equal(check.ReasonVersionIncompatible),
109+
"Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), Not(ContainSubstring("migrated to the Red Hat build of Kueue Operator"))),
110110
}))
111+
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited))
111112
}
112113

113114
func TestManagementStateCheck_CanApply_ManagementState(t *testing.T) {

pkg/lint/checks/workloads/kueue/check.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ func NewDataIntegrityCheck() *DataIntegrityCheck {
3939
}
4040

4141
func (c *DataIntegrityCheck) CanApply(ctx context.Context, target check.Target) (bool, error) {
42-
ok, err := IsKueueActive(ctx, target)
42+
ok, err := IsKueueUnmanaged(ctx, target)
4343
if err != nil {
4444
return false, fmt.Errorf("checking kueue state: %w", err)
4545
}

pkg/lint/checks/workloads/kueue/check_test.go

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ func TestDataIntegrityCheck_CanApply_KueueNotActive(t *testing.T) {
138138
g.Expect(applies).To(BeFalse())
139139
}
140140

141-
func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) {
141+
func TestDataIntegrityCheck_CanApply_KueueManaged(t *testing.T) {
142142
g := NewWithT(t)
143143

144144
dsc := testutil.NewDSC(map[string]string{
@@ -156,7 +156,7 @@ func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) {
156156
applies, err := chk.CanApply(t.Context(), target)
157157

158158
g.Expect(err).ToNot(HaveOccurred())
159-
g.Expect(applies).To(BeTrue())
159+
g.Expect(applies).To(BeFalse())
160160
}
161161

162162
func TestDataIntegrityCheck_CanApply_KueueUnmanaged(t *testing.T) {
@@ -184,7 +184,7 @@ func TestDataIntegrityCheck_BenignWorkload_NoViolations(t *testing.T) {
184184
g := NewWithT(t)
185185

186186
dsc := testutil.NewDSC(map[string]string{
187-
"kueue": "Managed",
187+
"kueue": "Unmanaged",
188188
})
189189

190190
// Non-kueue namespace.
@@ -216,7 +216,7 @@ func TestDataIntegrityCheck_NoRelevantNamespaces(t *testing.T) {
216216
g := NewWithT(t)
217217

218218
dsc := testutil.NewDSC(map[string]string{
219-
"kueue": "Managed",
219+
"kueue": "Unmanaged",
220220
})
221221

222222
target := testutil.NewTarget(t, testutil.TargetConfig{
@@ -242,7 +242,7 @@ func TestDataIntegrityCheck_FullyConsistent(t *testing.T) {
242242
g := NewWithT(t)
243243

244244
dsc := testutil.NewDSC(map[string]string{
245-
"kueue": "Managed",
245+
"kueue": "Unmanaged",
246246
})
247247

248248
ns := newNamespace("team-a", map[string]string{
@@ -281,7 +281,7 @@ func TestDataIntegrityCheck_Invariant1_MissingLabelInKueueNamespace(t *testing.T
281281
g := NewWithT(t)
282282

283283
dsc := testutil.NewDSC(map[string]string{
284-
"kueue": "Managed",
284+
"kueue": "Unmanaged",
285285
})
286286

287287
ns := newNamespace("team-a", map[string]string{
@@ -318,7 +318,7 @@ func TestDataIntegrityCheck_Invariant2_LabeledInNonKueueNamespace(t *testing.T)
318318
g := NewWithT(t)
319319

320320
dsc := testutil.NewDSC(map[string]string{
321-
"kueue": "Managed",
321+
"kueue": "Unmanaged",
322322
})
323323

324324
// Namespace WITHOUT kueue-managed label.
@@ -351,7 +351,7 @@ func TestDataIntegrityCheck_Invariant3_OwnerTreeMismatch(t *testing.T) {
351351
g := NewWithT(t)
352352

353353
dsc := testutil.NewDSC(map[string]string{
354-
"kueue": "Managed",
354+
"kueue": "Unmanaged",
355355
})
356356

357357
ns := newNamespace("team-a", map[string]string{
@@ -390,7 +390,7 @@ func TestDataIntegrityCheck_Invariant3_ChildHasLabelRootDoesNot(t *testing.T) {
390390
g := NewWithT(t)
391391

392392
dsc := testutil.NewDSC(map[string]string{
393-
"kueue": "Managed",
393+
"kueue": "Unmanaged",
394394
})
395395

396396
ns := newNamespace("team-a", map[string]string{
@@ -429,7 +429,7 @@ func TestDataIntegrityCheck_Invariant3_SingleNodeTreeConsistent(t *testing.T) {
429429
g := NewWithT(t)
430430

431431
dsc := testutil.NewDSC(map[string]string{
432-
"kueue": "Managed",
432+
"kueue": "Unmanaged",
433433
})
434434

435435
ns := newNamespace("team-a", map[string]string{
@@ -463,7 +463,7 @@ func TestDataIntegrityCheck_MultiLevelOwnerChain(t *testing.T) {
463463
g := NewWithT(t)
464464

465465
dsc := testutil.NewDSC(map[string]string{
466-
"kueue": "Managed",
466+
"kueue": "Unmanaged",
467467
})
468468

469469
ns := newNamespace("team-a", map[string]string{
@@ -505,7 +505,7 @@ func TestDataIntegrityCheck_MixedViolationsAcrossNamespaces(t *testing.T) {
505505
g := NewWithT(t)
506506

507507
dsc := testutil.NewDSC(map[string]string{
508-
"kueue": "Managed",
508+
"kueue": "Unmanaged",
509509
})
510510

511511
kueueNs := newNamespace("team-a", map[string]string{
@@ -546,7 +546,7 @@ func TestDataIntegrityCheck_BlockingImpact(t *testing.T) {
546546
g := NewWithT(t)
547547

548548
dsc := testutil.NewDSC(map[string]string{
549-
"kueue": "Managed",
549+
"kueue": "Unmanaged",
550550
})
551551

552552
ns := newNamespace("team-a", map[string]string{
@@ -574,7 +574,7 @@ func TestDataIntegrityCheck_AnnotationCheckTargetVersion(t *testing.T) {
574574
g := NewWithT(t)
575575

576576
dsc := testutil.NewDSC(map[string]string{
577-
"kueue": "Managed",
577+
"kueue": "Unmanaged",
578578
})
579579

580580
target := testutil.NewTarget(t, testutil.TargetConfig{
@@ -609,7 +609,7 @@ func TestDataIntegrityCheck_OpenshiftManagedLabel(t *testing.T) {
609609
g := NewWithT(t)
610610

611611
dsc := testutil.NewDSC(map[string]string{
612-
"kueue": "Managed",
612+
"kueue": "Unmanaged",
613613
})
614614

615615
// Uses the kueue.openshift.io/managed label instead of kueue-managed.
@@ -641,7 +641,7 @@ func TestDataIntegrityCheck_Invariant1_ObjectContextAnnotation(t *testing.T) {
641641
g := NewWithT(t)
642642

643643
dsc := testutil.NewDSC(map[string]string{
644-
"kueue": "Managed",
644+
"kueue": "Unmanaged",
645645
})
646646

647647
ns := newNamespace("team-a", map[string]string{
@@ -673,7 +673,7 @@ func TestDataIntegrityCheck_Invariant2_ObjectContextAnnotation(t *testing.T) {
673673
g := NewWithT(t)
674674

675675
dsc := testutil.NewDSC(map[string]string{
676-
"kueue": "Managed",
676+
"kueue": "Unmanaged",
677677
})
678678

679679
ns := newNamespace("team-b", nil)
@@ -704,7 +704,7 @@ func TestDataIntegrityCheck_Invariant3_ObjectContextAnnotation(t *testing.T) {
704704
g := NewWithT(t)
705705

706706
dsc := testutil.NewDSC(map[string]string{
707-
"kueue": "Managed",
707+
"kueue": "Unmanaged",
708708
})
709709

710710
ns := newNamespace("team-a", map[string]string{
@@ -740,7 +740,7 @@ func TestDataIntegrityCheck_MultipleWorkloadTypes(t *testing.T) {
740740
g := NewWithT(t)
741741

742742
dsc := testutil.NewDSC(map[string]string{
743-
"kueue": "Managed",
743+
"kueue": "Unmanaged",
744744
})
745745

746746
ns := newNamespace("team-a", map[string]string{

pkg/lint/checks/workloads/kueue/support.go

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,9 @@ const (
7373
msgInvariant3Mismatch = "%s %s/%s has kueue.x-k8s.io/queue-name=%s but root %s %s/%s has kueue.x-k8s.io/queue-name=%s"
7474
)
7575

76-
// IsKueueActive returns true when Kueue is active (Managed or Unmanaged) on the DSC.
77-
func IsKueueActive(
76+
// IsKueueUnmanaged returns true when Kueue managementState is Unmanaged on the DSC.
77+
// Data integrity checks only apply when the user manages Kueue themselves (Unmanaged state).
78+
func IsKueueUnmanaged(
7879
ctx context.Context,
7980
target check.Target,
8081
) (bool, error) {
@@ -83,14 +84,10 @@ func IsKueueActive(
8384
return false, fmt.Errorf("getting DataScienceCluster: %w", err)
8485
}
8586

86-
if !components.HasManagementState(
87+
return components.HasManagementState(
8788
dsc, constants.ComponentKueue,
88-
constants.ManagementStateManaged, constants.ManagementStateUnmanaged,
89-
) {
90-
return false, nil
91-
}
92-
93-
return true, nil
89+
constants.ManagementStateUnmanaged,
90+
), nil
9491
}
9592

9693
// kueueEnabledNamespaces returns the set of namespaces that have a kueue-managed label.

pkg/lint/command.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func NewCommand(
8787
registry.MustRegister(dscinitialization.NewDSCInitializationReadinessCheck())
8888
registry.MustRegister(datasciencecluster.NewDataScienceClusterReadinessCheck())
8989

90-
// Components (13)
90+
// Components (12)
9191
registry.MustRegister(raycomponent.NewCodeFlareRemovalCheck())
9292
registry.MustRegister(dashboard.NewAcceleratorProfileMigrationCheck())
9393
registry.MustRegister(dashboard.NewHardwareProfileMigrationCheck())
@@ -98,7 +98,8 @@ func NewCommand(
9898
registry.MustRegister(kserve.NewServiceMeshOperatorCheck())
9999
registry.MustRegister(kserve.NewServiceMeshRemovalCheck())
100100
registry.MustRegister(kueue.NewManagementStateCheck())
101-
registry.MustRegister(kueue.NewOperatorInstalledCheck())
101+
// Deferred: re-enable when a future 3.3.x release supports Unmanaged + Red Hat build of Kueue Operator.
102+
// registry.MustRegister(kueue.NewOperatorInstalledCheck())
102103
registry.MustRegister(modelmesh.NewRemovalCheck())
103104
registry.MustRegister(trainingoperator.NewDeprecationCheck())
104105

0 commit comments

Comments
 (0)