From 7089d45b48f34ee5752b549091bb516190c54136 Mon Sep 17 00:00:00 2001 From: Andy Stoneberg Date: Thu, 19 Mar 2026 13:24:01 -0400 Subject: [PATCH] refactor: update Kueue management state checks and messaging - Deregistered the Kueue OperatorInstalledCheck in command.go, with a note to re-enable it in a future release that supports the Unmanaged state. - Enhanced the ManagementStateCheck to validate that only the Removed state is supported for Kueue before upgrading to RHOAI 3.3.1, updating the remediation messages accordingly. - Renamed test functions to reflect the new logic regarding management states, ensuring clarity in test outcomes. - Updated the DataIntegrityCheck to focus on the Unmanaged state, aligning checks with the new management state requirements. This refactor improves clarity and correctness in Kueue management state validations, ensuring users are informed about the necessary migration steps for upgrades. Signed-off-by: Andy Stoneberg --- pkg/lint/checks/components/kueue/kueue.go | 34 +++++++++-------- .../kueue/kueue_operator_installed.go | 16 +++++--- .../checks/components/kueue/kueue_test.go | 15 ++++---- pkg/lint/checks/workloads/kueue/check.go | 2 +- pkg/lint/checks/workloads/kueue/check_test.go | 38 +++++++++---------- pkg/lint/checks/workloads/kueue/support.go | 15 +++----- pkg/lint/command.go | 5 ++- 7 files changed, 65 insertions(+), 60 deletions(-) diff --git a/pkg/lint/checks/components/kueue/kueue.go b/pkg/lint/checks/components/kueue/kueue.go index 850536e3..9e657694 100644 --- a/pkg/lint/checks/components/kueue/kueue.go +++ b/pkg/lint/checks/components/kueue/kueue.go @@ -16,14 +16,17 @@ import ( ) const ( - kind = "kueue" - checkTypeManagementState = "management-state" - managementStateRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading" + kind = "kueue" + checkTypeManagementState = "management-state" + + // Deferred: parameterize hardcoded version references using ComponentRequest.TargetVersion. + msgManagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when you have migrated to the Red Hat build of Kueue Operator and the Kueue managementState is Unmanaged." + msgUnmanagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when the Kueue managementState is Unmanaged." ) -// ManagementStateCheck validates that Kueue managed option is not used before upgrading to 3.x. -// In RHOAI 3.x, the Managed option for Kueue is removed — users must migrate to the standalone -// Red Hat build of Kueue operator and set managementState to Removed or Unmanaged. +// ManagementStateCheck validates that Kueue managementState is Removed before upgrading to 3.x. +// In RHOAI 3.3.1, only the Removed state is supported. A future 3.3.x release will support +// Unmanaged with the Red Hat build of Kueue Operator. type ManagementStateCheck struct { check.BaseCheck } @@ -36,8 +39,7 @@ func NewManagementStateCheck() *ManagementStateCheck { Type: checkTypeManagementState, CheckID: "components.kueue.management-state", CheckName: "Components :: Kueue :: Management State (3.x)", - CheckDescription: "Validates that Kueue managementState is compatible with RHOAI 3.x (Managed option will be removed)", - CheckRemediation: managementStateRemediation, + CheckDescription: "Validates that Kueue managementState is Removed before upgrading to RHOAI 3.x", }, } } @@ -63,25 +65,25 @@ func (c *ManagementStateCheck) CanApply(ctx context.Context, target check.Target func (c *ManagementStateCheck) Validate(ctx context.Context, target check.Target) (*result.DiagnosticResult, error) { return validate.Component(c, target). Run(ctx, func(_ context.Context, req *validate.ComponentRequest) error { - tv := version.MajorMinorLabel(req.TargetVersion) - switch req.ManagementState { case constants.ManagementStateManaged: req.Result.SetCondition(check.NewCondition( check.ConditionTypeCompatible, metav1.ConditionFalse, check.WithReason(check.ReasonVersionIncompatible), - check.WithMessage("Kueue is managed by OpenShift AI (state: %s) but Managed option will be removed in RHOAI %s", req.ManagementState, tv), - check.WithImpact(result.ImpactBlocking), - check.WithRemediation(c.CheckRemediation), + check.WithMessage(msgManagedProhibited), + check.WithImpact(result.ImpactProhibited), )) case constants.ManagementStateUnmanaged: req.Result.SetCondition(check.NewCondition( check.ConditionTypeCompatible, - metav1.ConditionTrue, - check.WithReason(check.ReasonVersionCompatible), - check.WithMessage("Kueue managementState is %s — compatible with RHOAI %s", req.ManagementState, tv), + metav1.ConditionFalse, + check.WithReason(check.ReasonVersionIncompatible), + check.WithMessage(msgUnmanagedProhibited), + check.WithImpact(result.ImpactProhibited), )) + default: + return fmt.Errorf("unexpected management state %q for kueue", req.ManagementState) } return nil diff --git a/pkg/lint/checks/components/kueue/kueue_operator_installed.go b/pkg/lint/checks/components/kueue/kueue_operator_installed.go index a2dc9380..9e24cdea 100644 --- a/pkg/lint/checks/components/kueue/kueue_operator_installed.go +++ b/pkg/lint/checks/components/kueue/kueue_operator_installed.go @@ -16,13 +16,17 @@ import ( ) const ( - checkTypeOperatorInstalled = "operator-installed" - subscriptionName = "kueue-operator" - annotationInstalledVersion = "operator.opendatahub.io/installed-version" - msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading" + checkTypeOperatorInstalled = "operator-installed" + subscriptionName = "kueue-operator" + annotationInstalledVersion = "operator.opendatahub.io/installed-version" + msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading" + operatorInstalledManagedRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading" ) -// OperatorInstalledCheck validates the Red Hat build of Kueue operator installation status against the Kueue +// OperatorInstalledCheck is currently deregistered — re-enable when a future 3.3.x release +// supports Unmanaged + Red Hat build of Kueue Operator (see command.go registration). +// +// It validates the Red Hat build of Kueue operator installation status against the Kueue // component management state: // - Managed: prohibited — no supported upgrade path from embedded Kueue, must migrate to RHBoK first // - Unmanaged + operator absent: blocking — Unmanaged requires the Red Hat build of Kueue operator @@ -97,7 +101,7 @@ func (c *OperatorInstalledCheck) validateManaged( check.WithReason(check.ReasonVersionIncompatible), check.WithMessage(msgManagedNotSupported), check.WithImpact(result.ImpactProhibited), - check.WithRemediation(managementStateRemediation), + check.WithRemediation(operatorInstalledManagedRemediation), )) } diff --git a/pkg/lint/checks/components/kueue/kueue_test.go b/pkg/lint/checks/components/kueue/kueue_test.go index dbdd46cc..9c309062 100644 --- a/pkg/lint/checks/components/kueue/kueue_test.go +++ b/pkg/lint/checks/components/kueue/kueue_test.go @@ -57,7 +57,7 @@ func TestManagementStateCheck_CanApply_NotConfigured(t *testing.T) { g.Expect(canApply).To(BeFalse()) } -func TestManagementStateCheck_ManagedBlocking(t *testing.T) { +func TestManagementStateCheck_ManagedProhibited(t *testing.T) { g := NewWithT(t) ctx := t.Context() @@ -77,16 +77,16 @@ func TestManagementStateCheck_ManagedBlocking(t *testing.T) { "Type": Equal(check.ConditionTypeCompatible), "Status": Equal(metav1.ConditionFalse), "Reason": Equal(check.ReasonVersionIncompatible), - "Message": And(ContainSubstring("managed by OpenShift AI"), ContainSubstring("Managed option will be removed")), + "Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), ContainSubstring("migrated to the Red Hat build of Kueue Operator")), })) - g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactBlocking)) + g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited)) g.Expect(result.Annotations).To(And( HaveKeyWithValue("component.opendatahub.io/management-state", "Managed"), HaveKeyWithValue("check.opendatahub.io/target-version", "3.0.0"), )) } -func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) { +func TestManagementStateCheck_UnmanagedProhibited(t *testing.T) { g := NewWithT(t) ctx := t.Context() @@ -104,10 +104,11 @@ func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) { g.Expect(result.Status.Conditions).To(HaveLen(1)) g.Expect(result.Status.Conditions[0].Condition).To(MatchFields(IgnoreExtras, Fields{ "Type": Equal(check.ConditionTypeCompatible), - "Status": Equal(metav1.ConditionTrue), - "Reason": Equal(check.ReasonVersionCompatible), - "Message": ContainSubstring("compatible with RHOAI 3.1"), + "Status": Equal(metav1.ConditionFalse), + "Reason": Equal(check.ReasonVersionIncompatible), + "Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), Not(ContainSubstring("migrated to the Red Hat build of Kueue Operator"))), })) + g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited)) } func TestManagementStateCheck_CanApply_ManagementState(t *testing.T) { diff --git a/pkg/lint/checks/workloads/kueue/check.go b/pkg/lint/checks/workloads/kueue/check.go index d863b105..edb4aefe 100644 --- a/pkg/lint/checks/workloads/kueue/check.go +++ b/pkg/lint/checks/workloads/kueue/check.go @@ -39,7 +39,7 @@ func NewDataIntegrityCheck() *DataIntegrityCheck { } func (c *DataIntegrityCheck) CanApply(ctx context.Context, target check.Target) (bool, error) { - ok, err := IsKueueActive(ctx, target) + ok, err := IsKueueUnmanaged(ctx, target) if err != nil { return false, fmt.Errorf("checking kueue state: %w", err) } diff --git a/pkg/lint/checks/workloads/kueue/check_test.go b/pkg/lint/checks/workloads/kueue/check_test.go index 41d3f6ef..d55d818f 100644 --- a/pkg/lint/checks/workloads/kueue/check_test.go +++ b/pkg/lint/checks/workloads/kueue/check_test.go @@ -138,7 +138,7 @@ func TestDataIntegrityCheck_CanApply_KueueNotActive(t *testing.T) { g.Expect(applies).To(BeFalse()) } -func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) { +func TestDataIntegrityCheck_CanApply_KueueManaged(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ @@ -156,7 +156,7 @@ func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) { applies, err := chk.CanApply(t.Context(), target) g.Expect(err).ToNot(HaveOccurred()) - g.Expect(applies).To(BeTrue()) + g.Expect(applies).To(BeFalse()) } func TestDataIntegrityCheck_CanApply_KueueUnmanaged(t *testing.T) { @@ -184,7 +184,7 @@ func TestDataIntegrityCheck_BenignWorkload_NoViolations(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) // Non-kueue namespace. @@ -216,7 +216,7 @@ func TestDataIntegrityCheck_NoRelevantNamespaces(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) target := testutil.NewTarget(t, testutil.TargetConfig{ @@ -242,7 +242,7 @@ func TestDataIntegrityCheck_FullyConsistent(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -281,7 +281,7 @@ func TestDataIntegrityCheck_Invariant1_MissingLabelInKueueNamespace(t *testing.T g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -318,7 +318,7 @@ func TestDataIntegrityCheck_Invariant2_LabeledInNonKueueNamespace(t *testing.T) g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) // Namespace WITHOUT kueue-managed label. @@ -351,7 +351,7 @@ func TestDataIntegrityCheck_Invariant3_OwnerTreeMismatch(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -390,7 +390,7 @@ func TestDataIntegrityCheck_Invariant3_ChildHasLabelRootDoesNot(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -429,7 +429,7 @@ func TestDataIntegrityCheck_Invariant3_SingleNodeTreeConsistent(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -463,7 +463,7 @@ func TestDataIntegrityCheck_MultiLevelOwnerChain(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -505,7 +505,7 @@ func TestDataIntegrityCheck_MixedViolationsAcrossNamespaces(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) kueueNs := newNamespace("team-a", map[string]string{ @@ -546,7 +546,7 @@ func TestDataIntegrityCheck_BlockingImpact(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -574,7 +574,7 @@ func TestDataIntegrityCheck_AnnotationCheckTargetVersion(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) target := testutil.NewTarget(t, testutil.TargetConfig{ @@ -609,7 +609,7 @@ func TestDataIntegrityCheck_OpenshiftManagedLabel(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) // Uses the kueue.openshift.io/managed label instead of kueue-managed. @@ -641,7 +641,7 @@ func TestDataIntegrityCheck_Invariant1_ObjectContextAnnotation(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -673,7 +673,7 @@ func TestDataIntegrityCheck_Invariant2_ObjectContextAnnotation(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-b", nil) @@ -704,7 +704,7 @@ func TestDataIntegrityCheck_Invariant3_ObjectContextAnnotation(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -740,7 +740,7 @@ func TestDataIntegrityCheck_MultipleWorkloadTypes(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ diff --git a/pkg/lint/checks/workloads/kueue/support.go b/pkg/lint/checks/workloads/kueue/support.go index b3189bb0..8e7dc14a 100644 --- a/pkg/lint/checks/workloads/kueue/support.go +++ b/pkg/lint/checks/workloads/kueue/support.go @@ -73,8 +73,9 @@ const ( msgInvariant3Mismatch = "%s %s/%s has kueue.x-k8s.io/queue-name=%s but root %s %s/%s has kueue.x-k8s.io/queue-name=%s" ) -// IsKueueActive returns true when Kueue is active (Managed or Unmanaged) on the DSC. -func IsKueueActive( +// IsKueueUnmanaged returns true when Kueue managementState is Unmanaged on the DSC. +// Data integrity checks only apply when the user manages Kueue themselves (Unmanaged state). +func IsKueueUnmanaged( ctx context.Context, target check.Target, ) (bool, error) { @@ -83,14 +84,10 @@ func IsKueueActive( return false, fmt.Errorf("getting DataScienceCluster: %w", err) } - if !components.HasManagementState( + return components.HasManagementState( dsc, constants.ComponentKueue, - constants.ManagementStateManaged, constants.ManagementStateUnmanaged, - ) { - return false, nil - } - - return true, nil + constants.ManagementStateUnmanaged, + ), nil } // kueueEnabledNamespaces returns the set of namespaces that have a kueue-managed label. diff --git a/pkg/lint/command.go b/pkg/lint/command.go index 71d8d79c..1fb1641f 100644 --- a/pkg/lint/command.go +++ b/pkg/lint/command.go @@ -87,7 +87,7 @@ func NewCommand( registry.MustRegister(dscinitialization.NewDSCInitializationReadinessCheck()) registry.MustRegister(datasciencecluster.NewDataScienceClusterReadinessCheck()) - // Components (13) + // Components (12) registry.MustRegister(raycomponent.NewCodeFlareRemovalCheck()) registry.MustRegister(dashboard.NewAcceleratorProfileMigrationCheck()) registry.MustRegister(dashboard.NewHardwareProfileMigrationCheck()) @@ -98,7 +98,8 @@ func NewCommand( registry.MustRegister(kserve.NewServiceMeshOperatorCheck()) registry.MustRegister(kserve.NewServiceMeshRemovalCheck()) registry.MustRegister(kueue.NewManagementStateCheck()) - registry.MustRegister(kueue.NewOperatorInstalledCheck()) + // Deferred: re-enable when a future 3.3.x release supports Unmanaged + Red Hat build of Kueue Operator. + // registry.MustRegister(kueue.NewOperatorInstalledCheck()) registry.MustRegister(modelmesh.NewRemovalCheck()) registry.MustRegister(trainingoperator.NewDeprecationCheck())