diff --git a/pkg/lint/checks/components/kueue/kueue.go b/pkg/lint/checks/components/kueue/kueue.go index 850536e3..9e657694 100644 --- a/pkg/lint/checks/components/kueue/kueue.go +++ b/pkg/lint/checks/components/kueue/kueue.go @@ -16,14 +16,17 @@ import ( ) const ( - kind = "kueue" - checkTypeManagementState = "management-state" - managementStateRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading" + kind = "kueue" + checkTypeManagementState = "management-state" + + // Deferred: parameterize hardcoded version references using ComponentRequest.TargetVersion. + msgManagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when you have migrated to the Red Hat build of Kueue Operator and the Kueue managementState is Unmanaged." + msgUnmanagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when the Kueue managementState is Unmanaged." ) -// ManagementStateCheck validates that Kueue managed option is not used before upgrading to 3.x. -// In RHOAI 3.x, the Managed option for Kueue is removed — users must migrate to the standalone -// Red Hat build of Kueue operator and set managementState to Removed or Unmanaged. +// ManagementStateCheck validates that Kueue managementState is Removed before upgrading to 3.x. +// In RHOAI 3.3.1, only the Removed state is supported. A future 3.3.x release will support +// Unmanaged with the Red Hat build of Kueue Operator. type ManagementStateCheck struct { check.BaseCheck } @@ -36,8 +39,7 @@ func NewManagementStateCheck() *ManagementStateCheck { Type: checkTypeManagementState, CheckID: "components.kueue.management-state", CheckName: "Components :: Kueue :: Management State (3.x)", - CheckDescription: "Validates that Kueue managementState is compatible with RHOAI 3.x (Managed option will be removed)", - CheckRemediation: managementStateRemediation, + CheckDescription: "Validates that Kueue managementState is Removed before upgrading to RHOAI 3.x", }, } } @@ -63,25 +65,25 @@ func (c *ManagementStateCheck) CanApply(ctx context.Context, target check.Target func (c *ManagementStateCheck) Validate(ctx context.Context, target check.Target) (*result.DiagnosticResult, error) { return validate.Component(c, target). Run(ctx, func(_ context.Context, req *validate.ComponentRequest) error { - tv := version.MajorMinorLabel(req.TargetVersion) - switch req.ManagementState { case constants.ManagementStateManaged: req.Result.SetCondition(check.NewCondition( check.ConditionTypeCompatible, metav1.ConditionFalse, check.WithReason(check.ReasonVersionIncompatible), - check.WithMessage("Kueue is managed by OpenShift AI (state: %s) but Managed option will be removed in RHOAI %s", req.ManagementState, tv), - check.WithImpact(result.ImpactBlocking), - check.WithRemediation(c.CheckRemediation), + check.WithMessage(msgManagedProhibited), + check.WithImpact(result.ImpactProhibited), )) case constants.ManagementStateUnmanaged: req.Result.SetCondition(check.NewCondition( check.ConditionTypeCompatible, - metav1.ConditionTrue, - check.WithReason(check.ReasonVersionCompatible), - check.WithMessage("Kueue managementState is %s — compatible with RHOAI %s", req.ManagementState, tv), + metav1.ConditionFalse, + check.WithReason(check.ReasonVersionIncompatible), + check.WithMessage(msgUnmanagedProhibited), + check.WithImpact(result.ImpactProhibited), )) + default: + return fmt.Errorf("unexpected management state %q for kueue", req.ManagementState) } return nil diff --git a/pkg/lint/checks/components/kueue/kueue_operator_installed.go b/pkg/lint/checks/components/kueue/kueue_operator_installed.go index a2dc9380..9e24cdea 100644 --- a/pkg/lint/checks/components/kueue/kueue_operator_installed.go +++ b/pkg/lint/checks/components/kueue/kueue_operator_installed.go @@ -16,13 +16,17 @@ import ( ) const ( - checkTypeOperatorInstalled = "operator-installed" - subscriptionName = "kueue-operator" - annotationInstalledVersion = "operator.opendatahub.io/installed-version" - msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading" + checkTypeOperatorInstalled = "operator-installed" + subscriptionName = "kueue-operator" + annotationInstalledVersion = "operator.opendatahub.io/installed-version" + msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading" + operatorInstalledManagedRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading" ) -// OperatorInstalledCheck validates the Red Hat build of Kueue operator installation status against the Kueue +// OperatorInstalledCheck is currently deregistered — re-enable when a future 3.3.x release +// supports Unmanaged + Red Hat build of Kueue Operator (see command.go registration). +// +// It validates the Red Hat build of Kueue operator installation status against the Kueue // component management state: // - Managed: prohibited — no supported upgrade path from embedded Kueue, must migrate to RHBoK first // - Unmanaged + operator absent: blocking — Unmanaged requires the Red Hat build of Kueue operator @@ -97,7 +101,7 @@ func (c *OperatorInstalledCheck) validateManaged( check.WithReason(check.ReasonVersionIncompatible), check.WithMessage(msgManagedNotSupported), check.WithImpact(result.ImpactProhibited), - check.WithRemediation(managementStateRemediation), + check.WithRemediation(operatorInstalledManagedRemediation), )) } diff --git a/pkg/lint/checks/components/kueue/kueue_test.go b/pkg/lint/checks/components/kueue/kueue_test.go index dbdd46cc..9c309062 100644 --- a/pkg/lint/checks/components/kueue/kueue_test.go +++ b/pkg/lint/checks/components/kueue/kueue_test.go @@ -57,7 +57,7 @@ func TestManagementStateCheck_CanApply_NotConfigured(t *testing.T) { g.Expect(canApply).To(BeFalse()) } -func TestManagementStateCheck_ManagedBlocking(t *testing.T) { +func TestManagementStateCheck_ManagedProhibited(t *testing.T) { g := NewWithT(t) ctx := t.Context() @@ -77,16 +77,16 @@ func TestManagementStateCheck_ManagedBlocking(t *testing.T) { "Type": Equal(check.ConditionTypeCompatible), "Status": Equal(metav1.ConditionFalse), "Reason": Equal(check.ReasonVersionIncompatible), - "Message": And(ContainSubstring("managed by OpenShift AI"), ContainSubstring("Managed option will be removed")), + "Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), ContainSubstring("migrated to the Red Hat build of Kueue Operator")), })) - g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactBlocking)) + g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited)) g.Expect(result.Annotations).To(And( HaveKeyWithValue("component.opendatahub.io/management-state", "Managed"), HaveKeyWithValue("check.opendatahub.io/target-version", "3.0.0"), )) } -func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) { +func TestManagementStateCheck_UnmanagedProhibited(t *testing.T) { g := NewWithT(t) ctx := t.Context() @@ -104,10 +104,11 @@ func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) { g.Expect(result.Status.Conditions).To(HaveLen(1)) g.Expect(result.Status.Conditions[0].Condition).To(MatchFields(IgnoreExtras, Fields{ "Type": Equal(check.ConditionTypeCompatible), - "Status": Equal(metav1.ConditionTrue), - "Reason": Equal(check.ReasonVersionCompatible), - "Message": ContainSubstring("compatible with RHOAI 3.1"), + "Status": Equal(metav1.ConditionFalse), + "Reason": Equal(check.ReasonVersionIncompatible), + "Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), Not(ContainSubstring("migrated to the Red Hat build of Kueue Operator"))), })) + g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited)) } func TestManagementStateCheck_CanApply_ManagementState(t *testing.T) { diff --git a/pkg/lint/checks/workloads/kueue/check.go b/pkg/lint/checks/workloads/kueue/check.go index d863b105..edb4aefe 100644 --- a/pkg/lint/checks/workloads/kueue/check.go +++ b/pkg/lint/checks/workloads/kueue/check.go @@ -39,7 +39,7 @@ func NewDataIntegrityCheck() *DataIntegrityCheck { } func (c *DataIntegrityCheck) CanApply(ctx context.Context, target check.Target) (bool, error) { - ok, err := IsKueueActive(ctx, target) + ok, err := IsKueueUnmanaged(ctx, target) if err != nil { return false, fmt.Errorf("checking kueue state: %w", err) } diff --git a/pkg/lint/checks/workloads/kueue/check_test.go b/pkg/lint/checks/workloads/kueue/check_test.go index 41d3f6ef..d55d818f 100644 --- a/pkg/lint/checks/workloads/kueue/check_test.go +++ b/pkg/lint/checks/workloads/kueue/check_test.go @@ -138,7 +138,7 @@ func TestDataIntegrityCheck_CanApply_KueueNotActive(t *testing.T) { g.Expect(applies).To(BeFalse()) } -func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) { +func TestDataIntegrityCheck_CanApply_KueueManaged(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ @@ -156,7 +156,7 @@ func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) { applies, err := chk.CanApply(t.Context(), target) g.Expect(err).ToNot(HaveOccurred()) - g.Expect(applies).To(BeTrue()) + g.Expect(applies).To(BeFalse()) } func TestDataIntegrityCheck_CanApply_KueueUnmanaged(t *testing.T) { @@ -184,7 +184,7 @@ func TestDataIntegrityCheck_BenignWorkload_NoViolations(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) // Non-kueue namespace. @@ -216,7 +216,7 @@ func TestDataIntegrityCheck_NoRelevantNamespaces(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) target := testutil.NewTarget(t, testutil.TargetConfig{ @@ -242,7 +242,7 @@ func TestDataIntegrityCheck_FullyConsistent(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -281,7 +281,7 @@ func TestDataIntegrityCheck_Invariant1_MissingLabelInKueueNamespace(t *testing.T g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -318,7 +318,7 @@ func TestDataIntegrityCheck_Invariant2_LabeledInNonKueueNamespace(t *testing.T) g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) // Namespace WITHOUT kueue-managed label. @@ -351,7 +351,7 @@ func TestDataIntegrityCheck_Invariant3_OwnerTreeMismatch(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -390,7 +390,7 @@ func TestDataIntegrityCheck_Invariant3_ChildHasLabelRootDoesNot(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -429,7 +429,7 @@ func TestDataIntegrityCheck_Invariant3_SingleNodeTreeConsistent(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -463,7 +463,7 @@ func TestDataIntegrityCheck_MultiLevelOwnerChain(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -505,7 +505,7 @@ func TestDataIntegrityCheck_MixedViolationsAcrossNamespaces(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) kueueNs := newNamespace("team-a", map[string]string{ @@ -546,7 +546,7 @@ func TestDataIntegrityCheck_BlockingImpact(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -574,7 +574,7 @@ func TestDataIntegrityCheck_AnnotationCheckTargetVersion(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) target := testutil.NewTarget(t, testutil.TargetConfig{ @@ -609,7 +609,7 @@ func TestDataIntegrityCheck_OpenshiftManagedLabel(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) // Uses the kueue.openshift.io/managed label instead of kueue-managed. @@ -641,7 +641,7 @@ func TestDataIntegrityCheck_Invariant1_ObjectContextAnnotation(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -673,7 +673,7 @@ func TestDataIntegrityCheck_Invariant2_ObjectContextAnnotation(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-b", nil) @@ -704,7 +704,7 @@ func TestDataIntegrityCheck_Invariant3_ObjectContextAnnotation(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ @@ -740,7 +740,7 @@ func TestDataIntegrityCheck_MultipleWorkloadTypes(t *testing.T) { g := NewWithT(t) dsc := testutil.NewDSC(map[string]string{ - "kueue": "Managed", + "kueue": "Unmanaged", }) ns := newNamespace("team-a", map[string]string{ diff --git a/pkg/lint/checks/workloads/kueue/support.go b/pkg/lint/checks/workloads/kueue/support.go index b3189bb0..8e7dc14a 100644 --- a/pkg/lint/checks/workloads/kueue/support.go +++ b/pkg/lint/checks/workloads/kueue/support.go @@ -73,8 +73,9 @@ const ( msgInvariant3Mismatch = "%s %s/%s has kueue.x-k8s.io/queue-name=%s but root %s %s/%s has kueue.x-k8s.io/queue-name=%s" ) -// IsKueueActive returns true when Kueue is active (Managed or Unmanaged) on the DSC. -func IsKueueActive( +// IsKueueUnmanaged returns true when Kueue managementState is Unmanaged on the DSC. +// Data integrity checks only apply when the user manages Kueue themselves (Unmanaged state). +func IsKueueUnmanaged( ctx context.Context, target check.Target, ) (bool, error) { @@ -83,14 +84,10 @@ func IsKueueActive( return false, fmt.Errorf("getting DataScienceCluster: %w", err) } - if !components.HasManagementState( + return components.HasManagementState( dsc, constants.ComponentKueue, - constants.ManagementStateManaged, constants.ManagementStateUnmanaged, - ) { - return false, nil - } - - return true, nil + constants.ManagementStateUnmanaged, + ), nil } // kueueEnabledNamespaces returns the set of namespaces that have a kueue-managed label. diff --git a/pkg/lint/command.go b/pkg/lint/command.go index 71d8d79c..1fb1641f 100644 --- a/pkg/lint/command.go +++ b/pkg/lint/command.go @@ -87,7 +87,7 @@ func NewCommand( registry.MustRegister(dscinitialization.NewDSCInitializationReadinessCheck()) registry.MustRegister(datasciencecluster.NewDataScienceClusterReadinessCheck()) - // Components (13) + // Components (12) registry.MustRegister(raycomponent.NewCodeFlareRemovalCheck()) registry.MustRegister(dashboard.NewAcceleratorProfileMigrationCheck()) registry.MustRegister(dashboard.NewHardwareProfileMigrationCheck()) @@ -98,7 +98,8 @@ func NewCommand( registry.MustRegister(kserve.NewServiceMeshOperatorCheck()) registry.MustRegister(kserve.NewServiceMeshRemovalCheck()) registry.MustRegister(kueue.NewManagementStateCheck()) - registry.MustRegister(kueue.NewOperatorInstalledCheck()) + // Deferred: re-enable when a future 3.3.x release supports Unmanaged + Red Hat build of Kueue Operator. + // registry.MustRegister(kueue.NewOperatorInstalledCheck()) registry.MustRegister(modelmesh.NewRemovalCheck()) registry.MustRegister(trainingoperator.NewDeprecationCheck())