Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 18 additions & 16 deletions pkg/lint/checks/components/kueue/kueue.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ import (
)

const (
kind = "kueue"
checkTypeManagementState = "management-state"
managementStateRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading"
kind = "kueue"
checkTypeManagementState = "management-state"

// Deferred: parameterize hardcoded version references using ComponentRequest.TargetVersion.
msgManagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when you have migrated to the Red Hat build of Kueue Operator and the Kueue managementState is Unmanaged."
msgUnmanagedProhibited = "The 3.3.1 upgrade currently only supports the Kueue managementState of Removed. A future 3.3.x release will allow an upgrade when the Kueue managementState is Unmanaged."
)

// ManagementStateCheck validates that Kueue managed option is not used before upgrading to 3.x.
// In RHOAI 3.x, the Managed option for Kueue is removed — users must migrate to the standalone
// Red Hat build of Kueue operator and set managementState to Removed or Unmanaged.
// ManagementStateCheck validates that Kueue managementState is Removed before upgrading to 3.x.
// In RHOAI 3.3.1, only the Removed state is supported. A future 3.3.x release will support
// Unmanaged with the Red Hat build of Kueue Operator.
type ManagementStateCheck struct {
check.BaseCheck
}
Expand All @@ -36,8 +39,7 @@ func NewManagementStateCheck() *ManagementStateCheck {
Type: checkTypeManagementState,
CheckID: "components.kueue.management-state",
CheckName: "Components :: Kueue :: Management State (3.x)",
CheckDescription: "Validates that Kueue managementState is compatible with RHOAI 3.x (Managed option will be removed)",
CheckRemediation: managementStateRemediation,
CheckDescription: "Validates that Kueue managementState is Removed before upgrading to RHOAI 3.x",
},
}
}
Expand All @@ -63,25 +65,25 @@ func (c *ManagementStateCheck) CanApply(ctx context.Context, target check.Target
func (c *ManagementStateCheck) Validate(ctx context.Context, target check.Target) (*result.DiagnosticResult, error) {
return validate.Component(c, target).
Run(ctx, func(_ context.Context, req *validate.ComponentRequest) error {
tv := version.MajorMinorLabel(req.TargetVersion)

switch req.ManagementState {
case constants.ManagementStateManaged:
req.Result.SetCondition(check.NewCondition(
check.ConditionTypeCompatible,
metav1.ConditionFalse,
check.WithReason(check.ReasonVersionIncompatible),
check.WithMessage("Kueue is managed by OpenShift AI (state: %s) but Managed option will be removed in RHOAI %s", req.ManagementState, tv),
check.WithImpact(result.ImpactBlocking),
check.WithRemediation(c.CheckRemediation),
check.WithMessage(msgManagedProhibited),
check.WithImpact(result.ImpactProhibited),
))
case constants.ManagementStateUnmanaged:
req.Result.SetCondition(check.NewCondition(
check.ConditionTypeCompatible,
metav1.ConditionTrue,
check.WithReason(check.ReasonVersionCompatible),
check.WithMessage("Kueue managementState is %s — compatible with RHOAI %s", req.ManagementState, tv),
metav1.ConditionFalse,
check.WithReason(check.ReasonVersionIncompatible),
check.WithMessage(msgUnmanagedProhibited),
check.WithImpact(result.ImpactProhibited),
))
default:
return fmt.Errorf("unexpected management state %q for kueue", req.ManagementState)
}

return nil
Expand Down
16 changes: 10 additions & 6 deletions pkg/lint/checks/components/kueue/kueue_operator_installed.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@ import (
)

const (
checkTypeOperatorInstalled = "operator-installed"
subscriptionName = "kueue-operator"
annotationInstalledVersion = "operator.opendatahub.io/installed-version"
msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading"
checkTypeOperatorInstalled = "operator-installed"
subscriptionName = "kueue-operator"
annotationInstalledVersion = "operator.opendatahub.io/installed-version"
msgManagedNotSupported = "Kueue managementState is Managed — migration to the Red Hat build of Kueue operator is required before upgrading"
operatorInstalledManagedRemediation = "Migrate to the Red Hat build of Kueue operator following https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.25/html/managing_openshift_ai/managing-workloads-with-kueue#migrating-to-the-rhbok-operator_kueue before upgrading"
)

// OperatorInstalledCheck validates the Red Hat build of Kueue operator installation status against the Kueue
// OperatorInstalledCheck is currently deregistered — re-enable when a future 3.3.x release
// supports Unmanaged + Red Hat build of Kueue Operator (see command.go registration).
//
// It validates the Red Hat build of Kueue operator installation status against the Kueue
// component management state:
// - Managed: prohibited — no supported upgrade path from embedded Kueue, must migrate to RHBoK first
// - Unmanaged + operator absent: blocking — Unmanaged requires the Red Hat build of Kueue operator
Expand Down Expand Up @@ -97,7 +101,7 @@ func (c *OperatorInstalledCheck) validateManaged(
check.WithReason(check.ReasonVersionIncompatible),
check.WithMessage(msgManagedNotSupported),
check.WithImpact(result.ImpactProhibited),
check.WithRemediation(managementStateRemediation),
check.WithRemediation(operatorInstalledManagedRemediation),
))
}

Expand Down
15 changes: 8 additions & 7 deletions pkg/lint/checks/components/kueue/kueue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestManagementStateCheck_CanApply_NotConfigured(t *testing.T) {
g.Expect(canApply).To(BeFalse())
}

func TestManagementStateCheck_ManagedBlocking(t *testing.T) {
func TestManagementStateCheck_ManagedProhibited(t *testing.T) {
g := NewWithT(t)
ctx := t.Context()

Expand All @@ -77,16 +77,16 @@ func TestManagementStateCheck_ManagedBlocking(t *testing.T) {
"Type": Equal(check.ConditionTypeCompatible),
"Status": Equal(metav1.ConditionFalse),
"Reason": Equal(check.ReasonVersionIncompatible),
"Message": And(ContainSubstring("managed by OpenShift AI"), ContainSubstring("Managed option will be removed")),
"Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), ContainSubstring("migrated to the Red Hat build of Kueue Operator")),
}))
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactBlocking))
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited))
g.Expect(result.Annotations).To(And(
HaveKeyWithValue("component.opendatahub.io/management-state", "Managed"),
HaveKeyWithValue("check.opendatahub.io/target-version", "3.0.0"),
))
}

func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) {
func TestManagementStateCheck_UnmanagedProhibited(t *testing.T) {
g := NewWithT(t)
ctx := t.Context()

Expand All @@ -104,10 +104,11 @@ func TestManagementStateCheck_UnmanagedAllowed(t *testing.T) {
g.Expect(result.Status.Conditions).To(HaveLen(1))
g.Expect(result.Status.Conditions[0].Condition).To(MatchFields(IgnoreExtras, Fields{
"Type": Equal(check.ConditionTypeCompatible),
"Status": Equal(metav1.ConditionTrue),
"Reason": Equal(check.ReasonVersionCompatible),
"Message": ContainSubstring("compatible with RHOAI 3.1"),
"Status": Equal(metav1.ConditionFalse),
"Reason": Equal(check.ReasonVersionIncompatible),
"Message": And(ContainSubstring("only supports the Kueue managementState of Removed"), Not(ContainSubstring("migrated to the Red Hat build of Kueue Operator"))),
}))
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactProhibited))
}

func TestManagementStateCheck_CanApply_ManagementState(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/lint/checks/workloads/kueue/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func NewDataIntegrityCheck() *DataIntegrityCheck {
}

func (c *DataIntegrityCheck) CanApply(ctx context.Context, target check.Target) (bool, error) {
ok, err := IsKueueActive(ctx, target)
ok, err := IsKueueUnmanaged(ctx, target)
if err != nil {
return false, fmt.Errorf("checking kueue state: %w", err)
}
Expand Down
38 changes: 19 additions & 19 deletions pkg/lint/checks/workloads/kueue/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ func TestDataIntegrityCheck_CanApply_KueueNotActive(t *testing.T) {
g.Expect(applies).To(BeFalse())
}

func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) {
func TestDataIntegrityCheck_CanApply_KueueManaged(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
Expand All @@ -156,7 +156,7 @@ func TestDataIntegrityCheck_CanApply_KueueActive(t *testing.T) {
applies, err := chk.CanApply(t.Context(), target)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(applies).To(BeTrue())
g.Expect(applies).To(BeFalse())
}

func TestDataIntegrityCheck_CanApply_KueueUnmanaged(t *testing.T) {
Expand Down Expand Up @@ -184,7 +184,7 @@ func TestDataIntegrityCheck_BenignWorkload_NoViolations(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

// Non-kueue namespace.
Expand Down Expand Up @@ -216,7 +216,7 @@ func TestDataIntegrityCheck_NoRelevantNamespaces(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

target := testutil.NewTarget(t, testutil.TargetConfig{
Expand All @@ -242,7 +242,7 @@ func TestDataIntegrityCheck_FullyConsistent(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -281,7 +281,7 @@ func TestDataIntegrityCheck_Invariant1_MissingLabelInKueueNamespace(t *testing.T
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -318,7 +318,7 @@ func TestDataIntegrityCheck_Invariant2_LabeledInNonKueueNamespace(t *testing.T)
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

// Namespace WITHOUT kueue-managed label.
Expand Down Expand Up @@ -351,7 +351,7 @@ func TestDataIntegrityCheck_Invariant3_OwnerTreeMismatch(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -390,7 +390,7 @@ func TestDataIntegrityCheck_Invariant3_ChildHasLabelRootDoesNot(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -429,7 +429,7 @@ func TestDataIntegrityCheck_Invariant3_SingleNodeTreeConsistent(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -463,7 +463,7 @@ func TestDataIntegrityCheck_MultiLevelOwnerChain(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -505,7 +505,7 @@ func TestDataIntegrityCheck_MixedViolationsAcrossNamespaces(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

kueueNs := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -546,7 +546,7 @@ func TestDataIntegrityCheck_BlockingImpact(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -574,7 +574,7 @@ func TestDataIntegrityCheck_AnnotationCheckTargetVersion(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

target := testutil.NewTarget(t, testutil.TargetConfig{
Expand Down Expand Up @@ -609,7 +609,7 @@ func TestDataIntegrityCheck_OpenshiftManagedLabel(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

// Uses the kueue.openshift.io/managed label instead of kueue-managed.
Expand Down Expand Up @@ -641,7 +641,7 @@ func TestDataIntegrityCheck_Invariant1_ObjectContextAnnotation(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -673,7 +673,7 @@ func TestDataIntegrityCheck_Invariant2_ObjectContextAnnotation(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-b", nil)
Expand Down Expand Up @@ -704,7 +704,7 @@ func TestDataIntegrityCheck_Invariant3_ObjectContextAnnotation(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down Expand Up @@ -740,7 +740,7 @@ func TestDataIntegrityCheck_MultipleWorkloadTypes(t *testing.T) {
g := NewWithT(t)

dsc := testutil.NewDSC(map[string]string{
"kueue": "Managed",
"kueue": "Unmanaged",
})

ns := newNamespace("team-a", map[string]string{
Expand Down
15 changes: 6 additions & 9 deletions pkg/lint/checks/workloads/kueue/support.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ const (
msgInvariant3Mismatch = "%s %s/%s has kueue.x-k8s.io/queue-name=%s but root %s %s/%s has kueue.x-k8s.io/queue-name=%s"
)

// IsKueueActive returns true when Kueue is active (Managed or Unmanaged) on the DSC.
func IsKueueActive(
// IsKueueUnmanaged returns true when Kueue managementState is Unmanaged on the DSC.
// Data integrity checks only apply when the user manages Kueue themselves (Unmanaged state).
func IsKueueUnmanaged(
ctx context.Context,
target check.Target,
) (bool, error) {
Expand All @@ -83,14 +84,10 @@ func IsKueueActive(
return false, fmt.Errorf("getting DataScienceCluster: %w", err)
}

if !components.HasManagementState(
return components.HasManagementState(
dsc, constants.ComponentKueue,
constants.ManagementStateManaged, constants.ManagementStateUnmanaged,
) {
return false, nil
}

return true, nil
constants.ManagementStateUnmanaged,
), nil
}

// kueueEnabledNamespaces returns the set of namespaces that have a kueue-managed label.
Expand Down
5 changes: 3 additions & 2 deletions pkg/lint/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func NewCommand(
registry.MustRegister(dscinitialization.NewDSCInitializationReadinessCheck())
registry.MustRegister(datasciencecluster.NewDataScienceClusterReadinessCheck())

// Components (13)
// Components (12)
registry.MustRegister(raycomponent.NewCodeFlareRemovalCheck())
registry.MustRegister(dashboard.NewAcceleratorProfileMigrationCheck())
registry.MustRegister(dashboard.NewHardwareProfileMigrationCheck())
Expand All @@ -98,7 +98,8 @@ func NewCommand(
registry.MustRegister(kserve.NewServiceMeshOperatorCheck())
registry.MustRegister(kserve.NewServiceMeshRemovalCheck())
registry.MustRegister(kueue.NewManagementStateCheck())
registry.MustRegister(kueue.NewOperatorInstalledCheck())
// Deferred: re-enable when a future 3.3.x release supports Unmanaged + Red Hat build of Kueue Operator.
// registry.MustRegister(kueue.NewOperatorInstalledCheck())
registry.MustRegister(modelmesh.NewRemovalCheck())
registry.MustRegister(trainingoperator.NewDeprecationCheck())

Expand Down