Skip to content

Commit fa2aa34

Browse files
committed
feat: add kueue labels lint check for notebooks
Detect Notebooks in kueue-enabled namespaces that are missing the required kueue.x-k8s.io/queue-name label. Without this label, notebooks that attempt to be modified will end up failing with this error: ``` Error from server (Forbidden): admission webhook "kubeflow-kueuelabels-validator.opendatahub.io" denied the request: Kueue label validation failed: missing required label "kueue.x-k8s.io/queue-name" ``` This is a critical misconfiguration that must be flagged and addressed in order for subsequent migration to be successful. Signed-off-by: Andy Stoneberg <astonebe@redhat.com>
1 parent 1f0e1ea commit fa2aa34

File tree

4 files changed

+472
-1
lines changed

4 files changed

+472
-1
lines changed

pkg/lint/checks/workloads/notebook/constants.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ const (
1616
ConditionTypeHardwareProfileCompatible = "HardwareProfileCompatible"
1717
ConditionTypeHardwareProfileIntegrity = "HardwareProfileIntegrity"
1818
ConditionTypeNotebooksCompatible = "NotebooksCompatible"
19+
ConditionTypeKueueLabels = "KueueLabels"
1920
ConditionTypeRunningWorkloads = "RunningWorkloads"
2021
)
2122

@@ -80,6 +81,12 @@ const (
8081
MsgConnectionsMissing = "Found %d Notebook(s) referencing connection Secrets that do not exist on the cluster"
8182
)
8283

84+
// Messages for KueueLabels check.
85+
const (
86+
MsgAllKueueLabelsValid = "All Notebooks in kueue-enabled namespaces have the required queue label"
87+
MsgKueueLabelsMissing = "Found %d Notebook(s) in kueue-enabled namespaces missing the kueue.x-k8s.io/queue-name label"
88+
)
89+
8390
// Messages for ContainerName check.
8491
const (
8592
MsgNoContainerNameMismatch = "No Notebooks found with container name mismatch"
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
package notebook
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strconv"
7+
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
"k8s.io/apimachinery/pkg/types"
10+
"k8s.io/apimachinery/pkg/util/sets"
11+
12+
"github.com/opendatahub-io/odh-cli/pkg/lint/check"
13+
"github.com/opendatahub-io/odh-cli/pkg/lint/check/result"
14+
"github.com/opendatahub-io/odh-cli/pkg/lint/check/validate"
15+
"github.com/opendatahub-io/odh-cli/pkg/resources"
16+
"github.com/opendatahub-io/odh-cli/pkg/util/client"
17+
)
18+
19+
// Kueue-specific label keys.
20+
const (
21+
LabelKueueManaged = "kueue-managed"
22+
LabelKueueOpenshiftManaged = "kueue.openshift.io/managed"
23+
LabelKueueQueueName = "kueue.x-k8s.io/queue-name"
24+
)
25+
26+
// KueueLabelsCheck verifies that Notebooks in kueue-enabled namespaces have the
27+
// required kueue queue label for workload scheduling.
28+
type KueueLabelsCheck struct {
29+
check.BaseCheck
30+
}
31+
32+
func NewKueueLabelsCheck() *KueueLabelsCheck {
33+
return &KueueLabelsCheck{
34+
BaseCheck: check.BaseCheck{
35+
CheckGroup: check.GroupWorkload,
36+
Kind: kind,
37+
Type: check.CheckTypeDataIntegrity,
38+
CheckID: "workloads.notebook.kueue-labels",
39+
CheckName: "Workloads :: Notebook :: Kueue Labels",
40+
CheckDescription: "Verifies that Notebooks in kueue-enabled namespaces have the required kueue queue label for workload scheduling",
41+
CheckRemediation: "Add the label kueue.x-k8s.io/queue-name: default to the affected Notebooks, or remove kueue labels from the namespace if kueue integration is not intended",
42+
},
43+
}
44+
}
45+
46+
// CanApply returns whether this check should run for the given target.
47+
// Applies whenever Workbenches is Managed, regardless of version.
48+
func (c *KueueLabelsCheck) CanApply(ctx context.Context, target check.Target) (bool, error) {
49+
return isWorkbenchesManaged(ctx, target)
50+
}
51+
52+
// Validate lists Notebooks and checks that those in kueue-enabled namespaces have
53+
// the required kueue queue label.
54+
func (c *KueueLabelsCheck) Validate(
55+
ctx context.Context,
56+
target check.Target,
57+
) (*result.DiagnosticResult, error) {
58+
return validate.WorkloadsMetadata(c, target, resources.Notebook).
59+
Run(ctx, c.checkKueueLabels)
60+
}
61+
62+
// checkKueueLabels cross-references notebook namespaces against kueue-enabled namespaces
63+
// and verifies that notebooks in those namespaces have the required queue label.
64+
func (c *KueueLabelsCheck) checkKueueLabels(
65+
ctx context.Context,
66+
req *validate.WorkloadRequest[*metav1.PartialObjectMetadata],
67+
) error {
68+
dr := req.Result
69+
70+
// Collect unique namespaces from notebooks.
71+
notebookNamespaces := sets.New[string]()
72+
for _, nb := range req.Items {
73+
notebookNamespaces.Insert(nb.GetNamespace())
74+
}
75+
76+
// Build kueue-enabled namespace set by fetching metadata for each namespace.
77+
kueueNamespaces := sets.New[string]()
78+
79+
for ns := range notebookNamespaces {
80+
nsMeta, err := req.Client.GetResourceMetadata(ctx, resources.Namespace, ns)
81+
if err != nil {
82+
if client.IsResourceTypeNotFound(err) {
83+
continue
84+
}
85+
86+
return fmt.Errorf("getting namespace %s metadata: %w", ns, err)
87+
}
88+
89+
if nsMeta == nil {
90+
continue
91+
}
92+
93+
labels := nsMeta.GetLabels()
94+
if labels[LabelKueueManaged] == "true" || labels[LabelKueueOpenshiftManaged] == "true" {
95+
kueueNamespaces.Insert(ns)
96+
}
97+
}
98+
99+
// Check notebooks in kueue-enabled namespaces for the required queue label.
100+
impacted := make([]types.NamespacedName, 0)
101+
102+
for _, nb := range req.Items {
103+
if !kueueNamespaces.Has(nb.GetNamespace()) {
104+
continue
105+
}
106+
107+
labels := nb.GetLabels()
108+
queueName, ok := labels[LabelKueueQueueName]
109+
if !ok || queueName == "" {
110+
impacted = append(impacted, types.NamespacedName{
111+
Namespace: nb.GetNamespace(),
112+
Name: nb.GetName(),
113+
})
114+
}
115+
}
116+
117+
totalImpacted := len(impacted)
118+
dr.Annotations[check.AnnotationImpactedWorkloadCount] = strconv.Itoa(totalImpacted)
119+
120+
dr.Status.Conditions = append(dr.Status.Conditions, c.newCondition(totalImpacted))
121+
dr.SetImpactedObjects(resources.Notebook, impacted)
122+
123+
return nil
124+
}
125+
126+
func (c *KueueLabelsCheck) newCondition(totalImpacted int) result.Condition {
127+
if totalImpacted == 0 {
128+
return check.NewCondition(
129+
ConditionTypeKueueLabels,
130+
metav1.ConditionTrue,
131+
check.WithReason(check.ReasonRequirementsMet),
132+
check.WithMessage(MsgAllKueueLabelsValid),
133+
)
134+
}
135+
136+
return check.NewCondition(
137+
ConditionTypeKueueLabels,
138+
metav1.ConditionFalse,
139+
check.WithReason(check.ReasonConfigurationInvalid),
140+
check.WithMessage(MsgKueueLabelsMissing, totalImpacted),
141+
check.WithImpact(result.ImpactBlocking),
142+
check.WithRemediation(c.CheckRemediation),
143+
)
144+
}

0 commit comments

Comments
 (0)