Skip to content

Commit a16dacc

Browse files
andyatmiamilburgazzoli
authored andcommitted
feat: add new notebook lint checks and harden code
New checks: - ConnectionIntegrityCheck: verifies that Notebooks referencing connections via the opendatahub.io/connections annotation have backing Secrets that exist. Uses a two-pass approach — collect referenced namespaces, then list Secrets only in those namespaces — to avoid cluster-wide Secret queries. - HardwareProfileIntegrityCheck: verifies that Notebooks referencing infrastructure HardwareProfiles (infrastructure.opendatahub.io) via annotations point to profiles that exist. Uses the same namespace-scoped two-pass pattern as ConnectionIntegrityCheck. - RunningWorkloadsCheck: detects Notebooks that are currently running (missing the kubeflow-resource-stopped annotation) before a 2.x to 3.x upgrade. All three new checks and AcceleratorMigrationCheck use validate.WorkloadsMetadata().Run() with callbacks. WorkloadBuilder handles result creation, target version annotation, CRD-not-found (empty list), and workload count annotation. Each callback overrides AnnotationImpactedWorkloadCount with the actual impacted count and unconditionally calls SetImpactedObjects (which always allocates via make, preventing WorkloadBuilder's auto-populate). New shared infrastructure: - validate.FilterWorkloadsWithAcceleratorRefs: extracted from FindWorkloadsWithAcceleratorRefs to process pre-listed items without re-listing. FindWorkloadsWithAcceleratorRefs is now a thin wrapper (list + delegate) and is still used by kserve. - constants.go: centralizes check metadata (condition types, annotation keys, message templates) used across all notebook checks. Inline constants from impacted.go, container_name.go, container_name_support.go, and hardwareprofile_migration.go are moved here. - utils.go: adds isWorkbenchesManaged helper, replacing duplicated DSC lookup logic in container_name.go, impacted.go, and hardwareprofile_migration.go. - helpers_test.go: shared newNotebook test helper to reduce inline notebook construction across test files. - resources.InfrastructureHardwareProfile: new ResourceType for the infrastructure.opendatahub.io/v1 HardwareProfile API group. - check.CheckTypeDataIntegrity and check.CheckTypeWorkloadState: new check type constants. Changes to existing checks: - AcceleratorMigrationCheck: refactored to WorkloadBuilder with a checkAcceleratorRefs callback that delegates to FilterWorkloadsWithAcceleratorRefs. Uses ReasonMigrationPending instead of ReasonConfigurationInvalid for the auto-migration advisory condition. - HardwareProfileMigrationCheck: CanApply now requires Workbenches to be Managed (previously always returned true). - ImpactedWorkloadsCheck: - Pre-compute nginxFixMinRHOAIVersion parsing at package load time via mustParseVersionParts, replacing per-call string splitting in isCompliantBuildRef. - Extract hardcoded check.opendatahub.io annotation keys into named constants (AnnotationCheckImageStatus, AnnotationCheckImageRef, AnnotationCheckReason). - findImageStreamByDockerRepo returns (ootbImageStream, bool) instead of *ootbImageStream, consistent with sibling lookup functions. - ContainerNameCheck: CanApply delegates to isWorkbenchesManaged instead of inlining DSC lookup. - buildSecretCache (connection_integrity.go): takes client.Reader instead of check.Target, consistent with how hardware_profile_integrity.go accesses the client directly. Removed: - export_test.go: replaced by helpers_test.go with a more flexible newNotebook helper using a notebookMeta options struct. Signed-off-by: Andy Stoneberg <astonebe@redhat.com>
1 parent 41bca85 commit a16dacc

24 files changed

+1828
-418
lines changed

pkg/lint/check/constants.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ const (
99
CheckTypeInstalled CheckType = "installed"
1010
CheckTypeImpactedWorkloads CheckType = "impacted-workloads"
1111
CheckTypeConfigMigration CheckType = "config-migration"
12+
CheckTypeDataIntegrity CheckType = "data-integrity"
13+
CheckTypeWorkloadState CheckType = "workload-state"
1214
CheckTypeAcceleratorProfileMigration CheckType = "acceleratorprofile-migration"
1315
)
1416

pkg/lint/check/validate/accelerator.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66

7+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
78
"k8s.io/apimachinery/pkg/types"
89

910
"github.com/opendatahub-io/odh-cli/pkg/lint/check"
@@ -39,15 +40,26 @@ func FindWorkloadsWithAcceleratorRefs(
3940
return nil, 0, fmt.Errorf("listing %s: %w", workloadType.Kind, err)
4041
}
4142

43+
return FilterWorkloadsWithAcceleratorRefs(ctx, target.Client, workloads)
44+
}
45+
46+
// FilterWorkloadsWithAcceleratorRefs checks which of the given workload items reference
47+
// AcceleratorProfiles via annotations, and returns the impacted workload names
48+
// along with a count of missing profiles.
49+
func FilterWorkloadsWithAcceleratorRefs(
50+
ctx context.Context,
51+
c client.Reader,
52+
items []*metav1.PartialObjectMetadata,
53+
) ([]types.NamespacedName, int, error) {
4254
// Resolve the applications namespace for AcceleratorProfile lookups.
4355
// AcceleratorProfiles live in the applications namespace, but workloads may not
4456
// have the namespace annotation set, so we need a proper default.
45-
appNS, err := client.GetApplicationsNamespace(ctx, target.Client)
57+
appNS, err := client.GetApplicationsNamespace(ctx, c)
4658
if err != nil {
4759
return nil, 0, fmt.Errorf("getting applications namespace: %w", err)
4860
}
4961

50-
profileCache, err := kube.BuildResourceNameSet(ctx, target.Client, resources.AcceleratorProfile)
62+
profileCache, err := kube.BuildResourceNameSet(ctx, c, resources.AcceleratorProfile)
5163
if err != nil {
5264
return nil, 0, fmt.Errorf("building AcceleratorProfile cache: %w", err)
5365
}
@@ -56,7 +68,7 @@ func FindWorkloadsWithAcceleratorRefs(
5668

5769
missingCount := 0
5870

59-
for _, w := range workloads {
71+
for _, w := range items {
6072
profileRef := types.NamespacedName{
6173
Namespace: kube.GetAnnotation(w, AnnotationAcceleratorNamespace),
6274
Name: kube.GetAnnotation(w, AnnotationAcceleratorName),

pkg/lint/checks/workloads/notebook/accelerator_migration.go

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,17 @@ package notebook
22

33
import (
44
"context"
5-
"fmt"
65
"strconv"
76

87
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
98

10-
"github.com/opendatahub-io/odh-cli/pkg/constants"
119
"github.com/opendatahub-io/odh-cli/pkg/lint/check"
1210
"github.com/opendatahub-io/odh-cli/pkg/lint/check/result"
1311
"github.com/opendatahub-io/odh-cli/pkg/lint/check/validate"
1412
"github.com/opendatahub-io/odh-cli/pkg/resources"
15-
"github.com/opendatahub-io/odh-cli/pkg/util/client"
16-
"github.com/opendatahub-io/odh-cli/pkg/util/components"
1713
"github.com/opendatahub-io/odh-cli/pkg/util/version"
1814
)
1915

20-
const ConditionTypeAcceleratorProfileCompatible = "AcceleratorProfileCompatible"
21-
2216
// AcceleratorMigrationCheck detects Notebook (workbench) CRs referencing deprecated AcceleratorProfiles
2317
// that will be auto-migrated to HardwareProfiles (infrastructure.opendatahub.io) during RHOAI 3.x upgrade.
2418
type AcceleratorMigrationCheck struct {
@@ -46,28 +40,28 @@ func (c *AcceleratorMigrationCheck) CanApply(ctx context.Context, target check.T
4640
return false, nil
4741
}
4842

49-
dsc, err := client.GetDataScienceCluster(ctx, target.Client)
50-
if err != nil {
51-
return false, fmt.Errorf("getting DataScienceCluster: %w", err)
52-
}
53-
54-
return components.HasManagementState(dsc, "workbenches", constants.ManagementStateManaged), nil
43+
return isWorkbenchesManaged(ctx, target)
5544
}
5645

5746
// Validate executes the check against the provided target.
5847
func (c *AcceleratorMigrationCheck) Validate(
5948
ctx context.Context,
6049
target check.Target,
6150
) (*result.DiagnosticResult, error) {
62-
dr := c.NewResult()
51+
return validate.WorkloadsMetadata(c, target, resources.Notebook).
52+
Run(ctx, c.checkAcceleratorRefs)
53+
}
6354

64-
if target.TargetVersion != nil {
65-
dr.Annotations[check.AnnotationCheckTargetVersion] = target.TargetVersion.String()
66-
}
55+
// checkAcceleratorRefs cross-references notebook accelerator annotations against existing AcceleratorProfiles.
56+
func (c *AcceleratorMigrationCheck) checkAcceleratorRefs(
57+
ctx context.Context,
58+
req *validate.WorkloadRequest[*metav1.PartialObjectMetadata],
59+
) error {
60+
dr := req.Result
6761

68-
impacted, missingCount, err := validate.FindWorkloadsWithAcceleratorRefs(ctx, target, resources.Notebook)
62+
impacted, missingCount, err := validate.FilterWorkloadsWithAcceleratorRefs(ctx, req.Client, req.Items)
6963
if err != nil {
70-
return nil, fmt.Errorf("finding Notebooks with AcceleratorProfiles: %w", err)
64+
return err
7165
}
7266

7367
totalImpacted := len(impacted)
@@ -77,12 +71,9 @@ func (c *AcceleratorMigrationCheck) Validate(
7771
dr.Status.Conditions,
7872
c.newAcceleratorMigrationCondition(totalImpacted, missingCount),
7973
)
74+
dr.SetImpactedObjects(resources.Notebook, impacted)
8075

81-
if totalImpacted > 0 {
82-
dr.SetImpactedObjects(resources.Notebook, impacted)
83-
}
84-
85-
return dr, nil
76+
return nil
8677
}
8778

8879
func (c *AcceleratorMigrationCheck) newAcceleratorMigrationCondition(
@@ -94,7 +85,7 @@ func (c *AcceleratorMigrationCheck) newAcceleratorMigrationCondition(
9485
ConditionTypeAcceleratorProfileCompatible,
9586
metav1.ConditionTrue,
9687
check.WithReason(check.ReasonVersionCompatible),
97-
check.WithMessage("No Notebooks found using deprecated AcceleratorProfiles - no migration needed"),
88+
check.WithMessage(MsgNoAcceleratorProfiles),
9889
)
9990
}
10091

@@ -104,7 +95,7 @@ func (c *AcceleratorMigrationCheck) newAcceleratorMigrationCondition(
10495
ConditionTypeAcceleratorProfileCompatible,
10596
metav1.ConditionFalse,
10697
check.WithReason(check.ReasonResourceNotFound),
107-
check.WithMessage("Found %d Notebook(s) referencing deprecated AcceleratorProfiles (%d missing): AcceleratorProfiles and Notebook references are automatically migrated to HardwareProfiles (infrastructure.opendatahub.io) during upgrade", totalImpacted, totalMissing),
98+
check.WithMessage(MsgAcceleratorProfilesMissing, totalImpacted, totalMissing),
10899
check.WithImpact(result.ImpactAdvisory),
109100
check.WithRemediation(c.CheckRemediation),
110101
)
@@ -114,8 +105,8 @@ func (c *AcceleratorMigrationCheck) newAcceleratorMigrationCondition(
114105
return check.NewCondition(
115106
ConditionTypeAcceleratorProfileCompatible,
116107
metav1.ConditionFalse,
117-
check.WithReason(check.ReasonConfigurationInvalid),
118-
check.WithMessage("Found %d Notebook(s) using deprecated AcceleratorProfiles: AcceleratorProfiles and Notebook references are automatically migrated to HardwareProfiles (infrastructure.opendatahub.io) during upgrade", totalImpacted),
108+
check.WithReason(check.ReasonMigrationPending),
109+
check.WithMessage(MsgAcceleratorProfilesMigrating, totalImpacted),
119110
check.WithImpact(result.ImpactAdvisory),
120111
check.WithRemediation(c.CheckRemediation),
121112
)

pkg/lint/checks/workloads/notebook/accelerator_migration_test.go

Lines changed: 34 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package notebook_test
22

33
import (
4+
"fmt"
45
"testing"
56

67
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -45,7 +46,7 @@ func TestAcceleratorMigrationCheck_NoNotebooks(t *testing.T) {
4546
"Type": Equal(notebook.ConditionTypeAcceleratorProfileCompatible),
4647
"Status": Equal(metav1.ConditionTrue),
4748
"Reason": Equal(check.ReasonVersionCompatible),
48-
"Message": ContainSubstring("No Notebooks found"),
49+
"Message": Equal(notebook.MsgNoAcceleratorProfiles),
4950
}))
5051
g.Expect(result.Annotations).To(HaveKeyWithValue(check.AnnotationImpactedWorkloadCount, "0"))
5152
g.Expect(result.ImpactedObjects).To(BeEmpty())
@@ -56,17 +57,7 @@ func TestAcceleratorMigrationCheck_NotebookWithoutAcceleratorProfile(t *testing.
5657
ctx := t.Context()
5758

5859
// Notebook without accelerator annotations
59-
nb := &unstructured.Unstructured{
60-
Object: map[string]any{
61-
"apiVersion": resources.Notebook.APIVersion(),
62-
"kind": resources.Notebook.Kind,
63-
"metadata": map[string]any{
64-
"name": "test-notebook",
65-
"namespace": "test-ns",
66-
},
67-
"spec": map[string]any{},
68-
},
69-
}
60+
nb := newNotebook("test-notebook", "test-ns", notebookOptions{})
7061

7162
target := testutil.NewTarget(t, testutil.TargetConfig{
7263
ListKinds: acceleratorListKinds,
@@ -106,21 +97,12 @@ func TestAcceleratorMigrationCheck_NotebookWithExistingAcceleratorProfile(t *tes
10697
}
10798

10899
// Notebook referencing existing AcceleratorProfile
109-
nb := &unstructured.Unstructured{
110-
Object: map[string]any{
111-
"apiVersion": resources.Notebook.APIVersion(),
112-
"kind": resources.Notebook.Kind,
113-
"metadata": map[string]any{
114-
"name": "gpu-notebook",
115-
"namespace": "user-ns",
116-
"annotations": map[string]any{
117-
"opendatahub.io/accelerator-name": "nvidia-gpu",
118-
"opendatahub.io/accelerator-profile-namespace": "redhat-ods-applications",
119-
},
120-
},
121-
"spec": map[string]any{},
100+
nb := newNotebook("gpu-notebook", "user-ns", notebookOptions{
101+
Annotations: map[string]any{
102+
"opendatahub.io/accelerator-name": "nvidia-gpu",
103+
"opendatahub.io/accelerator-profile-namespace": "redhat-ods-applications",
122104
},
123-
}
105+
})
124106

125107
target := testutil.NewTarget(t, testutil.TargetConfig{
126108
ListKinds: acceleratorListKinds,
@@ -137,8 +119,8 @@ func TestAcceleratorMigrationCheck_NotebookWithExistingAcceleratorProfile(t *tes
137119
g.Expect(result.Status.Conditions[0].Condition).To(MatchFields(IgnoreExtras, Fields{
138120
"Type": Equal(notebook.ConditionTypeAcceleratorProfileCompatible),
139121
"Status": Equal(metav1.ConditionFalse),
140-
"Reason": Equal(check.ReasonConfigurationInvalid),
141-
"Message": And(ContainSubstring("Found 1 Notebook(s)"), ContainSubstring("HardwareProfiles")),
122+
"Reason": Equal(check.ReasonMigrationPending),
123+
"Message": Equal(fmt.Sprintf(notebook.MsgAcceleratorProfilesMigrating, 1)),
142124
}))
143125
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactAdvisory))
144126
g.Expect(result.Status.Conditions[0].Remediation).To(ContainSubstring("HardwareProfiles"))
@@ -154,21 +136,12 @@ func TestAcceleratorMigrationCheck_NotebookWithMissingAcceleratorProfile(t *test
154136
ctx := t.Context()
155137

156138
// Notebook referencing non-existent AcceleratorProfile
157-
nb := &unstructured.Unstructured{
158-
Object: map[string]any{
159-
"apiVersion": resources.Notebook.APIVersion(),
160-
"kind": resources.Notebook.Kind,
161-
"metadata": map[string]any{
162-
"name": "broken-notebook",
163-
"namespace": "user-ns",
164-
"annotations": map[string]any{
165-
"opendatahub.io/accelerator-name": "missing-profile",
166-
"opendatahub.io/accelerator-profile-namespace": "some-ns",
167-
},
168-
},
169-
"spec": map[string]any{},
139+
nb := newNotebook("broken-notebook", "user-ns", notebookOptions{
140+
Annotations: map[string]any{
141+
"opendatahub.io/accelerator-name": "missing-profile",
142+
"opendatahub.io/accelerator-profile-namespace": "some-ns",
170143
},
171-
}
144+
})
172145

173146
target := testutil.NewTarget(t, testutil.TargetConfig{
174147
ListKinds: acceleratorListKinds,
@@ -186,7 +159,7 @@ func TestAcceleratorMigrationCheck_NotebookWithMissingAcceleratorProfile(t *test
186159
"Type": Equal(notebook.ConditionTypeAcceleratorProfileCompatible),
187160
"Status": Equal(metav1.ConditionFalse),
188161
"Reason": Equal(check.ReasonResourceNotFound),
189-
"Message": And(ContainSubstring("1 missing"), ContainSubstring("automatically migrated to HardwareProfiles")),
162+
"Message": Equal(fmt.Sprintf(notebook.MsgAcceleratorProfilesMissing, 1, 1)),
190163
}))
191164
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactAdvisory))
192165
g.Expect(result.Status.Conditions[0].Remediation).To(ContainSubstring("HardwareProfiles"))
@@ -211,51 +184,23 @@ func TestAcceleratorMigrationCheck_MixedNotebooks(t *testing.T) {
211184
}
212185

213186
// Notebook without accelerator
214-
nb1 := &unstructured.Unstructured{
215-
Object: map[string]any{
216-
"apiVersion": resources.Notebook.APIVersion(),
217-
"kind": resources.Notebook.Kind,
218-
"metadata": map[string]any{
219-
"name": "plain-notebook",
220-
"namespace": "ns1",
221-
},
222-
"spec": map[string]any{},
223-
},
224-
}
187+
nb1 := newNotebook("plain-notebook", "ns1", notebookOptions{})
225188

226189
// Notebook with existing accelerator
227-
nb2 := &unstructured.Unstructured{
228-
Object: map[string]any{
229-
"apiVersion": resources.Notebook.APIVersion(),
230-
"kind": resources.Notebook.Kind,
231-
"metadata": map[string]any{
232-
"name": "gpu-notebook",
233-
"namespace": "ns2",
234-
"annotations": map[string]any{
235-
"opendatahub.io/accelerator-name": "nvidia-gpu",
236-
"opendatahub.io/accelerator-profile-namespace": "redhat-ods-applications",
237-
},
238-
},
239-
"spec": map[string]any{},
190+
nb2 := newNotebook("gpu-notebook", "ns2", notebookOptions{
191+
Annotations: map[string]any{
192+
"opendatahub.io/accelerator-name": "nvidia-gpu",
193+
"opendatahub.io/accelerator-profile-namespace": "redhat-ods-applications",
240194
},
241-
}
195+
})
242196

243197
// Notebook with missing accelerator
244-
nb3 := &unstructured.Unstructured{
245-
Object: map[string]any{
246-
"apiVersion": resources.Notebook.APIVersion(),
247-
"kind": resources.Notebook.Kind,
248-
"metadata": map[string]any{
249-
"name": "broken-notebook",
250-
"namespace": "ns3",
251-
"annotations": map[string]any{
252-
"opendatahub.io/accelerator-name": "missing-profile",
253-
"opendatahub.io/accelerator-profile-namespace": "some-ns",
254-
},
255-
},
256-
"spec": map[string]any{},
198+
nb3 := newNotebook("broken-notebook", "ns3", notebookOptions{
199+
Annotations: map[string]any{
200+
"opendatahub.io/accelerator-name": "missing-profile",
201+
"opendatahub.io/accelerator-profile-namespace": "some-ns",
257202
},
258-
}
203+
})
259204

260205
target := testutil.NewTarget(t, testutil.TargetConfig{
261206
ListKinds: acceleratorListKinds,
@@ -273,7 +218,7 @@ func TestAcceleratorMigrationCheck_MixedNotebooks(t *testing.T) {
273218
"Type": Equal(notebook.ConditionTypeAcceleratorProfileCompatible),
274219
"Status": Equal(metav1.ConditionFalse),
275220
"Reason": Equal(check.ReasonResourceNotFound),
276-
"Message": And(ContainSubstring("2 Notebook(s)"), ContainSubstring("1 missing")),
221+
"Message": Equal(fmt.Sprintf(notebook.MsgAcceleratorProfilesMissing, 2, 1)),
277222
}))
278223
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactAdvisory))
279224
g.Expect(result.Status.Conditions[0].Remediation).To(ContainSubstring("HardwareProfiles"))
@@ -401,21 +346,12 @@ func TestAcceleratorMigrationCheck_DefaultNamespace(t *testing.T) {
401346
}
402347

403348
// Notebook with accelerator name but no namespace annotation (should default to applications namespace)
404-
nb := &unstructured.Unstructured{
405-
Object: map[string]any{
406-
"apiVersion": resources.Notebook.APIVersion(),
407-
"kind": resources.Notebook.Kind,
408-
"metadata": map[string]any{
409-
"name": "notebook-default-ns",
410-
"namespace": "user-ns",
411-
"annotations": map[string]any{
412-
"opendatahub.io/accelerator-name": "my-gpu",
413-
// No namespace annotation - should default to applications namespace
414-
},
415-
},
416-
"spec": map[string]any{},
349+
nb := newNotebook("notebook-default-ns", "user-ns", notebookOptions{
350+
Annotations: map[string]any{
351+
"opendatahub.io/accelerator-name": "my-gpu",
352+
// No namespace annotation - should default to applications namespace
417353
},
418-
}
354+
})
419355

420356
target := testutil.NewTarget(t, testutil.TargetConfig{
421357
ListKinds: acceleratorListKinds,
@@ -433,7 +369,7 @@ func TestAcceleratorMigrationCheck_DefaultNamespace(t *testing.T) {
433369
g.Expect(result.Status.Conditions[0].Condition).To(MatchFields(IgnoreExtras, Fields{
434370
"Type": Equal(notebook.ConditionTypeAcceleratorProfileCompatible),
435371
"Status": Equal(metav1.ConditionFalse),
436-
"Reason": Equal(check.ReasonConfigurationInvalid),
372+
"Reason": Equal(check.ReasonMigrationPending),
437373
}))
438374
g.Expect(result.Status.Conditions[0].Impact).To(Equal(resultpkg.ImpactAdvisory))
439375
g.Expect(result.Annotations).To(HaveKeyWithValue(check.AnnotationImpactedWorkloadCount, "1"))

0 commit comments

Comments
 (0)