Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 49 additions & 4 deletions tests/common/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ func GetNotebookUserName(t Test) string {
return name
}

// GetNotebookUserNameFromEnv returns the value of the NOTEBOOK_USER_NAME
// environment variable (the variable named by notebookUserName) with
// surrounding whitespace removed.
//
// When the variable is unset or contains only whitespace, the user name is
// resolved from the supplied token via GenerateNotebookUserNameFromToken.
func GetNotebookUserNameFromEnv(t Test, token string) string {
	// os.Getenv yields "" for an unset variable, so a single emptiness check on
	// the trimmed value covers both "unset" and "blank". Returning the trimmed
	// value keeps the result consistent with what was validated.
	if name := strings.TrimSpace(os.Getenv(notebookUserName)); name != "" {
		return name
	}
	return GenerateNotebookUserNameFromToken(t, token)
}

func GetNotebookUserToken(t Test) string {
token, ok := os.LookupEnv(notebookUserToken)
if !ok {
Expand All @@ -77,6 +86,15 @@ func GetNotebookUserToken(t Test) string {
return token
}

// GetNotebookUserTokenFromEnv returns the value of the NOTEBOOK_USER_TOKEN
// environment variable (the variable named by notebookUserToken) with
// surrounding whitespace removed.
//
// When the variable is unset or contains only whitespace, a token is obtained
// through GenerateNotebookUserToken instead.
func GetNotebookUserTokenFromEnv(t Test) string {
	// os.Getenv yields "" for an unset variable, so one check on the trimmed
	// value handles both cases. The trimmed value is returned so that stray
	// whitespace or a trailing newline never ends up inside the token.
	if token := strings.TrimSpace(os.Getenv(notebookUserToken)); token != "" {
		return token
	}
	return GenerateNotebookUserToken(t)
}

func GetNotebookUserPassword(t Test) string {
password, ok := os.LookupEnv(notebookUserPassword)
if !ok {
Expand All @@ -85,7 +103,7 @@ func GetNotebookUserPassword(t Test) string {
return password
}

// GenerateNotebookUserToken generates an OpenShift token using oc login with username and password.
func GenerateNotebookUserToken(t Test) string {
userName := GetNotebookUserName(t)
password := GetNotebookUserPassword(t)
Expand Down Expand Up @@ -128,12 +146,39 @@ func GenerateNotebookUserToken(t Test) string {
return strings.TrimSpace(string(out))
}

// GenerateNotebookUserNameFromToken returns the user name that a bearer token
// is bound to, as reported by `oc whoami` run against the API server URL from
// GetOpenShiftApiUrl. The command output is returned with surrounding
// whitespace removed.
//
// The test is failed when the token is blank (Fatalf) or when the command
// returns an error (details are logged, then FailNow).
func GenerateNotebookUserNameFromToken(t Test, token string) string {
	if strings.TrimSpace(token) == "" {
		t.T().Fatalf("Cannot resolve Notebook username from token: token is empty")
	}

	args := []string{
		"whoami",
		"--token=" + token,
		"--server=" + GetOpenShiftApiUrl(t),
		"--insecure-skip-tls-verify=true",
	}
	stdout, runErr := exec.Command("oc", args...).Output()
	if runErr == nil {
		return strings.TrimSpace(string(stdout))
	}

	// An ExitError carries the captured stderr of the process; any other error
	// is logged as-is.
	switch e := runErr.(type) {
	case *exec.ExitError:
		t.T().Logf("Error running 'oc whoami' command: %v\n", e)
		t.T().Logf("Output: %s\n", stdout)
		t.T().Logf("Error output: %s\n", e.Stderr)
	default:
		t.T().Logf("Error running 'oc whoami' command: %v\n", runErr)
	}
	t.T().FailNow()

	return strings.TrimSpace(string(stdout))
}

func GetNotebookImage(t Test) string {
notebook_image, ok := os.LookupEnv(notebookImage)
if !ok {
notebookImageValue, ok := os.LookupEnv(notebookImage)
if !ok || strings.TrimSpace(notebookImageValue) == "" {
t.T().Fatalf("Expected environment variable %s not found, please use this environment variable to specify image of the Notebook.", notebookImage)
}
return notebook_image
return notebookImageValue
}

func GetTestTier(t Test) (string, bool) {
Expand Down
70 changes: 68 additions & 2 deletions tests/common/support/kueue_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ limitations under the License.
package support

import (
"encoding/json"
"fmt"
"strings"
"time"

"github.com/onsi/gomega"
Expand Down Expand Up @@ -132,9 +135,72 @@ func VerifyKueueReady(test Test, expectedFrameworks ...string) {

for _, framework := range expectedFrameworks {
test.T().Logf("Verifying %s framework is present in Kueue CR...", framework)
test.Eventually(KueueCR(test, KueueCRName), TestTimeoutShort).Should(
gomega.WithTransform(KueueCRFrameworks, gomega.ContainElement(framework)),
// Capture baseline diagnostics before framework check polling starts.
dumpKueueDiagnostics(test, "before-framework-check", framework)
test.Eventually(func(g gomega.Gomega) bool {
kueue, err := GetKueueCR(test, KueueCRName)
g.Expect(err).NotTo(gomega.HaveOccurred())
frameworks := KueueCRFrameworks(kueue)
if !containsString(frameworks, framework) {
// Update snapshot while polling so failures leave a concrete artifact.
dumpKueueDiagnostics(test, "framework-missing", framework)
return false
}
return true
}, TestTimeoutShort).Should(
gomega.BeTrue(),
"Expected framework '%s' to be present in Kueue CR",
framework,
)
test.T().Logf("%s framework is present in Kueue CR", framework)
}
}

// dumpKueueDiagnostics writes a snapshot of the Kueue CR named KueueCRName to
// the test output directory under the name "kueue-diagnostics-<stage>".
//
// The snapshot is a JSON document holding the stage, the expected framework,
// the frameworks currently listed on the CR, the two condition values and the
// CR name/namespace. If the CR cannot be fetched, or the snapshot cannot be
// marshalled, a plain-text failure message is written instead.
func dumpKueueDiagnostics(test Test, stage string, expectedFramework string) {
	test.T().Helper()

	// Both the failure and the success path write to the same file name.
	fileName := fmt.Sprintf("kueue-diagnostics-%s", sanitizeFilePart(stage))

	kueue, err := GetKueueCR(test, KueueCRName)
	if err != nil {
		message := fmt.Sprintf("failed to get Kueue CR '%s': %v", KueueCRName, err)
		WriteToOutputDir(test, fileName, Log, []byte(message))
		return
	}

	snapshot := map[string]any{
		"stage":               stage,
		"expected_framework":  expectedFramework,
		"actual_frameworks":   KueueCRFrameworks(kueue),
		"available_condition": KueueCRConditionAvailable(kueue),
		"cert_condition":      KueueCRConditionCertManagerAvailable(kueue),
		"name":                kueue.GetName(),
		"namespace":           kueue.GetNamespace(),
	}

	contents, marshalErr := json.MarshalIndent(snapshot, "", " ")
	if marshalErr != nil {
		contents = []byte(fmt.Sprintf("failed to marshal diagnostics: %v", marshalErr))
	}
	WriteToOutputDir(test, fileName, Log, contents)
}

// sanitizeFilePart turns value into a string that is safe to embed in a single
// file name component.
//
// Surrounding whitespace is removed first. Every remaining whitespace
// character (space, tab, newline, carriage return), path separator ('/' or
// '\\') and colon is then replaced with '-'. All other characters are kept, so
// values that contain only spaces are sanitized exactly as before.
func sanitizeFilePart(value string) string {
	return strings.Map(func(r rune) rune {
		switch r {
		case ' ', '\t', '\n', '\r', '/', '\\', ':':
			// These would either split the name into several path elements or
			// produce a name that is invalid or awkward on common file systems.
			return '-'
		}
		return r
	}, strings.TrimSpace(value))
}

// containsString reports whether target is equal to at least one element of
// items. A nil or empty slice never contains anything.
func containsString(items []string, target string) bool {
	found := false
	// Stop scanning as soon as a match has been seen.
	for idx := 0; idx < len(items) && !found; idx++ {
		found = items[idx] == target
	}
	return found
}
82 changes: 82 additions & 0 deletions tests/kubeflow_sdk/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Recipes run under bash; the sdk-test-all recipe relies on `set -o pipefail`.
SHELL := /usr/bin/env bash

# The defaults below use ?= so a value that is already set (for example from
# the environment) is kept.
# Timeout value forwarded to the trainer Makefile.
SDK_TEST_TIMEOUT ?= 60m
# Regex of test names to exclude; empty by default.
SDK_TEST_EXCLUDE_REGEX ?=
# Accelerator lane selector: CUDA (default), CPU, ROCM or ALL.
ACCELERATOR_TESTS ?= CUDA
# Optional image-only override (CUDA | CPU | ROCM); empty by default.
NOTEBOOK_ACCELERATOR ?=
# Set to true to exclude S3/AWS-related trainer SDK tests.
SKIP_S3_TESTS ?= false

.PHONY: help sdk-test sdk-test-all sdk-test-trainer-all sdk-test-trainer-cpu sdk-test-trainer-sanity sdk-test-trainer-tier1 sdk-test-trainer-cuda sdk-test-trainer-rocm sdk-test-trainer-with-tier sdk-print-selected-trainer-tests sdk-print-trainer-tiers sdk-print-tiers

# help prints the available targets and the environment variables they honor.
# It only echoes text and runs no tests.
help:
@echo "Kubeflow SDK test targets"
@echo ""
@echo "Core:"
@echo " sdk-test Run default SDK set (Sanity + Tier1 + accelerator lane)"
@echo " sdk-test-all Alias for sdk-test (all components, currently trainer only)"
@echo " sdk-test-trainer-all Run all trainer SDK tests (all tiers)"
@echo " sdk-test-trainer-cpu Run trainer SDK CPU lane (Sanity + Tier1)"
@echo " sdk-test-trainer-sanity Run trainer SDK sanity tier"
@echo " sdk-test-trainer-tier1 Run trainer SDK tier1"
@echo " sdk-test-trainer-cuda Run trainer SDK CUDA lane"
@echo " sdk-test-trainer-rocm Run trainer SDK ROCm lane"
@echo " sdk-test-trainer-with-tier Run trainer SDK tests with SDK_TEST_TIER=<tier>"
@echo ""
@echo "Debug:"
@echo " sdk-print-selected-trainer-tests"
@echo " sdk-print-trainer-tiers"
@echo " sdk-print-tiers Print tiers for all SDK components"
@echo ""
@echo "Supported env vars:"
@echo " SDK_TEST_TIMEOUT (default: 60m)"
@echo " SDK_TEST_EXCLUDE_REGEX Exclude tests by name regex"
@echo " SKIP_S3_TESTS true|false (exclude S3/AWS-related trainer SDK tests)"
@echo " ACCELERATOR_TESTS CUDA (default) | CPU | ROCM | ALL"
@echo " (controls lane selection and default NOTEBOOK_IMAGE when unset)"
@echo " NOTEBOOK_ACCELERATOR Optional image-only override: CUDA | CPU | ROCM"
@echo " SDK_TEST_TIER Custom trainer tier for sdk-test-trainer-with-tier"

# sdk-test has no recipe of its own; it only depends on sdk-test-all.
sdk-test: sdk-test-all

# sdk-test-all runs, in order: the trainer sanity tier, trainer tier1, and then
# the accelerator lane(s) selected by ACCELERATOR_TESTS (upper-cased before the
# comparison): CPU runs no extra lane, CUDA and ROCM run their own lane, ALL
# runs CUDA followed by ROCM, and any other value prints an error and exits 1.
# The whole recipe is one shell invocation under `set -euo pipefail`, so the
# first failing sub-make aborts the remaining steps.
sdk-test-all:
@set -euo pipefail; \
$(MAKE) -f tests/kubeflow_sdk/Makefile sdk-test-trainer-sanity SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"; \
$(MAKE) -f tests/kubeflow_sdk/Makefile sdk-test-trainer-tier1 SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"; \
case "$$(echo "$(ACCELERATOR_TESTS)" | tr '[:lower:]' '[:upper:]')" in \
CPU) ;; \
CUDA) $(MAKE) -f tests/kubeflow_sdk/Makefile sdk-test-trainer-cuda SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)" ;; \
ROCM) $(MAKE) -f tests/kubeflow_sdk/Makefile sdk-test-trainer-rocm SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)" ;; \
ALL) $(MAKE) -f tests/kubeflow_sdk/Makefile sdk-test-trainer-cuda SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"; \
$(MAKE) -f tests/kubeflow_sdk/Makefile sdk-test-trainer-rocm SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)" ;; \
*) echo "Unsupported ACCELERATOR_TESTS='$(ACCELERATOR_TESTS)'. Use CPU, CUDA, ROCM, or ALL."; exit 1 ;; \
esac

# The targets below are thin wrappers: each one invokes the matching
# trainer-sdk-* target in tests/kubeflow_sdk/trainer/Makefile and forwards the
# relevant variables explicitly.

# Delegates to trainer-sdk-test-all; NOTEBOOK_ACCELERATOR is passed through unchanged.
sdk-test-trainer-all:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-test-all SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" NOTEBOOK_ACCELERATOR="$(NOTEBOOK_ACCELERATOR)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-test-cpu; NOTEBOOK_ACCELERATOR is forced to CPU.
sdk-test-trainer-cpu:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-test-cpu SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" NOTEBOOK_ACCELERATOR="CPU" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-test-sanity; NOTEBOOK_ACCELERATOR is passed through unchanged.
sdk-test-trainer-sanity:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-test-sanity SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" NOTEBOOK_ACCELERATOR="$(NOTEBOOK_ACCELERATOR)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-test-tier1; NOTEBOOK_ACCELERATOR is passed through unchanged.
sdk-test-trainer-tier1:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-test-tier1 SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" NOTEBOOK_ACCELERATOR="$(NOTEBOOK_ACCELERATOR)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-test-cuda; NOTEBOOK_ACCELERATOR is forced to CUDA.
sdk-test-trainer-cuda:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-test-cuda SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" NOTEBOOK_ACCELERATOR="CUDA" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-test-rocm; NOTEBOOK_ACCELERATOR is forced to ROCM.
sdk-test-trainer-rocm:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-test-rocm SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" NOTEBOOK_ACCELERATOR="ROCM" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-test-with-tier and additionally forwards SDK_TEST_TIER.
# SDK_TEST_TIER has no default in this file.
sdk-test-trainer-with-tier:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-test-with-tier SDK_TEST_TIER="$(SDK_TEST_TIER)" SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SDK_TEST_TIMEOUT="$(SDK_TEST_TIMEOUT)" ACCELERATOR_TESTS="$(ACCELERATOR_TESTS)" NOTEBOOK_ACCELERATOR="$(NOTEBOOK_ACCELERATOR)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-print-selected; only the exclude regex and the S3
# switch are forwarded.
sdk-print-selected-trainer-tests:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-print-selected SDK_TEST_EXCLUDE_REGEX="$(SDK_TEST_EXCLUDE_REGEX)" SKIP_S3_TESTS="$(SKIP_S3_TESTS)"

# Delegates to trainer-sdk-print-tiers; no variables are forwarded explicitly.
sdk-print-trainer-tiers:
@$(MAKE) -f tests/kubeflow_sdk/trainer/Makefile trainer-sdk-print-tiers

# Re-invokes this Makefile's sdk-print-trainer-tiers target.
sdk-print-tiers:
@$(MAKE) -f tests/kubeflow_sdk/Makefile sdk-print-trainer-tiers
Loading