update breaking changes for v0.5 #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: CI - OpenShift E2E Tests

# Permissions needed for various jobs
permissions:
  contents: read
  packages: write
  pull-requests: write # For posting comments on PRs
  statuses: write # For reporting status on fork PR commits

# Cancel previous runs on the same PR to avoid resource conflicts
# Only group by PR number for legitimate triggers (pull_request, workflow_dispatch, /ok-to-test, or /retest comments)
# Regular comments get a unique group (run_id) so they don't cancel in-progress test runs
#
# Logic:
# - Regular comments (not /ok-to-test or /retest): unique group prevents cancellation of real tests
# - Valid triggers: group 'e2e-openshift-{pr_number}' (can cancel previous runs for same PR)
# - Fallback chain for ID: pull_request.number -> issue.number -> run_id
#
# NOTE: Valid command list (/ok-to-test, /retest) must stay in sync with the
# validCommands list in the gate job's permission-check script (line numbers
# drift; match on the list itself, not a hard-coded line)
concurrency:
  group: >-
    ${{
    github.event_name == 'issue_comment' &&
    !contains(github.event.comment.body, '/ok-to-test') &&
    !contains(github.event.comment.body, '/retest')
    && format('comment-isolated-{0}', github.run_id)
    || format('e2e-openshift-{0}',
    github.event.pull_request.number
    || github.event.issue.number
    || github.run_id)
    }}
  cancel-in-progress: true

on:
  pull_request:
    branches:
      - main
      - dev
  # Allow maintainers to trigger tests on fork PRs via /ok-to-test comment
  issue_comment:
    types: [created]
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID'
        required: false
        default: 'unsloth/Meta-Llama-3.1-8B'
      accelerator_type:
        description: 'Accelerator type (H100, A100, L40S)'
        required: false
        default: 'H100'
      request_rate:
        description: 'Request rate (req/s)'
        required: false
        default: '20'
      num_prompts:
        description: 'Number of prompts'
        required: false
        default: '3000'
      skip_cleanup:
        description: 'Skip cleanup after tests'
        required: false
        default: 'false'
      max_num_seqs:
        description: 'vLLM max batch size (lower = easier to saturate)'
        required: false
        default: '1'
      hpa_stabilization_seconds:
        description: 'HPA stabilization window in seconds'
        required: false
        default: '30'
jobs:
  # Gate: Check permissions and handle /ok-to-test for fork PRs
  # - Maintainers (write access): Tests run automatically
  # - External contributors: Must wait for maintainer to comment /ok-to-test
  gate:
    runs-on: ubuntu-latest
    outputs:
      should_run: ${{ steps.check.outputs.should_run }}
      pr_number: ${{ steps.check.outputs.pr_number }}
      pr_head_sha: ${{ steps.check.outputs.pr_head_sha }}
      is_fork_pr: ${{ steps.check.outputs.is_fork_pr }}
    steps:
      - name: Check permissions and /ok-to-test
        id: check
        uses: actions/github-script@v7
        with:
          script: |
            // Helper to check if user has write access
            async function hasWriteAccess(username) {
              try {
                const { data: permission } = await github.rest.repos.getCollaboratorPermissionLevel({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  username: username
                });
                const privilegedRoles = ['admin', 'maintain', 'write'];
                return privilegedRoles.includes(permission.permission);
              } catch (e) {
                // Treat lookup failures (e.g. non-collaborator 404) as "no access"
                console.log(`Could not get permissions for ${username}: ${e.message}`);
                return false;
              }
            }
            // Always run for workflow_dispatch
            if (context.eventName === 'workflow_dispatch') {
              core.setOutput('should_run', 'true');
              core.setOutput('pr_number', '');
              core.setOutput('pr_head_sha', context.sha);
              core.setOutput('is_fork_pr', 'false');
              return;
            }
            // Handle issue_comment event (/ok-to-test or /retest)
            if (context.eventName === 'issue_comment') {
              const comment = context.payload.comment.body.trim();
              const issue = context.payload.issue;
              // Only process /ok-to-test or /retest comments on PRs
              if (!issue.pull_request) {
                console.log('Comment is not on a PR, skipping');
                core.setOutput('should_run', 'false');
                return;
              }
              // NOTE: This list must stay in sync with the concurrency group
              // expression at the top of this workflow
              const validCommands = ['/ok-to-test', '/retest'];
              if (!validCommands.includes(comment)) {
                console.log(`Comment "${comment}" is not a valid trigger command, skipping`);
                core.setOutput('should_run', 'false');
                return;
              }
              // Check if commenter has write access
              const commenter = context.payload.comment.user.login;
              const hasAccess = await hasWriteAccess(commenter);
              if (!hasAccess) {
                console.log(`User ${commenter} does not have write access, ignoring ${comment}`);
                core.setOutput('should_run', 'false');
                return;
              }
              // Get PR details to get head SHA
              const { data: pr } = await github.rest.pulls.get({
                owner: context.repo.owner,
                repo: context.repo.repo,
                pull_number: issue.number
              });
              // Check if PR is from a fork
              const baseRepo = `${context.repo.owner}/${context.repo.repo}`;
              const headRepo = pr.head.repo ? pr.head.repo.full_name : baseRepo;
              const isFork = headRepo !== baseRepo;
              console.log(`${comment} approved by ${commenter} for PR #${issue.number}`);
              console.log(`PR head SHA: ${pr.head.sha}`);
              console.log(`Is fork PR: ${isFork} (head: ${headRepo}, base: ${baseRepo})`);
              core.setOutput('should_run', 'true');
              core.setOutput('pr_number', issue.number.toString());
              core.setOutput('pr_head_sha', pr.head.sha);
              core.setOutput('is_fork_pr', isFork ? 'true' : 'false');
              // Add reaction to acknowledge
              await github.rest.reactions.createForIssueComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: context.payload.comment.id,
                content: 'rocket'
              });
              // Post comment with link to the e2e workflow run
              const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: issue.number,
                body: `🚀 **E2E tests triggered by ${comment}**\n\n[View the OpenShift E2E workflow run](${runUrl})`
              });
              return;
            }
            // Handle pull_request event
            const pr = context.payload.pull_request;
            const prAuthor = pr.user.login;
            const prNumber = pr.number;
            const prHeadSha = pr.head.sha;
            // Check if PR is from a fork
            const baseRepo = `${context.repo.owner}/${context.repo.repo}`;
            const headRepo = pr.head.repo ? pr.head.repo.full_name : baseRepo;
            const isFork = headRepo !== baseRepo;
            console.log(`PR #${prNumber} is from fork: ${isFork} (head: ${headRepo}, base: ${baseRepo})`);
            core.setOutput('pr_number', prNumber.toString());
            core.setOutput('pr_head_sha', prHeadSha);
            core.setOutput('is_fork_pr', isFork ? 'true' : 'false');
            // Check if PR author has write access
            const isPrivileged = await hasWriteAccess(prAuthor);
            console.log(`PR #${prNumber} author ${prAuthor}: privileged=${isPrivileged}`);
            // Check if we already posted a bot comment
            // NOTE(review): listComments returns only the first page (30 comments);
            // on long PR threads the dedupe check may miss an earlier bot comment — confirm acceptable
            const comments = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber
            });
            const botComment = comments.data.find(c =>
              c.user.type === 'Bot' &&
              c.body.includes('ok-to-test')
            );
            // Helper to safely post a comment (may fail on fork PRs due to permissions)
            async function tryPostComment(body) {
              try {
                await github.rest.issues.createComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  issue_number: prNumber,
                  body: body
                });
                return true;
              } catch (e) {
                // Fork PRs can't post comments on pull_request event (GitHub security restriction)
                console.log(`Could not post comment (expected for fork PRs): ${e.message}`);
                return false;
              }
            }
            if (isPrivileged) {
              // For maintainer/admin fork PRs, we need to trigger via /ok-to-test
              // because fork PRs don't have access to secrets on pull_request event
              if (isFork) {
                console.log(`Maintainer fork PR detected - auto-triggering /ok-to-test for ${prAuthor}`);
                core.setOutput('should_run', 'false'); // Don't run on pull_request event
                // Auto-post /ok-to-test to trigger issue_comment workflow
                // NOTE(review): this comment is authored by github-actions[bot]; confirm the
                // issue_comment gate's hasWriteAccess check accepts that account, otherwise
                // the auto-trigger is a no-op and a maintainer must comment manually
                if (!botComment) {
                  const posted = await tryPostComment(`/ok-to-test`);
                  if (!posted) {
                    console.log('Note: Maintainer will need to manually comment /ok-to-test');
                  }
                }
                return;
              }
              // Non-fork PR from maintainer - run directly
              core.setOutput('should_run', 'true');
              return;
            }
            // External contributor - post instructions and skip
            console.log('External contributor PR - posting instructions');
            core.setOutput('should_run', 'false');
            if (!botComment) {
              const posted = await tryPostComment(`👋 Thanks for your contribution!\n\nThis PR is from a fork, so the e2e tests require approval to run (they use GPU resources).\n\n**For maintainers/admins:** Comment \`/ok-to-test\` to trigger the e2e tests after reviewing the code.\n\n**For contributors:** Please wait for a maintainer or admin to approve running the tests.`);
              if (!posted) {
                console.log('Note: Could not post instructions comment on fork PR');
              }
            }
  # Build the WVA controller image on GitHub-hosted runner (has proper Docker setup)
  # Note: Skip for fork PRs on pull_request event (no secrets access).
  # For fork PRs, build-image runs via issue_comment trigger (/ok-to-test).
  build-image:
    needs: gate
    if: |
      needs.gate.outputs.should_run == 'true' &&
      (needs.gate.outputs.is_fork_pr != 'true' || github.event_name != 'pull_request')
    runs-on: ubuntu-latest
    outputs:
      image_tag: ${{ steps.build.outputs.image_tag }}
    steps:
      - name: Checkout source
        uses: actions/checkout@v4
        with:
          # Use PR head SHA from gate (works for both pull_request and issue_comment)
          ref: ${{ needs.gate.outputs.pr_head_sha }}
      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ secrets.CR_USER }}
          password: ${{ secrets.CR_TOKEN }}
      - name: Build and push image
        id: build
        env:
          REGISTRY: ghcr.io
          IMAGE_NAME: ${{ github.repository }}
          # Use PR head SHA from gate
          GIT_REF: ${{ needs.gate.outputs.pr_head_sha }}
        run: |
          # Build image with git ref tag for this PR
          # Use first 8 chars of the git ref (POSIX-compliant)
          IMAGE_TAG="ref-$(printf '%s' "$GIT_REF" | cut -c1-8)"
          FULL_IMAGE="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
          echo "Building image: $FULL_IMAGE"
          echo "Git ref: $GIT_REF"
          # Build and push using make targets
          make docker-build IMG="$FULL_IMAGE"
          make docker-push IMG="$FULL_IMAGE"
          echo "image_tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT
          echo "Image built and pushed: $FULL_IMAGE"
  # Run e2e tests on OpenShift self-hosted runner
  e2e-openshift:
    runs-on: [self-hosted, openshift]
    needs: [gate, build-image]
    if: needs.gate.outputs.should_run == 'true'
    env:
      # For non-dispatch triggers github.event.inputs.* is empty, so || picks the fallback
      MODEL_ID: ${{ github.event.inputs.model_id || 'unsloth/Meta-Llama-3.1-8B' }}
      # NOTE(review): fallback default is 'A100' while the workflow_dispatch input
      # default is 'H100' — confirm the divergence for PR-triggered runs is intentional
      ACCELERATOR_TYPE: ${{ github.event.inputs.accelerator_type || 'A100' }}
      REQUEST_RATE: ${{ github.event.inputs.request_rate || '20' }}
      NUM_PROMPTS: ${{ github.event.inputs.num_prompts || '3000' }}
      MAX_NUM_SEQS: ${{ github.event.inputs.max_num_seqs || '1' }}
      HPA_STABILIZATION_SECONDS: ${{ github.event.inputs.hpa_stabilization_seconds || '30' }}
      SKIP_CLEANUP: ${{ github.event.inputs.skip_cleanup || 'false' }}
      # Use main branch of llm-d/llm-d for inferencepool chart v1.2.1 (GA API support)
      LLM_D_RELEASE: main
      # PR-specific namespaces for isolation between concurrent PR tests
      # Primary llm-d namespace (Model A1 + A2)
      LLMD_NAMESPACE: llm-d-inference-scheduler-pr-${{ needs.gate.outputs.pr_number || github.run_id }}
      # Secondary llm-d namespace (Model B)
      LLMD_NAMESPACE_B: llm-d-inference-scheduler-pr-${{ needs.gate.outputs.pr_number || github.run_id }}-b
      # WVA controller namespace (monitors all models)
      WVA_NAMESPACE: llm-d-autoscaler-pr-${{ needs.gate.outputs.pr_number || github.run_id }}
      # Unique release names per run to avoid conflicts
      WVA_RELEASE_NAME: wva-e2e-${{ github.run_id }}
      # Model A1: Primary deployment in LLMD_NAMESPACE
      MODEL_A1_RELEASE: model-a1-${{ github.run_id }}
      # Model B: Deployment in LLMD_NAMESPACE_B
      MODEL_B_RELEASE: model-b-${{ github.run_id }}
      # Use the image built in the previous job
      WVA_IMAGE_TAG: ${{ needs.build-image.outputs.image_tag }}
    steps:
      - name: Checkout source
        uses: actions/checkout@v4
        with:
          # Use PR head SHA from gate (works for both pull_request and issue_comment)
          ref: ${{ needs.gate.outputs.pr_head_sha }}
      - name: Extract Go version from go.mod
        # Reads the "go X.Y" directive and exports it as GO_VERSION for setup-go
        run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> $GITHUB_ENV
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: "${{ env.GO_VERSION }}"
          cache-dependency-path: ./go.sum
      - name: Install tools (kubectl, oc, helm, make)
        run: |
          sudo apt-get update && sudo apt-get install -y make
          # Install kubectl - use pinned version for reproducible CI builds
          # Pinned 2025-12: v1.31.0 tested compatible with OpenShift 4.16+
          # Update this version when upgrading target cluster or during regular dependency reviews
          KUBECTL_VERSION="v1.31.0"
          echo "Installing kubectl version: $KUBECTL_VERSION"
          curl -fsSL --retry 3 --retry-delay 5 -o kubectl "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
          curl -fsSL --retry 3 --retry-delay 5 -o kubectl.sha256 "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl.sha256"
          echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
          chmod +x kubectl
          sudo mv kubectl /usr/local/bin/
          rm -f kubectl.sha256
          # Install oc (OpenShift CLI)
          curl -fsSL --retry 3 --retry-delay 5 -O "https://mirror.openshift.com/pub/openshift-v4/clients/ocp/stable/openshift-client-linux.tar.gz"
          tar -xzf openshift-client-linux.tar.gz
          sudo mv oc /usr/local/bin/
          # The oc tarball also ships its own kubectl and README; discard them
          # (the pinned kubectl was already moved to /usr/local/bin above)
          rm -f openshift-client-linux.tar.gz kubectl README.md
          # Install helm
          # NOTE(review): get-helm-3 is fetched from helm's main branch (unpinned) — consider pinning for reproducibility
          curl -fsSL --retry 3 --retry-delay 5 https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
      - name: Verify cluster access
        run: |
          echo "Verifying cluster access..."
          kubectl cluster-info
          kubectl get nodes
      - name: Get HF token from cluster secret
        id: hf-token
        run: |
          echo "Reading HF token from cluster secret llm-d-hf-token in default namespace..."
          # The llm-d-hf-token secret exists in the default namespace on the cluster
          # Check secret existence separately from key retrieval for better error messages
          if ! kubectl get secret llm-d-hf-token -n default &>/dev/null; then
            echo "::error::Secret 'llm-d-hf-token' not found in default namespace"
            echo "::error::Please ensure the HF token secret exists on the cluster"
            exit 1
          fi
          # Read the token and mask it in logs
          HF_TOKEN=$(kubectl get secret llm-d-hf-token -n default -o jsonpath='{.data.HF_TOKEN}' | base64 -d)
          if [ -z "$HF_TOKEN" ]; then
            echo "::error::Secret 'llm-d-hf-token' exists but 'HF_TOKEN' key is empty or missing"
            exit 1
          fi
          # Mask the token in workflow logs
          echo "::add-mask::$HF_TOKEN"
          # Export for subsequent steps (available via GITHUB_ENV to the rest of this job)
          echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV
          echo "HF token retrieved successfully from cluster secret"
      - name: Clean up resources for this PR
        run: |
          echo "Cleaning up WVA resources for this PR's namespaces only..."
          echo " LLMD_NAMESPACE: $LLMD_NAMESPACE"
          echo " LLMD_NAMESPACE_B: $LLMD_NAMESPACE_B"
          echo " WVA_NAMESPACE: $WVA_NAMESPACE"
          # Only clean up the 3 namespaces associated with THIS PR
          # Do NOT touch namespaces from other PRs to avoid race conditions
          for ns in "$LLMD_NAMESPACE" "$LLMD_NAMESPACE_B" "$WVA_NAMESPACE"; do
            if kubectl get namespace "$ns" &>/dev/null; then
              echo ""
              echo "=== Cleaning up namespace: $ns ==="
              # Delete WVA resources in this namespace
              echo " Removing HPAs and VAs..."
              kubectl delete hpa -n "$ns" -l app.kubernetes.io/name=workload-variant-autoscaler --ignore-not-found || true
              kubectl delete variantautoscaling -n "$ns" -l app.kubernetes.io/name=workload-variant-autoscaler --ignore-not-found || true
              # Uninstall all helm releases in the namespace
              for release in $(helm list -n "$ns" -q 2>/dev/null); do
                echo " Uninstalling helm release: $release"
                helm uninstall "$release" -n "$ns" --ignore-not-found --wait --timeout 60s || true
              done
              echo " Deleting namespace: $ns"
              kubectl delete namespace "$ns" --ignore-not-found --timeout=60s || true
            else
              echo "Namespace $ns does not exist, skipping cleanup"
            fi
          done
          # Clean up legacy namespaces if they exist (these are not PR-specific)
          for legacy_ns in llm-d-inference-scheduler workload-variant-autoscaler-system; do
            if kubectl get namespace "$legacy_ns" &>/dev/null; then
              echo ""
              echo "=== Cleaning up legacy namespace: $legacy_ns ==="
              # Uninstall all helm releases in the namespace first
              for release in $(helm list -n "$legacy_ns" -q 2>/dev/null); do
                echo " Uninstalling helm release: $release"
                helm uninstall "$release" -n "$legacy_ns" --ignore-not-found --wait --timeout 60s || true
              done
              echo " Deleting namespace: $legacy_ns"
              kubectl delete namespace "$legacy_ns" --ignore-not-found --timeout=60s || true
            fi
          done
          echo ""
          echo "Cleanup complete for this PR's namespaces"
      - name: Apply latest CRDs
        run: |
          echo "Applying latest VariantAutoscaling CRD..."
          # Helm doesn't auto-update CRDs, so we need to apply them manually
          # to ensure the cluster has the latest schema (including scaleTargetRef)
          kubectl apply -f charts/workload-variant-autoscaler/crds/
| - name: Deploy WVA and llm-d infrastructure | ||
| env: | ||
| # HF_TOKEN is inherited from GITHUB_ENV (set in 'Get HF token from cluster secret' step) | ||
| ENVIRONMENT: openshift | ||
| INSTALL_GATEWAY_CTRLPLANE: "false" | ||
| # Disable benchmark mode - istioBench environment not available in llm-d helmfile | ||
| BENCHMARK_MODE: "false" | ||
| E2E_TESTS_ENABLED: "true" | ||
| NAMESPACE_SCOPED: "false" | ||
| # Pass PR-specific namespaces to install script | ||
| LLMD_NS: ${{ env.LLMD_NAMESPACE }} | ||
| WVA_NS: ${{ env.WVA_NAMESPACE }} | ||
| # Controller instance label for multi-controller isolation in parallel e2e tests | ||
| CONTROLLER_INSTANCE: ${{ env.WVA_NAMESPACE }} | ||
| # vLLM max-num-seqs for e2e testing (lower = easier to saturate) | ||
| VLLM_MAX_NUM_SEQS: ${{ env.MAX_NUM_SEQS }} | ||
| # Disable benchmark mode - istioBench environment not available in llm-d helmfile | ||
| BENCHMARK_MODE: "false" | ||
| run: | | ||
| echo "Deploying WVA and llm-d infrastructure..." | ||
| echo " MODEL_ID: $MODEL_ID" | ||
| echo " ACCELERATOR_TYPE: $ACCELERATOR_TYPE" | ||
| echo " LLMD_NS: $LLMD_NS" | ||
| echo " WVA_NS: $WVA_NS" | ||
| echo " WVA_RELEASE_NAME: $WVA_RELEASE_NAME" | ||
| echo " WVA_IMAGE_TAG: $WVA_IMAGE_TAG" | ||
| echo " CONTROLLER_INSTANCE: $CONTROLLER_INSTANCE" | ||
| echo " VLLM_MAX_NUM_SEQS: $VLLM_MAX_NUM_SEQS" | ||
| echo " HF token configuration: ✓" | ||
| ./deploy/install.sh --model "$MODEL_ID" --accelerator "$ACCELERATOR_TYPE" --release-name "$WVA_RELEASE_NAME" --environment openshift | ||
      - name: Create secondary namespace for Model B
        run: |
          echo "Creating secondary namespace for Model B..."
          # Idempotent create: dry-run + apply succeeds even if the namespace already exists
          kubectl create namespace "$LLMD_NAMESPACE_B" --dry-run=client -o yaml | kubectl apply -f -
          echo "Secondary namespace $LLMD_NAMESPACE_B created"
      - name: Label namespaces for OpenShift monitoring
        run: |
          echo "Adding openshift.io/user-monitoring label to namespaces for Prometheus scraping..."
          kubectl label namespace "$LLMD_NAMESPACE" openshift.io/user-monitoring=true --overwrite
          kubectl label namespace "$LLMD_NAMESPACE_B" openshift.io/user-monitoring=true --overwrite
          kubectl label namespace "$WVA_NAMESPACE" openshift.io/user-monitoring=true --overwrite
          echo "Namespace labels applied"
      - name: Wait for infrastructure to be ready
        run: |
          echo "Waiting for WVA controller to be ready..."
          # Best-effort wait (|| true): the pod listing below shows the actual state either way
          kubectl wait --for=condition=available --timeout=300s deployment -l app.kubernetes.io/name=workload-variant-autoscaler -n "$WVA_NAMESPACE" || true
          kubectl get pods -n "$WVA_NAMESPACE"
          echo "Waiting for llm-d deployment (Model A1) to be ready..."
          kubectl get pods -n "$LLMD_NAMESPACE"
| - name: Deploy Model B infrastructure in secondary namespace | ||
| env: | ||
| # HF_TOKEN is inherited from GITHUB_ENV | ||
| ENVIRONMENT: openshift | ||
| INSTALL_GATEWAY_CTRLPLANE: "false" | ||
| # Disable benchmark mode - istioBench environment not available in llm-d helmfile | ||
| BENCHMARK_MODE: "false" | ||
| E2E_TESTS_ENABLED: "true" | ||
| NAMESPACE_SCOPED: "false" | ||
| # Override namespaces for Model B stack | ||
| LLMD_NS: ${{ env.LLMD_NAMESPACE_B }} | ||
| WVA_NS: ${{ env.WVA_NAMESPACE }} | ||
| # Skip WVA controller and prometheus (use existing) | ||
| DEPLOY_WVA: "false" | ||
| DEPLOY_PROMETHEUS: "false" | ||
| DEPLOY_PROMETHEUS_ADAPTER: "false" | ||
| DEPLOY_VA: "false" | ||
| DEPLOY_HPA: "false" | ||
| # vLLM max-num-seqs for e2e testing (lower = easier to saturate) | ||
| VLLM_MAX_NUM_SEQS: ${{ env.MAX_NUM_SEQS }} | ||
| # Disable benchmark mode - istioBench environment not available in llm-d helmfile | ||
| BENCHMARK_MODE: "false" | ||
| run: | | ||
| echo "Deploying Model B infrastructure in $LLMD_NAMESPACE_B..." | ||
| echo " MODEL_ID: $MODEL_ID" | ||
| echo " ACCELERATOR_TYPE: $ACCELERATOR_TYPE" | ||
| echo " VLLM_MAX_NUM_SEQS: $VLLM_MAX_NUM_SEQS" | ||
| # Deploy llm-d infrastructure only (no WVA controller, no VA/HPA) | ||
| ./deploy/install.sh --model "$MODEL_ID" --accelerator "$ACCELERATOR_TYPE" --environment openshift | ||
| echo "Waiting for Model B deployment to be ready..." | ||
| kubectl wait --for=condition=available --timeout=300s deployment --all -n "$LLMD_NAMESPACE_B" || true | ||
| kubectl get pods -n "$LLMD_NAMESPACE_B" | ||
      - name: Deploy Model B WVA resources
        env:
          LLMD_NS: ${{ env.LLMD_NAMESPACE_B }}
          WVA_NS: ${{ env.WVA_NAMESPACE }}
          # Use same controller instance as Model A for HPA selector matching
          CONTROLLER_INSTANCE: ${{ env.WVA_NAMESPACE }}
        run: |
          echo "Deploying Model B WVA resources..."
          echo " Release name: $MODEL_B_RELEASE"
          echo " CONTROLLER_INSTANCE: $CONTROLLER_INSTANCE"
          # Deploy WVA resources (VA, HPA, ServiceMonitor) for Model B
          # controller.enabled=false since we're using the existing WVA controller
          # Note: llmd.modelName should be base name without -decode suffix (template appends it)
          helm upgrade -i "$MODEL_B_RELEASE" ./charts/workload-variant-autoscaler \
            -n "$WVA_NAMESPACE" \
            --set controller.enabled=false \
            --set va.enabled=true \
            --set hpa.enabled=true \
            --set hpa.behavior.scaleUp.stabilizationWindowSeconds="$HPA_STABILIZATION_SECONDS" \
            --set hpa.behavior.scaleDown.stabilizationWindowSeconds="$HPA_STABILIZATION_SECONDS" \
            --set llmd.namespace="$LLMD_NAMESPACE_B" \
            --set llmd.modelName="ms-inference-scheduling-llm-d-modelservice" \
            --set llmd.modelID="$MODEL_ID" \
            --set va.accelerator="$ACCELERATOR_TYPE" \
            --set wva.baseName="inference-scheduling" \
            --set wva.prometheus.monitoringNamespace=openshift-user-workload-monitoring \
            --set wva.controllerInstance="$CONTROLLER_INSTANCE"
          echo "Model B WVA resources deployed"
          # Listing is best-effort (|| true): resources may land in a different namespace per chart config
          kubectl get hpa -n "$LLMD_NAMESPACE_B" -l app.kubernetes.io/instance="$MODEL_B_RELEASE" || true
          kubectl get variantautoscaling -n "$LLMD_NAMESPACE_B" -l app.kubernetes.io/instance="$MODEL_B_RELEASE" || true
      - name: Verify multi-model deployment
        run: |
          echo "=== Multi-Model Deployment Status ==="
          echo ""
          echo "=== Model A1 (Primary, $LLMD_NAMESPACE) ==="
          kubectl get deployment -n "$LLMD_NAMESPACE" | grep -E "decode|NAME" || true
          kubectl get hpa -n "$LLMD_NAMESPACE" || true
          kubectl get variantautoscaling -n "$LLMD_NAMESPACE" || true
          echo ""
          echo "=== Model B ($LLMD_NAMESPACE_B) ==="
          kubectl get deployment -n "$LLMD_NAMESPACE_B" | grep -E "decode|NAME" || true
          kubectl get hpa -n "$LLMD_NAMESPACE_B" || true
          kubectl get variantautoscaling -n "$LLMD_NAMESPACE_B" || true
          echo ""
          echo "=== WVA Controller ($WVA_NAMESPACE) ==="
          kubectl get pods -n "$WVA_NAMESPACE"
      - name: Install Go dependencies
        run: go mod download
      - name: Run OpenShift E2E tests
        env:
          CONTROLLER_NAMESPACE: ${{ env.WVA_NAMESPACE }}
          MONITORING_NAMESPACE: openshift-user-workload-monitoring
          LLMD_NAMESPACE: ${{ env.LLMD_NAMESPACE }}
          # Multi-model testing: secondary namespace for Model B
          LLMD_NAMESPACE_B: ${{ env.LLMD_NAMESPACE_B }}
          GATEWAY_NAME: infra-inference-scheduling-inference-gateway-istio
          DEPLOYMENT: ms-inference-scheduling-llm-d-modelservice-decode
          # Pass WVA_RELEASE_NAME so test can filter for current run's resources
          WVA_RELEASE_NAME: ${{ env.WVA_RELEASE_NAME }}
        run: |
          echo "Running OpenShift E2E tests with configuration:"
          echo " CONTROLLER_NAMESPACE: $CONTROLLER_NAMESPACE"
          echo " LLMD_NAMESPACE: $LLMD_NAMESPACE"
          echo " LLMD_NAMESPACE_B: $LLMD_NAMESPACE_B (multi-model)"
          echo " DEPLOYMENT: $DEPLOYMENT"
          echo " GATEWAY_NAME: $GATEWAY_NAME"
          echo " MODEL_ID: $MODEL_ID"
          echo " REQUEST_RATE: $REQUEST_RATE"
          echo " NUM_PROMPTS: $NUM_PROMPTS"
          echo " WVA_RELEASE_NAME: $WVA_RELEASE_NAME"
          make test-e2e-openshift
      - name: Cleanup infrastructure
        # Cleanup on success or cancellation, but NOT on failure (preserve for debugging)
        # Use SKIP_CLEANUP=true to keep resources after successful runs
        if: (success() || cancelled()) && env.SKIP_CLEANUP != 'true'
        run: |
          echo "Cleaning up ALL test infrastructure..."
          echo " LLMD_NAMESPACE: $LLMD_NAMESPACE"
          echo " LLMD_NAMESPACE_B: $LLMD_NAMESPACE_B"
          echo " WVA_NAMESPACE: $WVA_NAMESPACE"
          echo " WVA_RELEASE_NAME: $WVA_RELEASE_NAME"
          echo " MODEL_B_RELEASE: $MODEL_B_RELEASE"
          # Uninstall all WVA helm releases before deleting namespaces
          # This ensures proper cleanup of resources and removes helm tracking
          echo "Uninstalling WVA helm releases..."
          helm uninstall "$WVA_RELEASE_NAME" -n "$WVA_NAMESPACE" --ignore-not-found --wait --timeout 60s || true
          helm uninstall "$MODEL_B_RELEASE" -n "$WVA_NAMESPACE" --ignore-not-found --wait --timeout 60s || true
          echo "Uninstalling llm-d helm releases in primary namespace..."
          for release in $(helm list -n "$LLMD_NAMESPACE" -q 2>/dev/null); do
            echo " Uninstalling release: $release"
            helm uninstall "$release" -n "$LLMD_NAMESPACE" --ignore-not-found --wait --timeout 60s || true
          done
          echo "Uninstalling llm-d helm releases in secondary namespace..."
          for release in $(helm list -n "$LLMD_NAMESPACE_B" -q 2>/dev/null); do
            echo " Uninstalling release: $release"
            helm uninstall "$release" -n "$LLMD_NAMESPACE_B" --ignore-not-found --wait --timeout 60s || true
          done
          # Delete all PR-specific namespaces
          echo "Deleting llm-d namespace $LLMD_NAMESPACE..."
          kubectl delete namespace "$LLMD_NAMESPACE" --ignore-not-found --timeout=120s || true
          echo "Deleting llm-d namespace $LLMD_NAMESPACE_B..."
          kubectl delete namespace "$LLMD_NAMESPACE_B" --ignore-not-found --timeout=120s || true
          echo "Deleting WVA namespace $WVA_NAMESPACE..."
          kubectl delete namespace "$WVA_NAMESPACE" --ignore-not-found --timeout=120s || true
          # Clean up cluster-scoped WVA resources for THIS release only
          # Use both name and instance labels to avoid deleting resources from other PRs
          echo "Removing cluster-scoped WVA resources for release $WVA_RELEASE_NAME..."
          kubectl delete clusterrole,clusterrolebinding -l app.kubernetes.io/name=workload-variant-autoscaler,app.kubernetes.io/instance="$WVA_RELEASE_NAME" --ignore-not-found || true
          echo "Cleanup complete"
      - name: Scale down GPU workloads on failure
        # On failure, scale down decode deployments to free GPUs while preserving
        # other resources (VA, HPA, controller, gateway) for debugging
        if: failure()
        run: |
          echo "Test failed - scaling down decode deployments to free GPUs..."
          echo "Other resources (VA, HPA, controller logs) are preserved for debugging"
          echo ""
          for ns in "$LLMD_NAMESPACE" "$LLMD_NAMESPACE_B"; do
            if kubectl get namespace "$ns" &>/dev/null; then
              echo "=== Scaling down decode deployments in $ns ==="
              kubectl scale deployment -n "$ns" -l llm-d.ai/inferenceServing=true --replicas=0 || true
              # Also try by name pattern in case labels are missing
              kubectl get deployment -n "$ns" -o name 2>/dev/null | grep decode | while read -r deploy; do
                echo " Scaling down: $deploy"
                kubectl scale "$deploy" -n "$ns" --replicas=0 || true
              done
            fi
          done
          echo ""
          echo "GPU workloads scaled down. Remaining resources for debugging:"
          echo ""
          echo "=== VAs ==="
          kubectl get va -n "$LLMD_NAMESPACE" 2>/dev/null || true
          kubectl get va -n "$LLMD_NAMESPACE_B" 2>/dev/null || true
          echo ""
          echo "=== HPAs ==="
          kubectl get hpa -n "$LLMD_NAMESPACE" 2>/dev/null || true
          kubectl get hpa -n "$LLMD_NAMESPACE_B" 2>/dev/null || true
          echo ""
          echo "=== Controller pods ==="
          kubectl get pods -n "$WVA_NAMESPACE" 2>/dev/null || true
| # Report status back to PR for issue_comment triggered runs | ||
| # This ensures fork PRs show the correct status after /ok-to-test runs complete | ||
| report-status: | ||
| runs-on: ubuntu-latest | ||
| needs: [gate, e2e-openshift] | ||
| # Run always (even on failure) but only for issue_comment events | ||
| if: always() && github.event_name == 'issue_comment' && needs.gate.outputs.should_run == 'true' | ||
| steps: | ||
| - name: Report status to PR | ||
| uses: actions/github-script@v7 | ||
| with: | ||
| script: | | ||
| const prHeadSha = '${{ needs.gate.outputs.pr_head_sha }}'; | ||
| const e2eResult = '${{ needs.e2e-openshift.result }}'; | ||
| // Map job result to commit status | ||
| let state, description; | ||
| if (e2eResult === 'success') { | ||
| state = 'success'; | ||
| description = 'E2E tests passed'; | ||
| } else if (e2eResult === 'skipped') { | ||
| state = 'pending'; | ||
| description = 'E2E tests skipped'; | ||
| } else if (e2eResult === 'cancelled') { | ||
| state = 'failure'; | ||
| description = 'E2E tests cancelled'; | ||
| } else { | ||
| state = 'failure'; | ||
| description = 'E2E tests failed'; | ||
| } | ||
| console.log(`Reporting status to PR commit ${prHeadSha}: ${state} - ${description}`); | ||
| await github.rest.repos.createCommitStatus({ | ||
| owner: context.repo.owner, | ||
| repo: context.repo.repo, | ||
| sha: prHeadSha, | ||
| state: state, | ||
| target_url: `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`, | ||
| description: description, | ||
| context: '${{ github.workflow }} / e2e (comment trigger)' | ||
| }); | ||
| console.log('Status reported successfully'); | ||