# .github/workflows/nightly-e2e-wva-cks.yaml
name: Nightly - WVA E2E (CKS)

# Nightly regression test for WVA (Workload Variant Autoscaler) on CoreWeave
# Kubernetes (CKS). Deploys the workload-autoscaling guide stack via the
# consolidated helmfile reusable workflow and runs the WVA e2e test suite.
#
# Builds the WVA controller image from main before testing, since the helm
# chart on main may require features not yet in a tagged release image.
# Triggers: a daily schedule plus a manual dispatch with tunable parameters.
# All workflow_dispatch defaults are quoted strings — workflow inputs are
# strings, and quoting avoids implicit-typing surprises ('1', '20', 'false').
on:
  schedule:
    - cron: '30 7 * * *'  # 07:30 UTC daily (staggered from wide-ep-lws CKS at 07:00)
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID'
        required: false
        default: 'unsloth/Meta-Llama-3.1-8B'
      accelerator_type:
        description: 'Accelerator type (H100, H200, A100)'
        required: false
        default: 'H100'
      image_tag:
        description: 'WVA image tag (leave empty to build from main)'
        required: false
        default: ''
      request_rate:
        description: 'Request rate (req/s)'
        required: false
        default: '20'
      num_prompts:
        description: 'Number of prompts'
        required: false
        default: '3000'
      max_num_seqs:
        description: 'vLLM max batch size (lower = easier to saturate)'
        required: false
        default: '1'
      hpa_stabilization_seconds:
        description: 'HPA stabilization window in seconds'
        required: false
        default: '240'
      caller_ref:
        description: 'WVA repo ref to checkout for tests and image build (branch, tag, or SHA)'
        required: false
        default: 'main'
      skip_cleanup:
        description: 'Skip cleanup after tests (for debugging)'
        required: false
        default: 'false'
# Least-privilege token: read the repo, write packages (push to GHCR).
permissions:
  contents: read
  packages: write

# Only one nightly run at a time; a newer run cancels an in-flight one so
# stale runs don't hold scarce GPU capacity.
concurrency:
  group: nightly-e2e-wva-cks
  cancel-in-progress: true
jobs:
  # Build the WVA controller image from main so the chart and binary match.
  # The chart on main may include flags (e.g. --config-file) that only exist
  # in the latest code, not in the last tagged release.
  build-wva-image:
    # Skip entirely when the caller supplied an explicit image_tag (the
    # nightly job then uses that tag instead). On schedule events
    # github.event.inputs is absent, so the comparison against '' holds.
    if: github.repository == 'llm-d/llm-d' && github.event.inputs.image_tag == ''
    runs-on: ubuntu-latest
    outputs:
      # Consumed by the nightly job to select the freshly built image.
      image_tag: ${{ steps.build.outputs.image_tag }}
    steps:
      - name: Checkout WVA source
        uses: actions/checkout@v6
        with:
          repository: llm-d/llm-d-workload-variant-autoscaler
          ref: ${{ github.event.inputs.caller_ref || 'main' }}

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version-file: go.mod

      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ secrets.GHCR_USER }}
          password: ${{ secrets.GHCR_TOKEN }}

      - name: Build and push image
        id: build
        run: |
          SHA=$(git rev-parse --short=8 HEAD)
          IMAGE_TAG="nightly-${SHA}"
          FULL_IMAGE="ghcr.io/llm-d/llm-d-workload-variant-autoscaler:${IMAGE_TAG}"
          echo "Building WVA controller from main: $FULL_IMAGE"
          # Retry the build up to 3 times — base-image pulls from upstream
          # registries are an occasional transient failure on hosted runners.
          for attempt in 1 2 3; do
            echo "Build attempt $attempt/3"
            if make docker-build IMG="$FULL_IMAGE"; then
              break
            fi
            if [ "$attempt" -eq 3 ]; then
              echo "Build failed after 3 attempts"
              exit 1
            fi
            echo "Build failed (likely transient registry error), retrying in 30s..."
            sleep 30
          done
          make docker-push IMG="$FULL_IMAGE"
          echo "image_tag=${IMAGE_TAG}" >> "$GITHUB_OUTPUT"
| nightly: | |
| needs: build-wva-image | |
| if: github.repository == 'llm-d/llm-d' && always() && (needs.build-wva-image.result == 'success' || needs.build-wva-image.result == 'skipped') | |
| uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-cks-helmfile.yaml@main | |
| with: | |
| guide_name: workload-autoscaling | |
| namespace: llm-d-nightly-wva | |
| deploy_wva: true | |
| caller_repo: llm-d/llm-d-workload-variant-autoscaler | |
| caller_ref: ${{ github.event.inputs.caller_ref || 'main' }} | |
| wva_image_tag: ${{ needs.build-wva-image.outputs.image_tag || github.event.inputs.image_tag }} | |
| image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest' | |
| allow_gpu_preemption: true | |
| skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }} | |
| required_gpus: 2 | |
| recommended_gpus: 4 | |
| test_target: nightly-test-llm-d | |
| secrets: inherit |