# .github/workflows/nightly-e2e-wva-cks.yaml
name: Nightly - WVA E2E (CKS)
# Nightly regression test for WVA (Workload Variant Autoscaler) on CoreWeave
# Kubernetes (CKS). Deploys the workload-autoscaling guide stack via the
# consolidated helmfile reusable workflow and runs the WVA e2e test suite.
#
# Builds the WVA controller image from main before testing, since the helm
# chart on main may require features not yet in a tagged release image.
on:
  schedule:
    - cron: '30 7 * * *'  # 07:30 UTC daily (staggered from wide-ep-lws CKS at 07:00)
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID'
        required: false
        default: 'unsloth/Meta-Llama-3.1-8B'
      accelerator_type:
        description: 'Accelerator type (H100, H200, A100)'
        required: false
        default: 'H100'
      image_tag:
        description: 'WVA image tag (leave empty to build from main)'
        required: false
        default: ''
      request_rate:
        description: 'Request rate (req/s)'
        required: false
        default: '20'
      num_prompts:
        description: 'Number of prompts'
        required: false
        default: '3000'
      max_num_seqs:
        description: 'vLLM max batch size (lower = easier to saturate)'
        required: false
        default: '1'
      hpa_stabilization_seconds:
        description: 'HPA stabilization window in seconds'
        required: false
        default: '240'
      caller_ref:
        description: 'WVA repo ref to checkout for tests and image build (branch, tag, or SHA)'
        required: false
        default: 'main'
      skip_cleanup:
        description: 'Skip cleanup after tests (for debugging)'
        required: false
        default: 'false'

# packages: write is required so build-wva-image can push to GHCR.
permissions:
  contents: read
  packages: write

# Only one nightly WVA run at a time; a newer trigger cancels an in-flight run.
concurrency:
  group: nightly-e2e-wva-cks
  cancel-in-progress: true
jobs:
  # Build the WVA controller image from main so the chart and binary match.
  # The chart on main may include flags (e.g. --config-file) that only exist
  # in the latest code, not in the last tagged release.
  build-wva-image:
    # Skipped when the caller supplies an explicit image_tag via dispatch input.
    if: github.repository == 'llm-d/llm-d' && github.event.inputs.image_tag == ''
    runs-on: ubuntu-latest
    outputs:
      image_tag: ${{ steps.build.outputs.image_tag }}
    steps:
      - name: Checkout WVA source
        uses: actions/checkout@v6
        with:
          repository: llm-d/llm-d-workload-variant-autoscaler
          ref: ${{ github.event.inputs.caller_ref || 'main' }}
      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version-file: go.mod
      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ secrets.GHCR_USER }}
          password: ${{ secrets.GHCR_TOKEN }}
      - name: Build and push image
        id: build
        run: |
          SHA=$(git rev-parse --short=8 HEAD)
          IMAGE_TAG="nightly-${SHA}"
          FULL_IMAGE="ghcr.io/llm-d/llm-d-workload-variant-autoscaler:${IMAGE_TAG}"
          echo "Building WVA controller from main: $FULL_IMAGE"
          # Retry the build up to 3 times to ride out transient registry errors.
          for attempt in 1 2 3; do
            echo "Build attempt $attempt/3"
            if make docker-build IMG="$FULL_IMAGE"; then
              break
            fi
            if [ "$attempt" -eq 3 ]; then
              echo "Build failed after 3 attempts"
              exit 1
            fi
            echo "Build failed (likely transient registry error), retrying in 30s..."
            sleep 30
          done
          make docker-push IMG="$FULL_IMAGE"
          echo "image_tag=${IMAGE_TAG}" >> "$GITHUB_OUTPUT"

  nightly:
    needs: build-wva-image
    # always() keeps this job eligible when build-wva-image is skipped (explicit
    # image_tag supplied); it still requires the build to have succeeded or been
    # skipped, so a failed build blocks the e2e run.
    if: github.repository == 'llm-d/llm-d' && always() && (needs.build-wva-image.result == 'success' || needs.build-wva-image.result == 'skipped')
    uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-cks-helmfile.yaml@main
    with:
      guide_name: workload-autoscaling
      namespace: llm-d-nightly-wva
      deploy_wva: true
      caller_repo: llm-d/llm-d-workload-variant-autoscaler
      caller_ref: ${{ github.event.inputs.caller_ref || 'main' }}
      # Freshly-built nightly tag when the build ran; otherwise the dispatch input.
      wva_image_tag: ${{ needs.build-wva-image.outputs.image_tag || github.event.inputs.image_tag }}
      image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest'
      allow_gpu_preemption: true
      skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }}
      required_gpus: 2
      recommended_gpus: 4
      test_target: nightly-test-llm-d
    secrets: inherit