# Source: GitHub web view of workflow run "Nightly - CKS E2E Tests #48".
# The original page carried GitHub's standard banner about hidden/bidirectional
# Unicode characters; review this file in an editor that reveals hidden Unicode.
name: Nightly - CKS E2E Tests

# Nightly regression test for WVA on CoreWeave Kubernetes (CKS).
# Calls the reusable CKS helmfile workflow from llm-d/llm-d-infra to deploy
# the workload-autoscaling guide stack and run the e2e test suite on waldorf.

on:
  schedule:
    - cron: '30 6 * * *'  # 06:30 UTC daily (staggered from IS CKS at 06:00)
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID'
        required: false
        default: 'unsloth/Meta-Llama-3.1-8B'
      accelerator_type:
        description: 'Accelerator type (H100, H200, A100)'
        required: false
        default: 'H100'
      image_tag:
        description: 'WVA image tag — "latest" auto-resolves to newest release'
        required: false
        default: 'latest'
      request_rate:
        description: 'Request rate (req/s)'
        required: false
        default: '20'
      num_prompts:
        description: 'Number of prompts'
        required: false
        default: '3000'
      max_num_seqs:
        description: 'vLLM max batch size (lower = easier to saturate)'
        required: false
        default: '1'
      hpa_stabilization_seconds:
        description: 'HPA stabilization window in seconds'
        required: false
        default: '240'
      skip_cleanup:
        description: 'Skip cleanup after tests (for debugging)'
        required: false
        default: 'false'

# Least-privilege token: the reusable workflow only needs to read this repo.
permissions:
  contents: read

# Only one nightly CKS run at a time; a newer trigger cancels an in-flight one.
concurrency:
  group: nightly-e2e-cks-wva
  cancel-in-progress: true

jobs:
  nightly:
    uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-cks-helmfile.yaml@main
    with:
      guide_name: workload-autoscaling
      namespace: llm-d-nightly-wva-cks
      helmfile_env: istio
      gateway_type: istio
      caller_repo: ${{ github.repository }}
      caller_ref: ${{ github.ref_name }}
      deploy_wva: true
      # On scheduled runs github.event.inputs is empty, so each `||` falls back
      # to the same value as the workflow_dispatch default above — keep the two
      # in sync when changing either.
      model_id: ${{ github.event.inputs.model_id || 'unsloth/Meta-Llama-3.1-8B' }}
      accelerator_type: ${{ github.event.inputs.accelerator_type || 'H100' }}
      wva_image_tag: ${{ github.event.inputs.image_tag || 'latest' }}
      request_rate: ${{ github.event.inputs.request_rate || '20' }}
      num_prompts: ${{ github.event.inputs.num_prompts || '3000' }}
      max_num_seqs: ${{ github.event.inputs.max_num_seqs || '1' }}
      hpa_stabilization_seconds: ${{ github.event.inputs.hpa_stabilization_seconds || '240' }}
      # String comparison yields a real boolean; empty input (scheduled runs)
      # evaluates to false, so cleanup always runs on the nightly schedule.
      skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }}
      required_gpus: 2
      recommended_gpus: 4
      allow_gpu_preemption: true
      pod_wait_timeout: '30m'
      pod_readiness_delay: 180
      image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest'
      test_target: test-e2e-full
    secrets: inherit