# Captured from the GitHub Actions run page for "Nightly - CKS E2E Tests" #48.

name: Nightly - CKS E2E Tests
# Nightly regression test for WVA on CoreWeave Kubernetes (CKS).
# Calls the reusable CKS helmfile workflow from llm-d/llm-d-infra to deploy
# the workload-autoscaling guide stack and run the e2e test suite on waldorf.
# Triggered by a daily cron schedule and by manual dispatch; manual runs may
# override the test parameters, scheduled runs use the built-in defaults.
# Triggers: a daily scheduled run plus an on-demand run with tunable inputs.
on:
  schedule:
    - cron: '30 6 * * *'  # 06:30 UTC daily (staggered from IS CKS at 06:00)
  # Manual runs: every input is optional and falls back to its default.
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID'
        required: false
        default: 'unsloth/Meta-Llama-3.1-8B'
      accelerator_type:
        description: 'Accelerator type (H100, H200, A100)'
        required: false
        default: 'H100'
      image_tag:
        description: 'WVA image tag — "latest" auto-resolves to newest release'
        required: false
        default: 'latest'
      # Numeric-looking defaults are quoted on purpose: untyped dispatch
      # inputs are strings.
      request_rate:
        description: 'Request rate (req/s)'
        required: false
        default: '20'
      num_prompts:
        description: 'Number of prompts'
        required: false
        default: '3000'
      max_num_seqs:
        description: 'vLLM max batch size (lower = easier to saturate)'
        required: false
        default: '1'
      hpa_stabilization_seconds:
        description: 'HPA stabilization window in seconds'
        required: false
        default: '240'
      # Declared as a boolean so the dispatch form offers a checkbox instead
      # of free text. github.event.inputs still exposes the value as the
      # string 'true' or 'false'.
      skip_cleanup:
        description: 'Skip cleanup after tests (for debugging)'
        type: boolean
        required: false
        default: false
# Workflow-level GITHUB_TOKEN scope: read access to repository contents only.
permissions:
  contents: read
# All runs share one fixed concurrency group, so at most one is active at a
# time; starting a new run (scheduled or manual) cancels the one in progress.
concurrency:
  group: nightly-e2e-cks-wva
  cancel-in-progress: true
jobs:
  # Single job: the entire run is delegated to the shared reusable workflow.
  nightly:
    # NOTE(review): the reusable workflow is referenced at the mutable `main`
    # branch; consider pinning to a tag or commit SHA for reproducible runs.
    uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-cks-helmfile.yaml@main
    with:
      # Fixed settings for this guide. Their exact meaning is defined by the
      # reusable workflow's input declarations.
      guide_name: workload-autoscaling
      namespace: llm-d-nightly-wva-cks
      helmfile_env: istio
      gateway_type: istio
      caller_repo: ${{ github.repository }}
      caller_ref: ${{ github.ref_name }}
      deploy_wva: true
      # Dispatch-overridable settings. github.event.inputs is empty on
      # scheduled runs, so each `||` fallback supplies the same default that
      # is declared for the corresponding workflow_dispatch input.
      model_id: ${{ github.event.inputs.model_id || 'unsloth/Meta-Llama-3.1-8B' }}
      accelerator_type: ${{ github.event.inputs.accelerator_type || 'H100' }}
      # The dispatch input is named `image_tag`; the reusable workflow's
      # input is `wva_image_tag`.
      wva_image_tag: ${{ github.event.inputs.image_tag || 'latest' }}
      request_rate: ${{ github.event.inputs.request_rate || '20' }}
      num_prompts: ${{ github.event.inputs.num_prompts || '3000' }}
      max_num_seqs: ${{ github.event.inputs.max_num_seqs || '1' }}
      hpa_stabilization_seconds: ${{ github.event.inputs.hpa_stabilization_seconds || '240' }}
      # Values in github.event.inputs are strings; the comparison yields a
      # boolean that is false on scheduled runs (input absent).
      skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }}
      # Settings that are not exposed for manual override.
      required_gpus: 2
      recommended_gpus: 4
      allow_gpu_preemption: true
      pod_wait_timeout: '30m'
      pod_readiness_delay: 180
      image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest'
      test_target: test-e2e-full
    # Pass all of the caller's secrets through to the reusable workflow.
    secrets: inherit