name: Nightly - Precise Prefix Cache E2E (OpenShift)

# Nightly regression test for the precise-prefix-cache-aware guide on OpenShift.
# Deploys the guide via custom_deploy_script (kustomize + standalone chart with
# UDS tokenizer post-renderer) and validates with e2e-validate.sh against the
# scheduler EPP service. Mirrors the optimized-baseline CKS/GKE migration in
# llm-d/llm-d#1268; the OCP helmfile reusable workflow doesn't expose a
# `gateway_host` input yet, so pre_deploy_script seeds GATEWAY_HOST via
# GITHUB_ENV.
on:
  schedule:
    - cron: '0 1 * * *'  # 01:00 UTC daily
  workflow_dispatch:
    inputs:
      skip_cleanup:
        description: 'Skip cleanup after tests (for debugging)'
        required: false
        default: 'false'

permissions:
  contents: read

# A single shared group: a newly started run cancels one that is still in
# progress.
concurrency:
  group: nightly-e2e-precise-prefix-cache
  cancel-in-progress: true

jobs:
  nightly:
    # Run only when the workflow lives in the llm-d/llm-d repository.
    if: github.repository == 'llm-d/llm-d'
    uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-openshift-helmfile.yaml@main
    with:
      guide_name: precise-prefix-cache-aware
      namespace: llm-d-nightly-prefix-cache
      accelerator_type: H100
      required_gpus: 2
      recommended_gpus: 4
      pod_wait_timeout: '30m'
      pod_readiness_delay: 180
      image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest'
      allow_gpu_preemption: true
      install_gateway_provider: false
      # True only when the manual-dispatch input is the string 'true'.
      skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }}
      # Pins the vLLM patch to H100 nodes via a nodeSelector, prints the
      # resulting selector, and exports GATEWAY_HOST to later steps through
      # GITHUB_ENV.
      pre_deploy_script: |
        echo "Adding H100 nodeSelector to gpu/vllm overlay..."
        yq e '.spec.template.spec.nodeSelector["nvidia.com/gpu.product"] = "NVIDIA-H100-80GB-HBM3"' \
          -i "${GUIDE_PATH}/modelserver/gpu/vllm/patch-vllm.yaml"
        yq e '.spec.template.spec.nodeSelector' "${GUIDE_PATH}/modelserver/gpu/vllm/patch-vllm.yaml"
        echo "GATEWAY_HOST=precise-prefix-cache-aware-epp" >> "$GITHUB_ENV"
      # Applies the model-server kustomize overlay, then deploys the standalone
      # scheduler chart through the UDS tokenizer post-renderer.
      # `helm upgrade --install` keeps the step re-runnable when an earlier run
      # left the release behind (skip_cleanup, or a run cancelled by the
      # concurrency group); a plain `helm install` fails on an existing release.
      custom_deploy_script: |
        kubectl apply -k guides/precise-prefix-cache-aware/modelserver/gpu/vllm -n "${NAMESPACE}"
        helm upgrade --install precise-prefix-cache-aware \
          oci://registry.k8s.io/gateway-api-inference-extension/charts/standalone \
          -f guides/recipes/scheduler/base.values.yaml \
          -f guides/precise-prefix-cache-aware/scheduler/precise-prefix-cache-aware.values.yaml \
          --post-renderer ./guides/precise-prefix-cache-aware/scheduler/patches/uds-tokenizer/post-renderer.sh \
          -n "${NAMESPACE}" --version v1.4.0
    secrets: inherit