Skip to content

Commit bcc8927

Browse files
committed
Add e2e test environment setup scripts and configuration files
This commit introduces a comprehensive setup for the llama-stack-provider-ragas e2e test environment, including:

- A Containerfile for building the test image.
- Deployment and teardown scripts for managing the test environment on OpenShift.
- Configuration files for Kubernetes resources, including ConfigMaps, Secrets, and DataSciencePipelinesApplication manifests.
- MinIO deployment for results storage and necessary operator configurations.

These additions facilitate automated testing and deployment of the llama-stack provider in a Kubernetes environment.
1 parent 90f8c1e commit bcc8927

File tree

10 files changed

+833
-0
lines changed

10 files changed

+833
-0
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# syntax=docker/dockerfile:1
# This Containerfile is used to build the llama-stack-provider-ragas-distro-image for the e2e tests.

FROM python:3.12-slim

WORKDIR /app

# Install uv by copying the static binaries from the official image (pinned tag).
COPY --from=ghcr.io/astral-sh/uv:0.9.21 /uv /uvx /bin/

# Create a venv and make it the default Python for subsequent steps.
RUN uv venv /app/.venv
ENV VIRTUAL_ENV=/app/.venv
ENV PATH="/app/.venv/bin:${PATH}"

# Keep the Hugging Face cache inside /app so the model pre-downloaded below is
# found at runtime regardless of which UID the container runs as (the default
# ~/.cache/huggingface would be /root's and unreadable once we drop root).
ENV HF_HOME=/app/.cache/huggingface

# Install sentence-transformers + torch (cached layer — these rarely change).
# --no-cache keeps uv's wheel cache out of the image layer.
RUN uv pip install --no-cache --python /app/.venv/bin/python \
    --extra-index-url https://download.pytorch.org/whl/cpu \
    torch sentence-transformers einops tokenizers safetensors

# Pre-download the embedding model so no HF fetch is needed at runtime.
# Pass --build-arg HF_TOKEN=hf_... to avoid rate limits.
# NOTE(review): build ARGs are visible in `docker history`; this is tolerable
# for a short-lived rate-limit token on a test image, but a long-lived secret
# should use `RUN --mount=type=secret` instead — confirm which kind is used.
ARG HF_TOKEN=""
RUN HF_TOKEN=${HF_TOKEN} python -c "from huggingface_hub import snapshot_download; snapshot_download('nomic-ai/nomic-embed-text-v1.5')"

# Copy code (changes frequently — kept after heavy layers for caching).
COPY src /app/src
COPY distribution /app/distribution
COPY pyproject.toml /app/pyproject.toml
COPY uv.lock /app/uv.lock
COPY README.md /app/README.md

# Install the project into the venv (editable, with the remote/distro extras).
RUN uv pip install --no-cache --python /app/.venv/bin/python -e ".[remote,distro]"

# Run as non-root. Group 0 with g=u permissions follows the OpenShift
# guideline so the image also works when the platform assigns an arbitrary
# UID; the stable numeric UID satisfies runAsNonRoot checks elsewhere.
RUN chgrp -R 0 /app && chmod -R g=u /app
USER 10001

# Documentation only — the llama-stack server listens on 8321.
EXPOSE 8321

ENTRYPOINT ["uv", "run", "--no-sync", "llama", "stack", "run", "distribution/run.yaml"]
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
#!/usr/bin/env bash
#
# Deploy the llama-stack-provider-ragas e2e test environment on an OpenShift cluster.
#
# Usage:
#   ./deploy-e2e.sh --build
#   ./deploy-e2e.sh --image <image-ref>
#
# Reads credentials from ../../.env (repo root) and creates a single
# 'ragas-env' k8s secret from it.
#
# Prerequisites:
#   - oc CLI installed and logged into an OpenShift cluster
#   - podman (only required for --build mode)
#

# -u catches unset-variable typos; -o pipefail surfaces failures in the
# `oc ... | oc apply` pipeline that plain -e would silently ignore.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${SCRIPT_DIR}/../.."
IMAGE_NAME="llama-stack-provider-ragas-distro-image"
NAMESPACE="ragas-test"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# wait_until <retries> <delay-secs> <description> <check-command...>
# Re-runs <check-command> until it succeeds, sleeping <delay-secs> between
# attempts; exits the script with an error after <retries> failures.
wait_until() {
    local retries="$1" delay="$2" desc="$3"
    shift 3
    local attempt
    for attempt in $(seq 1 "${retries}"); do
        if "$@"; then
            return 0
        fi
        sleep "${delay}"
    done
    echo "Error: Timed out waiting for ${desc}."
    exit 1
}

# Poll conditions, one tiny function per resource so wait_until can run them.
# grep writes to /dev/null instead of using -q: -q exits on first match and
# can kill the upstream oc with SIGPIPE, which pipefail would report as a
# failure even when the pattern matched.
odh_operator_ready() {
    oc get csv -n openshift-operators 2>/dev/null \
        | grep "opendatahub-operator.*Succeeded" >/dev/null
}
dsc_ready() {
    oc get dsc default-dsc -o jsonpath='{.status.phase}' 2>/dev/null \
        | grep "Ready" >/dev/null
}
dspa_ready() {
    local status
    status=$(oc get dspa ragas-e2e-dspa -n "${NAMESPACE}" \
        -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null)
    [ "${status}" = "True" ]
}
lsd_deployment_exists() {
    oc get deployment/lsd-ragas-test -n "${NAMESPACE}" &>/dev/null
}

# ---------------------------------------------------------------------------
# Parse arguments
# ---------------------------------------------------------------------------
MODE=""
IMAGE_REF=""

while [[ $# -gt 0 ]]; do
    case "$1" in
        --build)
            MODE="build"
            shift
            ;;
        --image)
            MODE="image"
            # ${2:-} so a missing argument reaches our error message instead
            # of tripping set -u's "unbound variable".
            IMAGE_REF="${2:-}"
            if [[ -z "${IMAGE_REF}" ]]; then
                echo "Error: --image requires an image reference argument."
                exit 1
            fi
            shift 2
            ;;
        *)
            echo "Unknown option: $1"
            echo "Usage: $0 --build | --image <image-ref>"
            exit 1
            ;;
    esac
done

if [[ -z "${MODE}" ]]; then
    echo "Usage: $0 --build | --image <image-ref>"
    exit 1
fi

# ---------------------------------------------------------------------------
# Prerequisites
# ---------------------------------------------------------------------------
echo "Checking prerequisites..."

if ! command -v oc &> /dev/null; then
    echo "Error: oc is not installed."
    exit 1
fi

if ! oc whoami &> /dev/null; then
    echo "Error: Not logged into an OpenShift cluster. Run 'oc login' first."
    exit 1
fi

echo "  Logged in as: $(oc whoami)"
echo "  Cluster: $(oc whoami --show-server)"

# ---------------------------------------------------------------------------
# Resolve image
# ---------------------------------------------------------------------------
if [[ "${MODE}" == "build" ]]; then
    if ! command -v podman &> /dev/null; then
        echo "Error: podman is not installed (required for --build)."
        exit 1
    fi

    echo ""
    echo "=== Building image from Containerfile ==="

    # Detect cluster node architecture (not local host arch)
    NODE_ARCH=$(oc get nodes -o jsonpath='{.items[0].status.nodeInfo.architecture}' 2>/dev/null || echo "amd64")
    case "${NODE_ARCH}" in
        amd64) PLATFORM="linux/amd64" ;;
        arm64) PLATFORM="linux/arm64" ;;
        *) echo "Warning: unknown cluster architecture ${NODE_ARCH}, defaulting to linux/amd64"; PLATFORM="linux/amd64" ;;
    esac
    echo "  Cluster node architecture: ${NODE_ARCH} -> ${PLATFORM}"

    # Build the image
    LOCAL_TAG="${IMAGE_NAME}:latest"
    echo "  Building ${LOCAL_TAG}..."
    podman build --platform "${PLATFORM}" \
        --build-arg HF_TOKEN="${HF_TOKEN:-}" \
        -t "${LOCAL_TAG}" \
        -f "${SCRIPT_DIR}/Containerfile" "${REPO_ROOT}"

    # Expose the OpenShift internal registry route (idempotent)
    echo "  Exposing OpenShift internal registry..."
    oc patch configs.imageregistry.operator.openshift.io/cluster \
        --type=merge --patch '{"spec":{"defaultRoute":true}}' 2>/dev/null || true

    # Wait briefly for the route to appear. Initialized first so the check
    # below never reads an unset variable when the route never shows up.
    REGISTRY_ROUTE=""
    for _ in $(seq 1 12); do
        REGISTRY_ROUTE=$(oc get route default-route -n openshift-image-registry \
            --template='{{ .spec.host }}' 2>/dev/null) && break
        sleep 5
    done

    if [[ -z "${REGISTRY_ROUTE}" ]]; then
        echo "Error: Could not determine the OpenShift internal registry route."
        exit 1
    fi
    echo "  Registry route: ${REGISTRY_ROUTE}"

    # Login to the registry
    echo "  Logging into registry..."
    podman login --tls-verify=false -u "$(oc whoami)" -p "$(oc whoami -t)" "${REGISTRY_ROUTE}"

    # Ensure the namespace exists before pushing (registry needs the namespace/project)
    oc create namespace "${NAMESPACE}" 2>/dev/null || true

    # Tag and push
    REMOTE_TAG="${REGISTRY_ROUTE}/${NAMESPACE}/${IMAGE_NAME}:latest"
    echo "  Tagging ${LOCAL_TAG} -> ${REMOTE_TAG}"
    podman tag "${LOCAL_TAG}" "${REMOTE_TAG}"

    echo "  Pushing to internal registry..."
    podman push --tls-verify=false "${REMOTE_TAG}"

    # The in-cluster image reference uses the internal service address
    IMAGE_REF="image-registry.openshift-image-registry.svc:5000/${NAMESPACE}/${IMAGE_NAME}:latest"
    echo "  In-cluster image ref: ${IMAGE_REF}"

elif [[ "${MODE}" == "image" ]]; then
    echo ""
    echo "=== Using pre-built image ==="
    echo "  Image: ${IMAGE_REF}"
fi

# ---------------------------------------------------------------------------
# Install operators
# ---------------------------------------------------------------------------
echo ""
echo "=== Installing Open Data Hub operator ==="
oc apply -f "${SCRIPT_DIR}/manifests/operators/opendatahub-operator.yaml"

echo "Waiting for ODH operator to be ready..."
wait_until 60 10 "ODH operator to install" odh_operator_ready
echo "  ODH operator is ready."

echo ""
echo "=== Configuring DataScienceCluster ==="
oc apply -f "${SCRIPT_DIR}/manifests/operators/datasciencecluster.yaml"

echo "Waiting for DataScienceCluster to be ready..."
wait_until 60 10 "DataScienceCluster to become ready" dsc_ready
echo "  DataScienceCluster is ready."

echo ""
echo "=== Installing LlamaStack operator ==="
oc apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/release/operator.yaml

echo "Waiting for LlamaStack operator to be ready..."
oc wait --for=condition=available deployment/llama-stack-k8s-operator-controller-manager \
    -n llama-stack-k8s-operator-system --timeout=120s

# ---------------------------------------------------------------------------
# Create namespace and apply manifests
# ---------------------------------------------------------------------------
echo ""
echo "=== Setting up ${NAMESPACE} namespace ==="
oc create namespace "${NAMESPACE}" 2>/dev/null || true

echo "Applying configmaps and secrets..."
oc apply -f "${SCRIPT_DIR}/manifests/configmap-and-secrets.yaml"

echo "Creating ragas-env secret from .env..."
ENV_FILE="${REPO_ROOT}/.env"
if [[ ! -f "${ENV_FILE}" ]]; then
    echo "Error: ${ENV_FILE} not found."
    exit 1
fi
oc create secret generic ragas-env -n "${NAMESPACE}" \
    --from-env-file="${ENV_FILE}" \
    --dry-run=client -o yaml | oc apply -f -

echo "Applying MinIO (results storage)..."
oc apply -f "${SCRIPT_DIR}/manifests/minio.yaml"

echo "Applying Kubeflow pipeline resources (aws-credentials)..."
oc apply -f "${SCRIPT_DIR}/manifests/kubeflow-pipeline-resources.yaml"

echo "Applying DataSciencePipelinesApplication..."
oc apply -f "${SCRIPT_DIR}/manifests/datasciencepipelinesapplication.yaml"

echo "Applying LlamaStackDistribution CR (image: ${IMAGE_REF})..."
sed "s|__LLAMA_STACK_IMAGE__|${IMAGE_REF}|g" \
    "${SCRIPT_DIR}/manifests/llama-stack-distribution.yaml" | oc apply -f -

# ---------------------------------------------------------------------------
# Wait for MinIO (results storage)
# ---------------------------------------------------------------------------
echo ""
echo "=== Waiting for MinIO ==="
echo "Waiting for MinIO deployment..."
oc wait --for=condition=available deployment/ragas-results-minio -n "${NAMESPACE}" --timeout=120s

echo "Waiting for MinIO bucket creation job..."
oc wait --for=condition=complete job/minio-create-bucket -n "${NAMESPACE}" --timeout=120s

# ---------------------------------------------------------------------------
# Wait for Data Science Pipelines
# ---------------------------------------------------------------------------
echo ""
echo "=== Waiting for Data Science Pipelines ==="
echo "Waiting for DSPA to be ready..."
wait_until 60 10 "DSPA to become ready" dspa_ready
echo "  DSPA is ready."

# ---------------------------------------------------------------------------
# Wait for operator reconciliation and deployments
# ---------------------------------------------------------------------------
echo ""
echo "=== Waiting for deployments ==="

echo "Waiting for operator to reconcile LlamaStackDistribution..."
wait_until 30 5 "deployment/lsd-ragas-test to be created by the operator" lsd_deployment_exists
echo "  Deployment created."

echo "Waiting for llama-stack deployment..."
oc wait --for=condition=available deployment/lsd-ragas-test -n "${NAMESPACE}" --timeout=300s

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
echo ""
echo "========================================="
echo " E2E deployment complete!"
echo "========================================="
echo ""
echo "  Namespace: ${NAMESPACE}"
echo "  Image:     ${IMAGE_REF}"
echo "  Env file:  ${ENV_FILE}"
echo ""
echo "Next steps:"
echo "  1. Verify pods:    oc get pods -n ${NAMESPACE}"
echo "  2. Port forward:   oc port-forward -n ${NAMESPACE} svc/lsd-ragas-test-service 8321:8321 &"
echo "  3. Test API:       curl http://localhost:8321/v1/models"
echo ""
echo "To tear down:"
echo "  ./teardown-e2e.sh"
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Defaults for the e2e test environment.
#
# Precedence: the ragas-env secret (generated from the repo-root .env file)
# is loaded AFTER this ConfigMap, so anything set in .env overrides the
# values below.
#
# LITELLM_API_URL and LITELLM_API_KEY are intentionally blank here and MUST
# be supplied via .env.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeflow-ragas-config
  namespace: ragas-test
data:
  # --- Inference ---
  INFERENCE_MODEL: "Mistral-Small-24B-W8A8"
  LITELLM_API_URL: ""
  LITELLM_API_KEY: ""

  # --- Embedding (inline sentence-transformers; model downloaded at startup) ---
  EMBEDDING_MODEL: "nomic-ai/nomic-embed-text-v1.5"

  # --- Kubeflow pipelines ---
  KUBEFLOW_LLAMA_STACK_URL: "http://lsd-ragas-test-service.ragas-test.svc.cluster.local:8321"
  KUBEFLOW_PIPELINES_ENDPOINT: "http://ds-pipeline-ragas-e2e-dspa.ragas-test.svc.cluster.local:8888"
  KUBEFLOW_PIPELINES_TOKEN: ""
  KUBEFLOW_NAMESPACE: "ragas-test"
  KUBEFLOW_BASE_IMAGE: "quay.io/sclorg/python-312-minimal-c9s:latest"

  # --- S3 / MinIO results storage ---
  # NOTE(review): minioadmin/minioadmin are the MinIO defaults — acceptable
  # for a throwaway test cluster only, never for a shared environment.
  KUBEFLOW_RESULTS_S3_PREFIX: "s3://ragas-results/evaluations"
  KUBEFLOW_S3_CREDENTIALS_SECRET_NAME: "aws-credentials"
  RESULTS_S3_ENDPOINT: "http://ragas-results-minio.ragas-test.svc.cluster.local:9000"
  RESULTS_S3_PATH_STYLE: "true"
  AWS_ACCESS_KEY_ID: "minioadmin"
  AWS_SECRET_ACCESS_KEY: "minioadmin"
  AWS_DEFAULT_REGION: "us-east-1"

0 commit comments

Comments
 (0)