Skip to content

Commit 5bef09c

Browse files
committed
Merge remote-tracking branch 'upstream/main' into rhoai-3.2
2 parents bcd4ca5 + 5cf2852 commit 5bef09c

File tree

5 files changed

+109
-41
lines changed

5 files changed

+109
-41
lines changed

.github/workflows/redhat-distro-container.yml

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,22 @@ jobs:
4343
build-test-push:
4444
runs-on: ubuntu-latest
4545
env:
46-
INFERENCE_MODEL: Qwen/Qwen3-0.6B
47-
EMBEDDING_MODEL: ibm-granite/granite-embedding-125m-english
46+
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
47+
VERTEX_AI_LOCATION: us-central1
48+
GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
49+
# Deployment configuration - Llama Stack will support both vLLM and Vertex AI
50+
# Model names include provider prefixes for consistency
51+
VLLM_INFERENCE_MODEL: vllm-inference/Qwen/Qwen3-0.6B
52+
VERTEX_AI_INFERENCE_MODEL: vertexai/google/gemini-2.0-flash
53+
EMBEDDING_MODEL: sentence-transformers/ibm-granite/granite-embedding-125m-english
4854
VLLM_URL: http://localhost:8000/v1
4955
LLAMA_STACK_COMMIT_SHA: ${{ github.event.inputs.llama_stack_commit_sha || 'main' }}
5056
strategy:
5157
matrix:
5258
platform: [linux/amd64] # TODO: enable other arch once all pip packages are available.
59+
permissions:
60+
id-token: write # for Google Cloud authentication
61+
contents: read
5362

5463
steps:
5564
- name: Checkout repository
@@ -97,6 +106,14 @@ jobs:
97106
cache-from: type=gha
98107
cache-to: type=gha,mode=max
99108

109+
- name: Authenticate to Google Cloud (Vertex)
110+
if: github.event_name != 'workflow_dispatch'
111+
uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3
112+
with:
113+
project_id: ${{ env.VERTEX_AI_PROJECT }}
114+
workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
115+
create_credentials_file: true
116+
100117
- name: Setup vllm for image test
101118
if: github.event_name != 'workflow_dispatch'
102119
id: vllm

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Open Data Hub Llama Stack Distribution
22

3-
![Build](https://github.com/opendatahub-io/llama-stack-distribution/actions/workflows/redhat-distro-container.yml/badge.svg?branch=main)
3+
[![Build](https://github.com/opendatahub-io/llama-stack-distribution/actions/workflows/redhat-distro-container.yml/badge.svg?branch=main)](https://github.com/opendatahub-io/llama-stack-distribution/actions/workflows/redhat-distro-container.yml)
44

55
This directory contains the necessary files to build an Open Data Hub-compatible container image for [Llama Stack](https://github.com/llamastack/llama-stack).
66

tests/run_integration_tests.sh

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@ set -exuo pipefail
44

55
# Configuration
66
WORK_DIR="/tmp/llama-stack-integration-tests"
7-
INFERENCE_MODEL="${INFERENCE_MODEL:-Qwen/Qwen3-0.6B}"
8-
EMBEDDING_MODEL="${EMBEDDING_MODEL:-ibm-granite/granite-embedding-125m-english}"
9-
107
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
118

9+
# Source common test utilities
10+
# shellcheck source=/dev/null
11+
source "$SCRIPT_DIR/test_utils.sh"
12+
1213
# Get repository and version dynamically from Containerfile
1314
# Look for git URL format: git+https://github.com/*/llama-stack.git@vVERSION or @VERSION
1415
CONTAINERFILE="$SCRIPT_DIR/../distribution/Containerfile"
@@ -53,7 +54,9 @@ function clone_llama_stack() {
5354
}
5455

5556
function run_integration_tests() {
56-
echo "Running integration tests..."
57+
validate_model_parameter "$1"
58+
local model="$1"
59+
echo "Running integration tests for model $model..."
5760

5861
cd "$WORK_DIR"
5962

@@ -76,8 +79,8 @@ function run_integration_tests() {
7679
uv pip install llama-stack-client
7780
uv run pytest -s -v tests/integration/inference/ \
7881
--stack-config=server:"$STACK_CONFIG_PATH" \
79-
--text-model=vllm-inference/"$INFERENCE_MODEL" \
80-
--embedding-model=sentence-transformers/"$EMBEDDING_MODEL" \
82+
--text-model="$model" \
83+
--embedding-model="$EMBEDDING_MODEL" \
8184
-k "not ($SKIP_TESTS)"
8285
}
8386

@@ -87,11 +90,14 @@ function main() {
8790
echo " LLAMA_STACK_VERSION: $LLAMA_STACK_VERSION"
8891
echo " LLAMA_STACK_REPO: $LLAMA_STACK_REPO"
8992
echo " WORK_DIR: $WORK_DIR"
90-
echo " INFERENCE_MODEL: $INFERENCE_MODEL"
93+
echo " VLLM_INFERENCE_MODEL: $VLLM_INFERENCE_MODEL"
94+
echo " VERTEX_AI_INFERENCE_MODEL: $VERTEX_AI_INFERENCE_MODEL"
95+
echo " EMBEDDING_MODEL: $EMBEDDING_MODEL"
9196

9297
clone_llama_stack
93-
run_integration_tests
94-
98+
for model in "$VLLM_INFERENCE_MODEL" "$VERTEX_AI_INFERENCE_MODEL"; do
99+
run_integration_tests "$model"
100+
done
95101
echo "Integration tests completed successfully!"
96102
}
97103

tests/smoke.sh

Lines changed: 64 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,35 @@
22

33
set -uo pipefail
44

5+
# Source common test utilities
6+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
7+
# shellcheck source=/dev/null
8+
source "$SCRIPT_DIR/test_utils.sh"
9+
10+
LLAMA_STACK_BASE_URL="http://127.0.0.1:8321"
11+
512
function start_and_wait_for_llama_stack_container {
613
# Start llama stack
714
docker run \
815
-d \
916
--pull=never \
1017
--net=host \
1118
-p 8321:8321 \
12-
--env INFERENCE_MODEL="$INFERENCE_MODEL" \
19+
--env INFERENCE_MODEL="$VLLM_INFERENCE_MODEL" \
1320
--env EMBEDDING_MODEL="$EMBEDDING_MODEL" \
1421
--env VLLM_URL="$VLLM_URL" \
1522
--env ENABLE_SENTENCE_TRANSFORMERS=True \
1623
--env EMBEDDING_PROVIDER=sentence-transformers \
1724
--env TRUSTYAI_LMEVAL_USE_K8S=False \
25+
--env VERTEX_AI_PROJECT="$VERTEX_AI_PROJECT" \
26+
--env VERTEX_AI_LOCATION="$VERTEX_AI_LOCATION" \
27+
--env GOOGLE_APPLICATION_CREDENTIALS="/run/secrets/gcp-credentials" \
1828
--env POSTGRES_HOST="${POSTGRES_HOST:-localhost}" \
1929
--env POSTGRES_PORT="${POSTGRES_PORT:-5432}" \
2030
--env POSTGRES_DB="${POSTGRES_DB:-llamastack}" \
2131
--env POSTGRES_USER="${POSTGRES_USER:-llamastack}" \
2232
--env POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-llamastack}" \
33+
--volume "$GOOGLE_APPLICATION_CREDENTIALS:/run/secrets/gcp-credentials:ro" \
2334
--name llama-stack \
2435
"$IMAGE_NAME:$GITHUB_SHA"
2536
echo "Started Llama Stack container..."
@@ -28,7 +39,7 @@ function start_and_wait_for_llama_stack_container {
2839
echo "Waiting for Llama Stack server..."
2940
for i in {1..60}; do
3041
echo "Attempt $i to connect to Llama Stack..."
31-
resp=$(curl -fsS http://127.0.0.1:8321/v1/health)
42+
resp=$(curl -fsS $LLAMA_STACK_BASE_URL/v1/health)
3243
if [ "$resp" == '{"status":"OK"}' ]; then
3344
echo "Llama Stack server is up!"
3445
return
@@ -42,36 +53,37 @@ function start_and_wait_for_llama_stack_container {
4253
}
4354

4455
function test_model_list {
45-
for model in "$INFERENCE_MODEL" "$EMBEDDING_MODEL"; do
46-
echo "===> Looking for model $model..."
47-
resp=$(curl -fsS http://127.0.0.1:8321/v1/models)
56+
validate_model_parameter "$1"
57+
local model="$1"
58+
echo "===> Looking for model $model..."
59+
resp=$(curl -fsS $LLAMA_STACK_BASE_URL/v1/models)
60+
echo "Response: $resp"
61+
if echo "$resp" | grep -q "$model"; then
62+
echo "Model $model was found :)"
63+
else
64+
echo "Model $model was not found :("
4865
echo "Response: $resp"
49-
if echo "$resp" | grep -q "$model"; then
50-
echo "Model $model was found :)"
51-
continue
52-
else
53-
echo "Model $model was not found :("
54-
echo "Response: $resp"
55-
echo "Container logs:"
56-
docker logs llama-stack || true
57-
return 1
58-
fi
59-
done
66+
echo "Container logs:"
67+
docker logs llama-stack || true
68+
return 1
69+
fi
6070
return 0
6171
}
6272

6373
function test_model_openai_inference {
64-
echo "===> Attempting to chat with model $INFERENCE_MODEL..."
65-
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"vllm-inference/$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
74+
validate_model_parameter "$1"
75+
local model="$1"
76+
echo "===> Attempting to chat with model $model..."
77+
resp=$(curl -fsS $LLAMA_STACK_BASE_URL/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"$model\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
6678
if echo "$resp" | grep -q "green"; then
6779
echo "===> Inference is working :)"
68-
return
80+
return 0
6981
else
7082
echo "===> Inference is not working :("
7183
echo "Response: $resp"
7284
echo "Container logs:"
7385
docker logs llama-stack || true
74-
exit 1
86+
return 1
7587
fi
7688
}
7789

@@ -137,20 +149,43 @@ function test_postgres_populated {
137149
main() {
138150
echo "===> Starting smoke test..."
139151
start_and_wait_for_llama_stack_container
140-
if ! test_model_list; then
141-
echo "Model list test failed :("
142-
exit 1
143-
fi
144-
test_model_openai_inference
152+
153+
# Track failures
154+
failed_checks=()
155+
156+
echo "===> Testing model list for all models..."
157+
for model in "$VLLM_INFERENCE_MODEL" "$VERTEX_AI_INFERENCE_MODEL" "$EMBEDDING_MODEL"; do
158+
if ! test_model_list "$model"; then
159+
failed_checks+=("model_list:$model")
160+
fi
161+
done
162+
163+
echo "===> Testing inference for all models..."
164+
for model in "$VLLM_INFERENCE_MODEL" "$VERTEX_AI_INFERENCE_MODEL"; do
165+
if ! test_model_openai_inference "$model"; then
166+
failed_checks+=("inference:$model")
167+
fi
168+
done
169+
170+
# Verify PostgreSQL tables and data
145171
if ! test_postgres_tables_exist; then
146-
echo "PostgreSQL tables verification failed :("
147-
exit 1
172+
failed_checks+=("postgres:tables")
148173
fi
149174
if ! test_postgres_populated; then
150-
echo "PostgreSQL data verification failed :("
175+
failed_checks+=("postgres:data")
176+
fi
177+
178+
# Report results
179+
if [ ${#failed_checks[@]} -eq 0 ]; then
180+
echo "===> Smoke test completed successfully!"
181+
return 0
182+
else
183+
echo "===> Smoke test failed for the following:"
184+
for failure in "${failed_checks[@]}"; do
185+
echo " - $failure"
186+
done
151187
exit 1
152188
fi
153-
echo "===> Smoke test completed successfully!"
154189
}
155190

156191
main "$@"

tests/test_utils.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env bash
2+
# Common utility functions for test scripts
3+
4+
function validate_model_parameter() {
5+
# Check if model is provided
6+
if [ -z "$1" ]; then
7+
echo "Error: No model provided"
8+
return 1
9+
fi
10+
}

0 commit comments

Comments (0)