Skip to content

Commit 5bef09c

Browse files
committed
Merge remote-tracking branch 'upstream/main' into rhoai-3.2
2 parents bcd4ca5 + 5cf2852 commit 5bef09c

File tree

5 files changed

+109
-41
lines changed

5 files changed

+109
-41
lines changed

.github/workflows/redhat-distro-container.yml

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,22 @@ jobs:
4343
build-test-push:
4444
runs-on: ubuntu-latest
4545
env:
46-
INFERENCE_MODEL: Qwen/Qwen3-0.6B
47-
EMBEDDING_MODEL: ibm-granite/granite-embedding-125m-english
46+
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
47+
VERTEX_AI_LOCATION: us-central1
48+
GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
49+
# Deployment configuration - Llama Stack will support both vLLM and Vertex AI
50+
# Model names include provider prefixes for consistency
51+
VLLM_INFERENCE_MODEL: vllm-inference/Qwen/Qwen3-0.6B
52+
VERTEX_AI_INFERENCE_MODEL: vertexai/google/gemini-2.0-flash
53+
EMBEDDING_MODEL: sentence-transformers/ibm-granite/granite-embedding-125m-english
4854
VLLM_URL: http://localhost:8000/v1
4955
LLAMA_STACK_COMMIT_SHA: ${{ github.event.inputs.llama_stack_commit_sha || 'main' }}
5056
strategy:
5157
matrix:
5258
platform: [linux/amd64] # TODO: enable other arch once all pip packages are available.
59+
permissions:
60+
id-token: write # for Google Cloud authentication
61+
contents: read
5362

5463
steps:
5564
- name: Checkout repository
@@ -97,6 +106,14 @@ jobs:
97106
cache-from: type=gha
98107
cache-to: type=gha,mode=max
99108

109+
- name: Authenticate to Google Cloud (Vertex)
110+
if: github.event_name != 'workflow_dispatch'
111+
uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3
112+
with:
113+
project_id: ${{ env.VERTEX_AI_PROJECT }}
114+
workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
115+
create_credentials_file: true
116+
100117
- name: Setup vllm for image test
101118
if: github.event_name != 'workflow_dispatch'
102119
id: vllm

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Open Data Hub Llama Stack Distribution
22

3-
![Build](https://github.com/opendatahub-io/llama-stack-distribution/actions/workflows/redhat-distro-container.yml/badge.svg?branch=main)
3+
[![Build](https://github.com/opendatahub-io/llama-stack-distribution/actions/workflows/redhat-distro-container.yml/badge.svg?branch=main)](https://github.com/opendatahub-io/llama-stack-distribution/actions/workflows/redhat-distro-container.yml)
44

55
This directory contains the necessary files to build an Open Data Hub-compatible container image for [Llama Stack](https://github.com/llamastack/llama-stack).
66

tests/run_integration_tests.sh

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@ set -exuo pipefail
44

55
# Configuration
66
WORK_DIR="/tmp/llama-stack-integration-tests"
7-
INFERENCE_MODEL="${INFERENCE_MODEL:-Qwen/Qwen3-0.6B}"
8-
EMBEDDING_MODEL="${EMBEDDING_MODEL:-ibm-granite/granite-embedding-125m-english}"
9-
107
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
118

9+
# Source common test utilities
10+
# shellcheck source=/dev/null
11+
source "$SCRIPT_DIR/test_utils.sh"
12+
1213
# Get repository and version dynamically from Containerfile
1314
# Look for git URL format: git+https://github.com/*/llama-stack.git@vVERSION or @VERSION
1415
CONTAINERFILE="$SCRIPT_DIR/../distribution/Containerfile"
@@ -53,7 +54,9 @@ function clone_llama_stack() {
5354
}
5455

5556
function run_integration_tests() {
56-
echo "Running integration tests..."
57+
validate_model_parameter "$1"
58+
local model="$1"
59+
echo "Running integration tests for model $model..."
5760

5861
cd "$WORK_DIR"
5962

@@ -76,8 +79,8 @@ function run_integration_tests() {
7679
uv pip install llama-stack-client
7780
uv run pytest -s -v tests/integration/inference/ \
7881
--stack-config=server:"$STACK_CONFIG_PATH" \
79-
--text-model=vllm-inference/"$INFERENCE_MODEL" \
80-
--embedding-model=sentence-transformers/"$EMBEDDING_MODEL" \
82+
--text-model="$model" \
83+
--embedding-model="$EMBEDDING_MODEL" \
8184
-k "not ($SKIP_TESTS)"
8285
}
8386

@@ -87,11 +90,14 @@ function main() {
8790
echo " LLAMA_STACK_VERSION: $LLAMA_STACK_VERSION"
8891
echo " LLAMA_STACK_REPO: $LLAMA_STACK_REPO"
8992
echo " WORK_DIR: $WORK_DIR"
90-
echo " INFERENCE_MODEL: $INFERENCE_MODEL"
93+
echo " VLLM_INFERENCE_MODEL: $VLLM_INFERENCE_MODEL"
94+
echo " VERTEX_AI_INFERENCE_MODEL: $VERTEX_AI_INFERENCE_MODEL"
95+
echo " EMBEDDING_MODEL: $EMBEDDING_MODEL"
9196

9297
clone_llama_stack
93-
run_integration_tests
94-
98+
for model in "$VLLM_INFERENCE_MODEL" "$VERTEX_AI_INFERENCE_MODEL"; do
99+
run_integration_tests "$model"
100+
done
95101
echo "Integration tests completed successfully!"
96102
}
97103

tests/smoke.sh

Lines changed: 64 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,35 @@
22

33
set -uo pipefail
44

5+
# Source common test utilities
6+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
7+
# shellcheck source=/dev/null
8+
source "$SCRIPT_DIR/test_utils.sh"
9+
10+
LLAMA_STACK_BASE_URL="http://127.0.0.1:8321"
11+
512
function start_and_wait_for_llama_stack_container {
613
# Start llama stack
714
docker run \
815
-d \
916
--pull=never \
1017
--net=host \
1118
-p 8321:8321 \
12-
--env INFERENCE_MODEL="$INFERENCE_MODEL" \
19+
--env INFERENCE_MODEL="$VLLM_INFERENCE_MODEL" \
1320
--env EMBEDDING_MODEL="$EMBEDDING_MODEL" \
1421
--env VLLM_URL="$VLLM_URL" \
1522
--env ENABLE_SENTENCE_TRANSFORMERS=True \
1623
--env EMBEDDING_PROVIDER=sentence-transformers \
1724
--env TRUSTYAI_LMEVAL_USE_K8S=False \
25+
--env VERTEX_AI_PROJECT="$VERTEX_AI_PROJECT" \
26+
--env VERTEX_AI_LOCATION="$VERTEX_AI_LOCATION" \
27+
--env GOOGLE_APPLICATION_CREDENTIALS="/run/secrets/gcp-credentials" \
1828
--env POSTGRES_HOST="${POSTGRES_HOST:-localhost}" \
1929
--env POSTGRES_PORT="${POSTGRES_PORT:-5432}" \
2030
--env POSTGRES_DB="${POSTGRES_DB:-llamastack}" \
2131
--env POSTGRES_USER="${POSTGRES_USER:-llamastack}" \
2232
--env POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-llamastack}" \
33+
--volume "$GOOGLE_APPLICATION_CREDENTIALS:/run/secrets/gcp-credentials:ro" \
2334
--name llama-stack \
2435
"$IMAGE_NAME:$GITHUB_SHA"
2536
echo "Started Llama Stack container..."
@@ -28,7 +39,7 @@ function start_and_wait_for_llama_stack_container {
2839
echo "Waiting for Llama Stack server..."
2940
for i in {1..60}; do
3041
echo "Attempt $i to connect to Llama Stack..."
31-
resp=$(curl -fsS http://127.0.0.1:8321/v1/health)
42+
resp=$(curl -fsS $LLAMA_STACK_BASE_URL/v1/health)
3243
if [ "$resp" == '{"status":"OK"}' ]; then
3344
echo "Llama Stack server is up!"
3445
return
@@ -42,36 +53,37 @@ function start_and_wait_for_llama_stack_container {
4253
}
4354

4455
function test_model_list {
45-
for model in "$INFERENCE_MODEL" "$EMBEDDING_MODEL"; do
46-
echo "===> Looking for model $model..."
47-
resp=$(curl -fsS http://127.0.0.1:8321/v1/models)
56+
validate_model_parameter "$1"
57+
local model="$1"
58+
echo "===> Looking for model $model..."
59+
resp=$(curl -fsS $LLAMA_STACK_BASE_URL/v1/models)
60+
echo "Response: $resp"
61+
if echo "$resp" | grep -q "$model"; then
62+
echo "Model $model was found :)"
63+
else
64+
echo "Model $model was not found :("
4865
echo "Response: $resp"
49-
if echo "$resp" | grep -q "$model"; then
50-
echo "Model $model was found :)"
51-
continue
52-
else
53-
echo "Model $model was not found :("
54-
echo "Response: $resp"
55-
echo "Container logs:"
56-
docker logs llama-stack || true
57-
return 1
58-
fi
59-
done
66+
echo "Container logs:"
67+
docker logs llama-stack || true
68+
return 1
69+
fi
6070
return 0
6171
}
6272

6373
function test_model_openai_inference {
64-
echo "===> Attempting to chat with model $INFERENCE_MODEL..."
65-
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"vllm-inference/$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
74+
validate_model_parameter "$1"
75+
local model="$1"
76+
echo "===> Attempting to chat with model $model..."
77+
resp=$(curl -fsS $LLAMA_STACK_BASE_URL/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"$model\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
6678
if echo "$resp" | grep -q "green"; then
6779
echo "===> Inference is working :)"
68-
return
80+
return 0
6981
else
7082
echo "===> Inference is not working :("
7183
echo "Response: $resp"
7284
echo "Container logs:"
7385
docker logs llama-stack || true
74-
exit 1
86+
return 1
7587
fi
7688
}
7789

@@ -137,20 +149,43 @@ function test_postgres_populated {
137149
main() {
138150
echo "===> Starting smoke test..."
139151
start_and_wait_for_llama_stack_container
140-
if ! test_model_list; then
141-
echo "Model list test failed :("
142-
exit 1
143-
fi
144-
test_model_openai_inference
152+
153+
# Track failures
154+
failed_checks=()
155+
156+
echo "===> Testing model list for all models..."
157+
for model in "$VLLM_INFERENCE_MODEL" "$VERTEX_AI_INFERENCE_MODEL" "$EMBEDDING_MODEL"; do
158+
if ! test_model_list "$model"; then
159+
failed_checks+=("model_list:$model")
160+
fi
161+
done
162+
163+
echo "===> Testing inference for all models..."
164+
for model in "$VLLM_INFERENCE_MODEL" "$VERTEX_AI_INFERENCE_MODEL"; do
165+
if ! test_model_openai_inference "$model"; then
166+
failed_checks+=("inference:$model")
167+
fi
168+
done
169+
170+
# Verify PostgreSQL tables and data
145171
if ! test_postgres_tables_exist; then
146-
echo "PostgreSQL tables verification failed :("
147-
exit 1
172+
failed_checks+=("postgres:tables")
148173
fi
149174
if ! test_postgres_populated; then
150-
echo "PostgreSQL data verification failed :("
175+
failed_checks+=("postgres:data")
176+
fi
177+
178+
# Report results
179+
if [ ${#failed_checks[@]} -eq 0 ]; then
180+
echo "===> Smoke test completed successfully!"
181+
return 0
182+
else
183+
echo "===> Smoke test failed for the following:"
184+
for failure in "${failed_checks[@]}"; do
185+
echo " - $failure"
186+
done
151187
exit 1
152188
fi
153-
echo "===> Smoke test completed successfully!"
154189
}
155190

156191
main "$@"

tests/test_utils.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env bash
2+
# Common utility functions for test scripts
3+
4+
function validate_model_parameter() {
5+
# Check if model is provided
6+
if [ -z "$1" ]; then
7+
echo "Error: No model provided"
8+
return 1
9+
fi
10+
}

0 commit comments

Comments (0)