Closed · 47 commits
898c61a
feat: enhance integration testing workflow for Vertex AI
Artemon-line Nov 25, 2025
071e64f
chore: clean up live tests workflow and update documentation
Artemon-line Nov 25, 2025
5cd3c11
refactor: update live tests workflow and enhance documentation
Artemon-line Nov 25, 2025
b89d2f0
docs: update live tests guide and local script
Artemon-line Nov 25, 2025
c4bb419
chore: remove example secrets and refactor workflows
Artemon-line Nov 26, 2025
dbc2170
chore: remove unnecessary blank line in extract-llama-stack-info.sh
Artemon-line Nov 26, 2025
56d5e8f
chore: specify shell type in workflow files
Artemon-line Nov 26, 2025
366250d
chore: streamline live tests workflow by removing unnecessary steps
Artemon-line Nov 26, 2025
1aea53d
Merge branch 'main' of github.com:opendatahub-io/llama-stack-distribu…
Artemon-line Nov 26, 2025
e6c9304
fix: update Vertex AI recordings check in workflow
Artemon-line Nov 26, 2025
33f3e86
feat: add live CI tests workflow for Llama Stack
Artemon-line Nov 28, 2025
0f3303a
refactor: update CI workflows and improve live tests documentation
Artemon-line Dec 2, 2025
dc86754
Merge branch 'main' of github.com:opendatahub-io/llama-stack-distribu…
Artemon-line Dec 2, 2025
9479da7
chore: update Llama Stack version in README
Artemon-line Dec 2, 2025
bd4bddb
chore: remove obsolete Vertex secrets check from CI workflow
Artemon-line Dec 2, 2025
e09518c
chore: enhance CI workflow for Vertex AI and update live tests docume…
Artemon-line Dec 2, 2025
52f998d
chore: update CI workflows to streamline Red Hat distribution contain…
Artemon-line Dec 2, 2025
5ee8a6e
chore: update CI workflow for integration tests and remove obsolete s…
Artemon-line Dec 2, 2025
c050543
chore: update live tests guide and remove obsolete local test script
Artemon-line Dec 2, 2025
661ad57
chore: update Llama Stack version in README to 7c0bb39
Artemon-line Dec 2, 2025
8f1e9be
feat: add setup-llama-stack action for container management
Artemon-line Dec 2, 2025
470027e
chore: update llama_stack_provider_trustyai_fms version to 0.3.1 in b…
Artemon-line Dec 2, 2025
319c58e
chore: improve Llama Stack health check logic and update integration …
Artemon-line Dec 2, 2025
1a9391d
chore: refactor integration test workflow to use sequential execution…
Artemon-line Dec 3, 2025
a8872db
Merge branch 'main' of github.com:opendatahub-io/llama-stack-distribu…
Artemon-line Dec 3, 2025
13fc1ea
chore: update Python version in pre-commit configuration to 3.14; add…
Artemon-line Dec 3, 2025
04c3079
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 3, 2025
00e6034
chore: enhance setup-llama-stack action with improved GCP credentials…
Artemon-line Dec 3, 2025
6bbc381
fix: remove unnecessary whitespace in setup-llama-stack action YAML file
Artemon-line Dec 3, 2025
1e51609
chore: simplify Cloud SDK setup in integration tests workflow by remo…
Artemon-line Dec 3, 2025
41c7913
chore: improve GCP credentials handling in setup-llama-stack action a…
Artemon-line Dec 3, 2025
4418ae4
chore: refactor integration tests workflow to utilize a matrix strate…
Artemon-line Dec 3, 2025
8ba39a8
chore: update integration tests workflow to run vLLM and Vertex AI te…
Artemon-line Dec 3, 2025
7274d14
chore: disable caching in Python setup for redhat-distro-container an…
Artemon-line Dec 3, 2025
6f88ffc
chore: update caching configuration in Python setup for redhat-distro…
Artemon-line Dec 3, 2025
59db223
chore: enhance setup-vllm action to wait for container readiness and …
Artemon-line Dec 4, 2025
c408bc3
chore: refine integration tests workflow by renaming VLLM startup ste…
Artemon-line Dec 4, 2025
7d587be
chore: remove Llama Stack container verification from smoke test scri…
Artemon-line Dec 4, 2025
dff2183
chore: update integration tests workflow to run vLLM smoke tests unco…
Artemon-line Dec 4, 2025
0fd2176
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 4, 2025
c942051
chore: enhance redhat-distro-container workflow by dynamically determ…
Artemon-line Dec 5, 2025
f936fae
Merge branch 'RHAIENG-1793-Create-ODH-distro-image-smoke-test-for-Ver…
Artemon-line Dec 5, 2025
6d3ab30
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 8, 2025
8b9e361
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 8, 2025
1d12a3f
chore: update workflows to improve architecture support and streamlin…
Artemon-line Dec 9, 2025
36dafa2
Merge branch 'RHAIENG-1793-Create-ODH-distro-image-smoke-test-for-Ver…
Artemon-line Dec 9, 2025
27f427f
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 9, 2025
180 changes: 180 additions & 0 deletions .github/workflows/live-tests.yml
@@ -0,0 +1,180 @@
name: Live CI tests

on:
  schedule:
    - cron: '0 2 * * 1' # Every Monday at 2 AM UTC
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event_name }}
  cancel-in-progress: false

env:
  REGISTRY: quay.io
  IMAGE_NAME: quay.io/opendatahub/llama-stack
  LLAMA_STACK_TEST_INFERENCE_MODE: live

jobs:
  live-tests:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      id-token: write
      pull-requests: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Check required secrets
        run: |
          if [ -z "${{ secrets.VERTEX_AI_PROJECT }}" ] || [ -z "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}" ]; then
            echo "Error: VERTEX_AI_PROJECT and GCP_WORKLOAD_IDENTITY_PROVIDER secrets must be set"
            exit 1
          fi

      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v2

      - name: Configure gcloud
        run: |
          gcloud config set project ${{ secrets.VERTEX_AI_PROJECT }}
          gcloud config set compute/region us-central1

      - name: Install uv
        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
        with:
          python-version: 3.12
          version: 0.7.6

      - name: Build image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          context: .
          file: distribution/Containerfile
          platforms: linux/amd64
          push: false
          tags: ${{ env.IMAGE_NAME }}:vertex-test-${{ github.sha }}
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Start Llama Stack container
        run: |
          # --net=host already exposes port 8321, so no -p mapping is needed
          docker run -d --net=host \
            -v "$HOME/.config/gcloud:/root/.config/gcloud:ro" \
            -e VERTEX_AI_PROJECT="${{ secrets.VERTEX_AI_PROJECT }}" \
            -e VERTEX_AI_LOCATION="us-central1" \
            -e GOOGLE_APPLICATION_CREDENTIALS="" \
            --name llama-stack-vertex \
            "${{ env.IMAGE_NAME }}:vertex-test-${{ github.sha }}"

          for i in {1..60}; do
            curl -fsS http://127.0.0.1:8321/v1/health 2>/dev/null | grep -q '"status":"OK"' && break
            [ "$i" -eq 60 ] && { docker logs llama-stack-vertex; docker rm -f llama-stack-vertex; exit 1; }
            sleep 1
          done

      - name: Run integration tests
        id: run-tests
        env:
          VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
          VERTEX_AI_LOCATION: us-central1
          PROVIDER_MODEL: vertexai/gemini-1.5-flash
        shell: bash
        run: |
          ./tests/run_integration_tests.sh

          # Extract recordings
          WORK_DIR="/tmp/llama-stack-integration-tests"
          RECORDINGS_DIR="$WORK_DIR/tests/integration/recordings"

          [ ! -d "$RECORDINGS_DIR" ] && exit 0

          # Find Vertex AI recordings
          PROVIDER_RECORDINGS=$(find "$RECORDINGS_DIR" -type f \( -name "*vertex*" -o -name "*vertexai*" -o -path "*vertex*" \) 2>/dev/null || true)
          [ -z "$PROVIDER_RECORDINGS" ] && exit 0

          # Copy to repository
          REPO_RECORDINGS_DIR="tests/integration/recordings"
          mkdir -p "$REPO_RECORDINGS_DIR"
          echo "$PROVIDER_RECORDINGS" | while IFS= read -r recording; do
            [ -n "$recording" ] && [ -f "$recording" ] && {
              relative_path="${recording#"$RECORDINGS_DIR"/}"
              mkdir -p "$REPO_RECORDINGS_DIR/$(dirname "$relative_path")"
              cp "$recording" "$REPO_RECORDINGS_DIR/$relative_path"
            }
          done

      - name: Upload recordings artifact
        # Each job runs in a fresh workspace, so recordings must be handed
        # to the create-pr job via an artifact
        uses: actions/upload-artifact@v4
        with:
          name: vertex-recordings
          path: tests/integration/recordings/
          if-no-files-found: ignore

      - name: Cleanup
        if: always()
        run: docker rm -f llama-stack-vertex || true

  create-pr:
    needs: live-tests
    if: always() && needs.live-tests.result == 'success' && github.event_name != 'pull_request'
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Download recordings artifact
        uses: actions/download-artifact@v4
        continue-on-error: true
        with:
          name: vertex-recordings
          path: tests/integration/recordings/

      - name: Extract all recordings and create PR
        env:
          LLAMA_STACK_REPO: https://github.com/llamastack/llama-stack.git
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Absolute path so the checks below still work after the cd into the clone
          REPO_RECORDINGS_DIR="$GITHUB_WORKSPACE/tests/integration/recordings"
          [ ! -d "$REPO_RECORDINGS_DIR" ] && exit 0

          # Check if there are any recordings
          [ -z "$(find "$REPO_RECORDINGS_DIR" -type f)" ] && exit 0

          # Clone llama-stack and update recordings
          LLAMA_STACK_CLONE_DIR=$(mktemp -d)
          git clone "https://x-access-token:$GITHUB_TOKEN@github.com/llamastack/llama-stack.git" "$LLAMA_STACK_CLONE_DIR"
          cd "$LLAMA_STACK_CLONE_DIR"

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          BRANCH_NAME="update-recordings-$(date +%Y%m%d-%H%M%S)-${{ github.run_id }}"
          git checkout -b "$BRANCH_NAME"

          TARGET_RECORDINGS_DIR="$LLAMA_STACK_CLONE_DIR/tests/integration/recordings"
          mkdir -p "$TARGET_RECORDINGS_DIR"

          # Copy all recordings
          find "$REPO_RECORDINGS_DIR" -type f | while IFS= read -r recording; do
            relative_path="${recording#"$REPO_RECORDINGS_DIR"/}"
            mkdir -p "$(dirname "$TARGET_RECORDINGS_DIR/$relative_path")"
            cp "$recording" "$TARGET_RECORDINGS_DIR/$relative_path"
          done

          # Stage first: plain `git diff` ignores untracked files, so check the
          # staged diff to detect brand-new recordings too
          git add tests/integration/recordings/
          git diff --cached --quiet && exit 0

          git commit -m "chore: update test recordings from live tests

          Updated recordings from successful live CI tests run on $(date -u +%Y-%m-%d).
          Generated by: ${{ github.workflow }} workflow
          Run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"

          git push origin "$BRANCH_NAME" || exit 1

          gh auth setup-git
          gh pr create \
            --repo llamastack/llama-stack \
            --title "chore: update test recordings" \
            --body "This PR updates test recordings from successful live CI tests.

          **Source**: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
          **Date**: $(date -u +%Y-%m-%d)" \
            --head "$BRANCH_NAME" \
            --base main || true
67 changes: 66 additions & 1 deletion .github/workflows/redhat-distro-container.yml
@@ -40,7 +40,55 @@ env:
  IMAGE_NAME: quay.io/opendatahub/llama-stack # tags for the image will be added dynamically

jobs:
  check-vertex-recordings:
    name: Check for Vertex AI recordings
    runs-on: ubuntu-latest
    if: github.event_name != 'workflow_dispatch'
    outputs:
      vertex_recordings_exist: ${{ steps.detect-vertex.outputs.vertex_recordings_exist }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

      - name: Detect Vertex AI recordings
        id: detect-vertex
        shell: bash
        run: |
          set -euo pipefail
          WORK_DIR="/tmp/llama-stack-integration-tests"
          source scripts/extract-llama-stack-info.sh

          vertex_recordings_exist=false

          if [ ! -d "$WORK_DIR" ]; then
            git clone "$LLAMA_STACK_REPO" "$WORK_DIR"
          fi

          cd "$WORK_DIR"
          git fetch origin
          git checkout "v$LLAMA_STACK_VERSION" || git checkout "$LLAMA_STACK_VERSION"

          RECORDINGS_DIR="$WORK_DIR/tests/integration/recordings"
          if [ -d "$RECORDINGS_DIR" ]; then
            VERTEX_RECORDINGS=$(find "$RECORDINGS_DIR" -type f \( -name "*vertex*" -o -name "*vertexai*" -o -path "*vertex*" \) 2>/dev/null | head -1 || true)
            if [ -n "$VERTEX_RECORDINGS" ]; then
              echo "Found Vertex AI recordings in $RECORDINGS_DIR"
              vertex_recordings_exist=true
            else
              echo "No Vertex AI recordings found in $RECORDINGS_DIR"
            fi
          else
            echo "Recordings directory not found at $RECORDINGS_DIR"
          fi

          echo "vertex_recordings_exist=$vertex_recordings_exist" >> "$GITHUB_OUTPUT"
          if [ "$vertex_recordings_exist" != "true" ]; then
            echo "No recordings detected; Vertex AI tests will be skipped." >> "$GITHUB_STEP_SUMMARY"
          fi

  build-test-push:
    needs:
      - check-vertex-recordings
    runs-on: ubuntu-latest
    env:
      INFERENCE_MODEL: Qwen/Qwen3-0.6B
@@ -108,12 +156,29 @@ jobs:
        shell: bash
        run: ./tests/smoke.sh

      - name: Integration tests (vLLM)
        if: github.event_name != 'workflow_dispatch'
        id: integration-tests
        shell: bash
        run: ./tests/run_integration_tests.sh

      - name: Integration tests (Vertex AI)
        if: github.event_name != 'workflow_dispatch' && needs.check-vertex-recordings.outputs.vertex_recordings_exist == 'true'
        id: integration-tests-vertex
        env:
          # Dummy values are enough when replaying recordings - no live API calls are made
          VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT || 'dummy-project' }}
          VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION || 'us-central1' }}
        shell: bash
        run: |
          # Explicitly unset live mode so the tests replay recordings
          unset LLAMA_STACK_TEST_INFERENCE_MODE
          echo "Running Vertex AI tests with recordings (no live API calls)"
          ./tests/run_integration_tests.sh

      - name: Gather logs and debugging information
        if: always()
        shell: bash
108 changes: 108 additions & 0 deletions docs/live-tests-guide.md
@@ -0,0 +1,108 @@
# Live Tests Guide

This guide explains how to run live tests and generate recordings for supported providers.

## Supported Providers

- **vllm** - vLLM inference server (default; requires a local container)
- **vertex** - Google Cloud Vertex AI

The script auto-detects the provider from environment variables. If no provider is specified, it defaults to vllm.
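The detection can be pictured as a small shell function. This is an illustrative sketch, not the actual code from `scripts/run-live-tests-local.sh`; it keys on `VERTEX_AI_PROJECT` because that is the variable this guide asks you to export for Vertex AI.

```bash
# Illustrative sketch of env-based provider detection; the real logic
# lives in scripts/run-live-tests-local.sh and may differ.
detect_provider() {
  if [ -n "${VERTEX_AI_PROJECT:-}" ]; then
    echo "vertex"
  else
    echo "vllm"  # default when no provider-specific variables are set
  fi
}

echo "Selected provider: $(detect_provider)"
```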

## Prerequisites

- **Podman** - For building and running containers
- **gcloud CLI** - For GCP authentication (Vertex AI only)

## Local Testing

### Using vLLM (Default)

```bash
# Start the vLLM container
podman run -d \
  --name vllm \
  --privileged=true \
  --net=host \
  quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
  --host 0.0.0.0 \
  --port 8000 \
  --enable-auto-tool-choice \
  --tool-call-parser hermes \
  --model /root/.cache/Qwen3-0.6B \
  --served-model-name Qwen/Qwen3-0.6B \
  --max-model-len 8192

# Wait for vLLM to be ready
timeout 900 bash -c 'until curl -fsS http://localhost:8000/health >/dev/null; do
  echo "Waiting for vLLM..."
  sleep 5
done'

# Run the script
./scripts/run-live-tests-local.sh

# Clean up
podman rm -f vllm
```

### Using Vertex AI

```bash
# Set GCP project and authenticate
export VERTEX_AI_PROJECT=your-gcp-project-id
gcloud auth application-default login

# Run the script
./scripts/run-live-tests-local.sh
```

The script will:
- Build the container image
- Start the Llama Stack container with the selected provider
- Run integration tests in live mode
- Extract recordings to `tests/integration/recordings/`
- Clean up the container

**Note**: The script automatically creates a podman secret from your GCP credentials if needed.
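If you ever need to create that secret by hand, the setup amounts to something like the sketch below. The secret name `gcp-adc` is a placeholder, so check the script for the name it actually uses.

```bash
# Hypothetical manual equivalent of the script's secret setup;
# the secret name "gcp-adc" is illustrative.
ADC="$HOME/.config/gcloud/application_default_credentials.json"
if command -v podman >/dev/null 2>&1; then
  # create the secret only if it does not exist yet
  podman secret inspect gcp-adc >/dev/null 2>&1 \
    || podman secret create gcp-adc "$ADC"
fi
```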

## CI/CD Workflow

The workflow `.github/workflows/live-tests.yml` automatically runs live tests and updates recordings.

### Triggering

- **Scheduled**: Runs every Monday at 2 AM UTC
- **Manual**: `gh workflow run live-tests.yml` or via GitHub UI

### Required Secrets

- `VERTEX_AI_PROJECT` - GCP project ID
- `GCP_WORKLOAD_IDENTITY_PROVIDER` - For OIDC authentication

The workflow uses `us-central1` as the GCP region.

### How It Works

1. Authenticates to GCP using OIDC (Workload Identity Federation)
2. Builds and starts the Llama Stack container with Vertex AI provider
3. Runs integration tests in `live` mode to generate recordings
4. Creates a PR to `llamastack/llama-stack` with updated recordings (if tests pass)
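Step 3 can be reproduced outside CI by exporting the same environment the workflow sets; `your-gcp-project-id` below is a placeholder.

```bash
# Environment the workflow sets before running the tests (sketch);
# your-gcp-project-id is a placeholder value.
export LLAMA_STACK_TEST_INFERENCE_MODE=live
export VERTEX_AI_PROJECT=your-gcp-project-id
export VERTEX_AI_LOCATION=us-central1
# ./tests/run_integration_tests.sh  # then run the suite
echo "mode=$LLAMA_STACK_TEST_INFERENCE_MODE project=$VERTEX_AI_PROJECT"
```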

## Recordings

Recordings are generated during live tests and saved to `tests/integration/recordings/`. Look for files with "vertex"/"vertexai" (Vertex AI) or "vllm"/"vllm-inference" (vLLM) in their names or paths.

- **Local**: Recordings are automatically extracted to `tests/integration/recordings/` after running the script
- **CI**: Recordings are pushed to `llamastack/llama-stack` via PR

To include recordings in your PR, commit and push the changes from `tests/integration/recordings/`.
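The commit itself is ordinary git. The following self-contained demo (throwaway repository, example recording file) shows the staging pattern:

```bash
# Self-contained demo of staging recordings for a PR; the repository
# and recording file here are throwaway examples.
set -e
demo=$(mktemp -d)
cd "$demo"
git init -q
git config user.email "ci@example.com"
git config user.name "ci"
mkdir -p tests/integration/recordings
echo '{}' > tests/integration/recordings/vertex-example.json
git add tests/integration/recordings/
git commit -qm "chore: update test recordings"
git log --oneline
```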

## Adding New Providers

To add a new provider:

1. Update `scripts/run-live-tests-local.sh` - Add provider detection, environment variables, and recording patterns
2. Update `tests/run_integration_tests.sh` - Add provider detection and model selection
3. Update `.github/workflows/live-tests.yml` - Add authentication and container setup if needed
4. Update `distribution/run.yaml` - Ensure provider configuration is present
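For step 1, the recording-pattern side mirrors the `find` expression the workflows use for Vertex AI; `myprovider` below is a placeholder name, demonstrated against a throwaway directory.

```bash
# Demo of the recording-detection pattern a new provider would add,
# run against a throwaway directory; "myprovider" is a placeholder.
rec_dir=$(mktemp -d)
mkdir -p "$rec_dir/myprovider"
echo '{}' > "$rec_dir/myprovider/chat.json"
matches=$(find "$rec_dir" -type f \( -name "*myprovider*" -o -path "*myprovider*" \) 2>/dev/null || true)
echo "$matches"
```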