Changes from 43 commits

Commits (47 total)
898c61a
feat: enhance integration testing workflow for Vertex AI
Artemon-line Nov 25, 2025
071e64f
chore: clean up live tests workflow and update documentation
Artemon-line Nov 25, 2025
5cd3c11
refactor: update live tests workflow and enhance documentation
Artemon-line Nov 25, 2025
b89d2f0
docs: update live tests guide and local script
Artemon-line Nov 25, 2025
c4bb419
chore: remove example secrets and refactor workflows
Artemon-line Nov 26, 2025
dbc2170
chore: remove unnecessary blank line in extract-llama-stack-info.sh
Artemon-line Nov 26, 2025
56d5e8f
chore: specify shell type in workflow files
Artemon-line Nov 26, 2025
366250d
chore: streamline live tests workflow by removing unnecessary steps
Artemon-line Nov 26, 2025
1aea53d
Merge branch 'main' of github.com:opendatahub-io/llama-stack-distribu…
Artemon-line Nov 26, 2025
e6c9304
fix: update Vertex AI recordings check in workflow
Artemon-line Nov 26, 2025
33f3e86
feat: add live CI tests workflow for Llama Stack
Artemon-line Nov 28, 2025
0f3303a
refactor: update CI workflows and improve live tests documentation
Artemon-line Dec 2, 2025
dc86754
Merge branch 'main' of github.com:opendatahub-io/llama-stack-distribu…
Artemon-line Dec 2, 2025
9479da7
chore: update Llama Stack version in README
Artemon-line Dec 2, 2025
bd4bddb
chore: remove obsolete Vertex secrets check from CI workflow
Artemon-line Dec 2, 2025
e09518c
chore: enhance CI workflow for Vertex AI and update live tests docume…
Artemon-line Dec 2, 2025
52f998d
chore: update CI workflows to streamline Red Hat distribution contain…
Artemon-line Dec 2, 2025
5ee8a6e
chore: update CI workflow for integration tests and remove obsolete s…
Artemon-line Dec 2, 2025
c050543
chore: update live tests guide and remove obsolete local test script
Artemon-line Dec 2, 2025
661ad57
chore: update Llama Stack version in README to 7c0bb39
Artemon-line Dec 2, 2025
8f1e9be
feat: add setup-llama-stack action for container management
Artemon-line Dec 2, 2025
470027e
chore: update llama_stack_provider_trustyai_fms version to 0.3.1 in b…
Artemon-line Dec 2, 2025
319c58e
chore: improve Llama Stack health check logic and update integration …
Artemon-line Dec 2, 2025
1a9391d
chore: refactor integration test workflow to use sequential execution…
Artemon-line Dec 3, 2025
a8872db
Merge branch 'main' of github.com:opendatahub-io/llama-stack-distribu…
Artemon-line Dec 3, 2025
13fc1ea
chore: update Python version in pre-commit configuration to 3.14; add…
Artemon-line Dec 3, 2025
04c3079
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 3, 2025
00e6034
chore: enhance setup-llama-stack action with improved GCP credentials…
Artemon-line Dec 3, 2025
6bbc381
fix: remove unnecessary whitespace in setup-llama-stack action YAML file
Artemon-line Dec 3, 2025
1e51609
chore: simplify Cloud SDK setup in integration tests workflow by remo…
Artemon-line Dec 3, 2025
41c7913
chore: improve GCP credentials handling in setup-llama-stack action a…
Artemon-line Dec 3, 2025
4418ae4
chore: refactor integration tests workflow to utilize a matrix strate…
Artemon-line Dec 3, 2025
8ba39a8
chore: update integration tests workflow to run vLLM and Vertex AI te…
Artemon-line Dec 3, 2025
7274d14
chore: disable caching in Python setup for redhat-distro-container an…
Artemon-line Dec 3, 2025
6f88ffc
chore: update caching configuration in Python setup for redhat-distro…
Artemon-line Dec 3, 2025
59db223
chore: enhance setup-vllm action to wait for container readiness and …
Artemon-line Dec 4, 2025
c408bc3
chore: refine integration tests workflow by renaming VLLM startup ste…
Artemon-line Dec 4, 2025
7d587be
chore: remove Llama Stack container verification from smoke test scri…
Artemon-line Dec 4, 2025
dff2183
chore: update integration tests workflow to run vLLM smoke tests unco…
Artemon-line Dec 4, 2025
0fd2176
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 4, 2025
c942051
chore: enhance redhat-distro-container workflow by dynamically determ…
Artemon-line Dec 5, 2025
f936fae
Merge branch 'RHAIENG-1793-Create-ODH-distro-image-smoke-test-for-Ver…
Artemon-line Dec 5, 2025
6d3ab30
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 8, 2025
8b9e361
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 8, 2025
1d12a3f
chore: update workflows to improve architecture support and streamlin…
Artemon-line Dec 9, 2025
36dafa2
Merge branch 'RHAIENG-1793-Create-ODH-distro-image-smoke-test-for-Ver…
Artemon-line Dec 9, 2025
27f427f
Merge branch 'main' into RHAIENG-1793-Create-ODH-distro-image-smoke-t…
Artemon-line Dec 9, 2025
126 changes: 126 additions & 0 deletions .github/actions/setup-llama-stack/action.yml
@@ -0,0 +1,126 @@
name: Setup Llama Stack
description: Start Llama Stack container and wait for it to be ready
inputs:
image_name:
description: 'Container image name'
required: true
image_tag:
description: 'Container image tag'
required: true
inference_model:
description: 'Inference model name'
required: true
embedding_model:
description: 'Embedding model name'
required: true
vllm_url:
description: 'VLLM URL (for vLLM provider)'
required: false
default: ''
vertex_ai_project:
description: 'Vertex AI project ID (for Vertex AI provider)'
required: false
default: ''
vertex_ai_location:
description: 'Vertex AI location (for Vertex AI provider)'
required: false
default: 'us-central1'
runs:
using: "composite"
steps:
- name: Start Llama Stack container
shell: bash
env:
IMAGE_NAME: ${{ inputs.image_name }}
IMAGE_TAG: ${{ inputs.image_tag }}
INFERENCE_MODEL: ${{ inputs.inference_model }}
EMBEDDING_MODEL: ${{ inputs.embedding_model }}
VLLM_URL: ${{ inputs.vllm_url }}
VERTEX_AI_PROJECT: ${{ inputs.vertex_ai_project }}
VERTEX_AI_LOCATION: ${{ inputs.vertex_ai_location }}
run: |
# Start llama stack container
# Build docker run command with conditional environment variables
DOCKER_ENV_ARGS=(
--env INFERENCE_MODEL="$INFERENCE_MODEL"
--env EMBEDDING_MODEL="$EMBEDDING_MODEL"
--env TRUSTYAI_LMEVAL_USE_K8S=False
)

# Add VLLM_URL only if defined and non-empty
if [ -n "$VLLM_URL" ]; then
DOCKER_ENV_ARGS+=(--env VLLM_URL="$VLLM_URL")
fi

# Add VERTEX_AI_PROJECT only if defined and non-empty
if [ -n "$VERTEX_AI_PROJECT" ]; then
DOCKER_ENV_ARGS+=(--env VERTEX_AI_PROJECT="$VERTEX_AI_PROJECT")
fi

# Add VERTEX_AI_LOCATION only if defined and non-empty
if [ -n "$VERTEX_AI_LOCATION" ]; then
DOCKER_ENV_ARGS+=(--env VERTEX_AI_LOCATION="$VERTEX_AI_LOCATION")
fi

# Mount GCP credentials if they exist (for Vertex AI)
# Mount to /run/secrets/gcp-credentials to match local Podman setup
# Use GOOGLE_APPLICATION_CREDENTIALS env var if set (from google-github-actions/auth with create_credentials_file: true)
# Otherwise fall back to standard location
DOCKER_VOLUME_ARGS=()
if [ -n "$GOOGLE_APPLICATION_CREDENTIALS" ] && [ -f "$GOOGLE_APPLICATION_CREDENTIALS" ]; then
# Use the credentials file path from GOOGLE_APPLICATION_CREDENTIALS env var
GCP_CREDENTIALS_FILE="$GOOGLE_APPLICATION_CREDENTIALS"
elif [ -f "$HOME/.config/gcloud/application_default_credentials.json" ]; then
# Fall back to standard location
GCP_CREDENTIALS_FILE="$HOME/.config/gcloud/application_default_credentials.json"
else
GCP_CREDENTIALS_FILE=""
fi

if [ -n "$GCP_CREDENTIALS_FILE" ] && [ -f "$GCP_CREDENTIALS_FILE" ]; then
# Mount credentials file to /run/secrets/gcp-credentials (matching local Podman setup)
DOCKER_VOLUME_ARGS=(
-v "$GCP_CREDENTIALS_FILE:/run/secrets/gcp-credentials:ro"
)
DOCKER_ENV_ARGS+=(--env GOOGLE_APPLICATION_CREDENTIALS="/run/secrets/gcp-credentials")
echo "Mounting GCP credentials to /run/secrets/gcp-credentials for Vertex AI support"
echo "Credentials file: $GCP_CREDENTIALS_FILE"
else
echo "Warning: GCP credentials file not found"
echo "Checked GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-not set}"
echo "Checked standard location: $HOME/.config/gcloud/application_default_credentials.json"
echo "Vertex AI authentication may fail"
fi

docker run \
-d \
--pull=never \
--net=host \
-p 8321:8321 \
"${DOCKER_VOLUME_ARGS[@]}" \
"${DOCKER_ENV_ARGS[@]}" \
--name llama-stack \
"$IMAGE_NAME:$IMAGE_TAG"
echo "Started Llama Stack container..."

- name: Wait for Llama Stack to be ready
shell: bash
run: |
# Wait for llama stack to be ready by doing a health check
echo "Waiting for Llama Stack server..."
for i in {1..60}; do
echo "Attempt $i/60 to connect to Llama Stack..."
if resp=$(curl -fsS http://127.0.0.1:8321/v1/health 2>/dev/null); then
if [ "$resp" == '{"status":"OK"}' ]; then
echo "Llama Stack server is up!"
exit 0
fi
else
echo "Connection failed, retrying in 1 second..."
fi
sleep 1
done
echo "Llama Stack server failed to start after 60 attempts :("
echo "Container logs:"
docker logs llama-stack || true
exit 1
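The readiness step above polls `http://127.0.0.1:8321/v1/health` once per second until the response body is exactly `{"status":"OK"}`, then dumps container logs on timeout. For local debugging, the same retry pattern can be factored into a reusable function; the sketch below is hypothetical (the `wait_for_ready` name and the `probe_cmd`/`expected` parameterization are assumptions, not code from this PR):

```shell
# Hypothetical standalone version of the action's readiness loop.
# probe_cmd is run each attempt; success means its stdout equals `expected`.
wait_for_ready() {
  local probe_cmd="$1" expected="$2" attempts="${3:-60}" delay="${4:-1}"
  local i resp
  for ((i = 1; i <= attempts; i++)); do
    # Mirrors: resp=$(curl -fsS http://127.0.0.1:8321/v1/health)
    if resp=$($probe_cmd 2>/dev/null) && [ "$resp" = "$expected" ]; then
      return 0
    fi
    sleep "$delay"
  done
  return 1
}
```

For example, `wait_for_ready 'curl -fsS http://127.0.0.1:8321/v1/health' '{"status":"OK"}'` reproduces the action's check against a locally running container.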
26 changes: 18 additions & 8 deletions .github/actions/setup-vllm/action.yml
@@ -1,12 +1,12 @@
name: Setup VLLM
description: Start VLLM
description: Start VLLM container and wait for it to be ready
runs:
using: "composite"
steps:
- name: Start VLLM
shell: bash
run: |
# Start vllm container
# Start vllm container in background (non-blocking)
docker run -d \
--name vllm \
--privileged=true \
@@ -19,10 +19,20 @@ runs:
--model /root/.cache/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \
--max-model-len 8192
echo "vLLM container started (waiting for it to be ready)..."

# Wait for vllm to be ready
echo "Waiting for vllm to be ready..."
timeout 900 bash -c 'until curl -fsS http://localhost:8000/health >/dev/null; do
echo "Waiting for vllm..."
sleep 5
done'
- name: Wait for VLLM to be ready
shell: bash
run: |
echo "Validating vLLM is ready..."
for i in {1..60}; do
if curl -fsS http://localhost:8000/health >/dev/null 2>&1; then
echo "vLLM is ready!"
exit 0
fi
echo "Waiting for vLLM... ($i/60)"
sleep 2
done
echo "vLLM failed to start after 120 seconds"
docker logs vllm || true
exit 1
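The setup-llama-stack action earlier in this diff builds `DOCKER_ENV_ARGS` by appending an `--env` flag only for each variable that is set and non-empty, so the container never receives empty provider settings. That pattern generalizes to a small helper; the sketch below is a hypothetical refactor (the `build_env_args` name and the indirect-expansion approach are assumptions, not code from this PR):

```shell
# Hypothetical generalization of the conditional DOCKER_ENV_ARGS pattern:
# emit `--env NAME=value` only for the named variables that are non-empty.
build_env_args() {
  local -a args=()
  local name val
  for name in "$@"; do
    val="${!name:-}"  # bash indirect expansion: value of the variable named $name
    if [ -n "$val" ]; then
      args+=(--env "$name=$val")
    fi
  done
  printf '%s\n' "${args[@]}"
}
```

`build_env_args VLLM_URL VERTEX_AI_PROJECT VERTEX_AI_LOCATION` prints one argument per line, which a caller could collect back into an array with `mapfile -t`; the action instead appends to the array inline, which avoids the subshell but repeats the `[ -n ... ]` check per variable.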