Skip to content

Commit 4d50ebe

Browse files
ci: update distro container action with smoke test (#16)
* ci: update distro container action with smoke test also add CI linter to pre-commit config Assisted-by: coderabbitai Signed-off-by: Nathan Weinberg <nweinber@redhat.com> * chore: bump trustyai_fms to 0.2.1 to mirror d/s change and unblock CI Signed-off-by: Nathan Weinberg <nweinber@redhat.com> * chore: bump lmeval to 0.2.4 to mirror d/s change and unblock CI Signed-off-by: Nathan Weinberg <nweinber@redhat.com> * ci: add vllm action and integration to image test Co-authored-by: Derek Higgins <derekh@redhat.com> Signed-off-by: Nathan Weinberg <nweinber@redhat.com> * ci: standardize concurrency rules Signed-off-by: Nathan Weinberg <nweinber@redhat.com> * fix: add env var to disable K8S usage for LM Eval Signed-off-by: Nathan Weinberg <nweinber@redhat.com> * refactor: move smoke tests into a separate shell script also add shell linter to pre-commit config Assisted-by: coderabbitai Signed-off-by: Nathan Weinberg <nweinber@redhat.com> * fix: modify VLLM_URL for containerized environments Signed-off-by: Nathan Weinberg <nweinber@redhat.com> --------- Signed-off-by: Nathan Weinberg <nweinber@redhat.com> Co-authored-by: Derek Higgins <derekh@redhat.com>
1 parent 9448807 commit 4d50ebe

File tree

10 files changed

+205
-63
lines changed

10 files changed

+205
-63
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# Composite action: start a CPU vllm container serving Llama-3.2-1B-Instruct
# on the host network (port 8000) and block until its health endpoint answers.
name: Setup VLLM
description: Start VLLM
runs:
  using: "composite"
  steps:
    - name: Start VLLM
      shell: bash
      run: |
        # Start vllm container.
        # NOTE: the original also passed "-p 8000:8000", but port publishing
        # is ignored (with a Docker warning) when --net=host is used, so it
        # has been dropped; the host network already exposes port 8000.
        docker run -d \
          --name vllm \
          --privileged=true \
          --net=host \
          quay.io/higginsd/vllm-cpu:65393ee064 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
          --tool-call-parser llama3_json \
          --model /root/.cache/Llama-3.2-1B-Instruct \
          --served-model-name meta-llama/Llama-3.2-1B-Instruct

        # Wait for vllm to be ready (model load on CPU can be slow; allow
        # up to 15 minutes before failing the step).
        echo "Waiting for vllm to be ready..."
        timeout 900 bash -c 'until curl -fsS http://localhost:8000/health >/dev/null; do
          echo "Waiting for vllm..."
          sleep 5
        done'

.github/workflows/pre-commit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on:
66
branches: [main]
77

88
concurrency:
9-
group: ${{ github.workflow }}-${{ github.ref }}
9+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
1010
cancel-in-progress: true
1111

1212
jobs:

.github/workflows/redhat-distro-container-build.yml

Lines changed: 0 additions & 57 deletions
This file was deleted.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# Build the Red Hat distribution container, smoke-test it against a local
# vllm instance, and (on push only) publish it to Quay.
name: Build, test, and publish Red Hat Distribution Containers

on:
  pull_request:
    branches:
      - main
      - rhoai-v*
    types:
      - opened
      - synchronize
      # "reopened" is part of the default trigger set; listing types without
      # it meant reopened PRs never ran this workflow.
      - reopened
  push:
    branches:
      - main
      - rhoai-v*

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  # Only cancel superseded PR runs. An unconditional "true" could cancel an
  # in-flight push build and skip publishing the image for that commit.
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

env:
  REGISTRY: quay.io
  IMAGE_NAME: quay.io/opendatahub/llama-stack # tags for the image will be added dynamically

jobs:
  build-test-push:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        platform: [linux/amd64] # TODO: enable other arch once all pip packages are available.

    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Set up QEMU
        uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1

      - name: Build image
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          context: .
          file: distribution/Containerfile
          platforms: ${{ matrix.platform }}
          push: false
          tags: ${{ env.IMAGE_NAME }}:${{ github.sha }}
          load: true # needed to load for smoke test
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Setup vllm for image test
        id: vllm
        uses: ./.github/actions/setup-vllm

      - name: Smoke test image
        id: smoke-test
        shell: bash
        env:
          INFERENCE_MODEL: meta-llama/Llama-3.2-1B-Instruct
          VLLM_URL: http://localhost:8000/v1
        run: ./tests/smoke.sh

      - name: Log in to Quay.io
        id: login
        if: github.event_name == 'push'
        uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_PASSWORD }}

      - name: Publish image to Quay.io
        id: publish
        if: github.event_name == 'push'
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          context: .
          file: distribution/Containerfile
          platforms: ${{ matrix.platform }}
          push: true
          tags: ${{ env.IMAGE_NAME }}:${{ github.sha }}${{ github.ref == 'refs/heads/main' && format(',{0}:latest', env.IMAGE_NAME) || '' }} # only update 'latest' tag if push is to the 'main' branch
          cache-from: type=gha
          cache-to: type=gha,mode=max

.github/workflows/semantic-pr.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ on:
99
- synchronize
1010

1111
concurrency:
12-
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
12+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
1313
cancel-in-progress: true
1414

1515
permissions:

.pre-commit-config.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ repos:
3535
args: [ --fix ]
3636
- id: ruff-format
3737

38+
- repo: https://github.com/rhysd/actionlint
39+
rev: v1.7.7
40+
hooks:
41+
- id: actionlint
42+
43+
- repo: https://github.com/koalaman/shellcheck-precommit
44+
rev: v0.11.0
45+
hooks:
46+
- id: shellcheck
47+
3848
- repo: local
3949
hooks:
4050
- id: pkg-gen

distribution/Containerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ RUN pip install \
1414
fire \
1515
httpx \
1616
kubernetes \
17-
llama_stack_provider_lmeval==0.1.7 \
18-
llama_stack_provider_trustyai_fms==0.1.2 \
17+
llama_stack_provider_lmeval==0.2.4 \
18+
llama_stack_provider_trustyai_fms==0.2.1 \
1919
matplotlib \
2020
mcp>=1.8.1 \
2121
nltk \

distribution/providers.d/remote/eval/trustyai_lmeval.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
---
# Remote eval provider registration for TrustyAI LM-Eval.
adapter:
  adapter_type: trustyai_lmeval
  # Version pin mirrors the distribution Containerfile.
  pip_packages:
    - kubernetes
    - llama_stack_provider_lmeval==0.2.4
  config_class: llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig
  module: llama_stack_provider_lmeval
  api_dependencies:
    - inference

distribution/providers.d/remote/safety/trustyai_fms.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
adapter:
22
adapter_type: trustyai_fms
3-
pip_packages: ["llama_stack_provider_trustyai_fms==0.1.2"]
3+
pip_packages: ["llama_stack_provider_trustyai_fms==0.2.1"]
44
config_class: llama_stack_provider_trustyai_fms.config.FMSSafetyProviderConfig
55
module: llama_stack_provider_trustyai_fms
66
api_dependencies: ["safety"]

tests/smoke.sh

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
#!/bin/bash
#
# Smoke test for the llama-stack distribution image.
#
# Expects in the environment:
#   IMAGE_NAME, GITHUB_SHA  - the local image tag to run ($IMAGE_NAME:$GITHUB_SHA)
#   INFERENCE_MODEL         - served model name to look up and chat with
#   VLLM_URL                - URL of a running vllm OpenAI-compatible endpoint
#
# Exits non-zero (after dumping container logs) on any failed check.
# The llama-stack container is removed on exit via the EXIT trap below.

set -uo pipefail

function start_and_wait_for_llama_stack_container {
    # Start llama stack.
    # NOTE: the original also passed "-p 8321:8321", but port publishing is
    # ignored (with a Docker warning) when --net=host is used, so it has
    # been dropped; host networking already exposes port 8321.
    docker run \
        -d \
        --pull=never \
        --net=host \
        --env INFERENCE_MODEL="$INFERENCE_MODEL" \
        --env VLLM_URL="$VLLM_URL" \
        --env TRUSTYAI_LMEVAL_USE_K8S=False \
        --env TRUSTYAI_LM_EVAL_NAMESPACE=dummy \
        --name llama-stack \
        "$IMAGE_NAME:$GITHUB_SHA"
    echo "Started Llama Stack container..."

    # Wait for llama stack to be ready by polling the health endpoint
    # (up to ~60 attempts, one second apart).
    echo "Waiting for Llama Stack server..."
    for i in {1..60}; do
        echo "Attempt $i to connect to Llama Stack..."
        resp=$(curl -fsS http://127.0.0.1:8321/v1/health)
        if [ "$resp" == '{"status":"OK"}' ]; then
            echo "Llama Stack server is up!"
            return
        fi
        sleep 1
    done
    echo "Llama Stack server failed to start :("
    echo "Container logs:"
    docker logs llama-stack || true
    exit 1
}

function test_model_list {
    # Verify the served model appears in the /v1/models listing.
    echo "===> Looking for model $INFERENCE_MODEL..."
    resp=$(curl -fsS http://127.0.0.1:8321/v1/models)
    if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
        echo "Model $INFERENCE_MODEL was found :)"
        return
    else
        echo "Model $INFERENCE_MODEL was not found :("
        echo "Container logs:"
        docker logs llama-stack || true
        exit 1
    fi
}

function test_model_openai_inference {
    # Ask a question with a known answer ("What color is grass?") and check
    # the completion mentions "green" — a cheap end-to-end inference check.
    echo "===> Attempting to chat with model $INFERENCE_MODEL..."
    resp=$(curl -fsS http://127.0.0.1:8321/v1/openai/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 10, \"temperature\": 0.0}")
    if echo "$resp" | grep -q "green"; then
        echo "===> Inference is working :)"
        return
    else
        echo "===> Inference is not working :("
        echo "Container logs:"
        docker logs llama-stack || true
        exit 1
    fi
}

main() {
    echo "===> Starting smoke test..."
    start_and_wait_for_llama_stack_container
    test_model_list
    test_model_openai_inference
    echo "===> Smoke test completed successfully!"
}

# Always clean up the container, whether the test passed or failed.
trap 'docker rm -f -v llama-stack >/dev/null 2>&1 || true' EXIT
main "$@"
# NOTE: the original ended with an unconditional "exit 0"; it was redundant
# (all failure paths call "exit 1" directly) and could mask a non-zero status
# if main ever returned one, so it has been removed.

0 commit comments

Comments
 (0)