feat: make inference provider optional #137

name: Build, test, and publish Red Hat Distribution Containers

on:
  pull_request:
    branches:
      - main
      - rhoai-v*
    types:
      - opened
      - synchronize
  push:
    branches:
      - main
      - rhoai-v*
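# Keep only the newest run per PR (or per branch, for pushes);
# cancel-in-progress aborts any superseded run.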
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
env:
  REGISTRY: quay.io
  IMAGE_NAME: quay.io/opendatahub/llama-stack # tags for the image will be added dynamically
jobs:
  build-test-push:
    runs-on: ubuntu-latest
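    # NOTE: assumed to be consumed by tests/smoke.sh and
    # tests/run_integration_tests.sh; VLLM_URL matches the local vLLM server
    # brought up by the setup-vllm action below.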
    env:
      INFERENCE_MODEL: meta-llama/Llama-3.2-1B-Instruct
      VLLM_URL: http://localhost:8000/v1
    strategy:
      matrix:
        platform: [linux/amd64] # TODO: enable other arches once all pip packages are available.
    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
      - name: Install uv
        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
        with:
          python-version: 3.12
          version: 0.7.6
      - name: Set up QEMU
        uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
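      # QEMU and Buildx enable cross-platform builds for the platforms listed
      # in the matrix above (currently only linux/amd64).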
      - name: Build image
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          context: .
          file: distribution/Containerfile
          platforms: ${{ matrix.platform }}
          push: false
          tags: ${{ env.IMAGE_NAME }}:${{ github.sha }}
          load: true # load the image into the local Docker daemon so the smoke test can run it
          cache-from: type=gha
          cache-to: type=gha,mode=max
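      # Composite action in this repo; presumably starts a local vLLM server
      # serving $INFERENCE_MODEL at $VLLM_URL for the tests below.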
      - name: Set up vLLM for image test
        id: vllm
        uses: ./.github/actions/setup-vllm
      - name: Start and smoke test LLS distro image
        id: smoke-test
        shell: bash
        run: ./tests/smoke.sh
      - name: Integration tests
        id: integration-tests
        shell: bash
        run: ./tests/run_integration_tests.sh
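      # The log-collection, publish, and cleanup steps below are currently
      # disabled (commented out).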
      # - name: Gather logs and debugging information
      #   if: always()
      #   shell: bash
      #   run: |
      #     # Create logs directory
      #     mkdir -p logs
      #     docker logs llama-stack > logs/llama-stack.log 2>&1 || echo "Failed to get llama-stack logs" > logs/llama-stack.log
      #     docker logs vllm > logs/vllm.log 2>&1 || echo "Failed to get vllm logs" > logs/vllm.log
      #     # Gather system information
      #     echo "=== System information ==="
      #     {
      #       echo "Disk usage:"
      #       df -h
      #       echo "Memory usage:"
      #       free -h
      #       echo "Docker images:"
      #       docker images
      #       echo "Docker containers:"
      #       docker ps -a
      #     } > logs/system-info.log 2>&1
      #     # Gather integration test logs if they exist
      #     echo "=== Integration test artifacts ==="
      #     if [ -d "/tmp/llama-stack-integration-tests" ]; then
      #       find /tmp/llama-stack-integration-tests -name "*.log" -o -name "pytest.log" -o -name "*.out" 2>/dev/null | while read -r file; do
      #         cp "$file" "logs/$(basename "$file")" || true
      #       done
      #     fi
      # - name: Upload logs as artifacts
      #   if: always()
      #   uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.7.0
      #   with:
      #     name: ci-logs-${{ github.sha }}
      #     path: logs/
      #     retention-days: 7
      # - name: cleanup
      #   if: always()
      #   shell: bash
      #   run: |
      #     docker rm -f vllm llama-stack
      # - name: Log in to Quay.io
      #   id: login
      #   if: github.event_name == 'push'
      #   uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
      #   with:
      #     registry: ${{ env.REGISTRY }}
      #     username: ${{ secrets.QUAY_USERNAME }}
      #     password: ${{ secrets.QUAY_PASSWORD }}
      # - name: Publish image to Quay.io
      #   id: publish
      #   if: github.event_name == 'push'
      #   uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
      #   with:
      #     context: .
      #     file: distribution/Containerfile
      #     platforms: ${{ matrix.platform }}
      #     push: true
      #     tags: ${{ env.IMAGE_NAME }}:${{ github.sha }}${{ github.ref == 'refs/heads/main' && format(',{0}:latest', env.IMAGE_NAME) || '' }} # only update 'latest' tag if push is to the 'main' branch
      #     cache-from: type=gha
      #     cache-to: type=gha,mode=max
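      # NOTE: debugging aid. With `always()`, tmate opens an interactive
      # session on every run and holds the job open until the session ends or
      # times out; consider gating it (e.g. on failure) or removing it before
      # merge.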
      - name: Setup tmate session
        if: ${{ always() }}
        uses: mxschmitt/action-tmate@v3