name: Build, test, and publish vLLM CPU Containers

on:
  pull_request:
    branches:
      - main
      - rhoai-v*
      - konflux-poc*
    types:
      - opened
      - synchronize
    paths:
      - 'vllm/Containerfile'
      - '.github/actions/**'
      - '.github/workflows/vllm-cpu-container.yml'
  push:
    branches:
      - main
      - rhoai-v*
    paths:
      - 'vllm/Containerfile'
      - '.github/actions/**'
      - '.github/workflows/vllm-cpu-container.yml'
  workflow_dispatch:
    inputs:
      inference_model:
        description: 'Inference model to preload onto the vLLM image - default is Qwen/Qwen3-0.6B'
        type: string
      embedding_model:
        description: 'Embedding model to preload onto the vLLM image - default is ibm-granite/granite-embedding-125m-english'
        type: string

env:
  REGISTRY: quay.io
  IMAGE_NAME: quay.io/opendatahub/vllm-cpu # tags for the image will be added dynamically

jobs:
  build-test-push:
    runs-on: ubuntu-latest
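    # Models preloaded into the image: on workflow_dispatch the inputs above take
    # precedence; on push and pull_request the expressions fall back to the
    # defaults after '||' (e.g. Qwen/Qwen3-0.6B).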
    env:
      INFERENCE_MODEL: ${{ github.event.inputs.inference_model || 'Qwen/Qwen3-0.6B' }}
      EMBEDDING_MODEL: ${{ github.event.inputs.embedding_model || 'ibm-granite/granite-embedding-125m-english' }}
    strategy:
      matrix:
        platform: [linux/amd64] # TODO: enable other arch once all pip packages are available.
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1

      - name: Set image tag components
        run: |
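          # Derive short tag components from the model IDs:
          #   "${VAR#*/}" strips the org prefix    (Qwen/Qwen3-0.6B -> Qwen3-0.6B)
          #   "${VAR%-*}" strips the last '-' part (Qwen3-0.6B -> Qwen3)
          # so the default image tag is "Qwen3-granite-embedding-125m".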
          INFERENCE_TEMP="${INFERENCE_MODEL#*/}"
          EMBEDDING_TEMP="${EMBEDDING_MODEL#*/}"
          echo "INFERENCE_TAG=${INFERENCE_TEMP%-*}" >> "$GITHUB_ENV"
          echo "EMBEDDING_TAG=${EMBEDDING_TEMP%-*}" >> "$GITHUB_ENV"

      - name: Install uv
        uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
        with:
          python-version: 3.12

      - name: Set up QEMU
        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0

      - name: Free disk space
        uses: ./.github/actions/free-disk-space
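      # free-disk-space is a local composite action in this repo; presumably it
      # removes pre-installed toolchains so the large vLLM CPU image and preloaded
      # model weights fit on the hosted runner.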

      - name: Build image
        id: build
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          context: .
          file: vllm/Containerfile
          platforms: ${{ matrix.platform }}
          push: false
          tags: ${{ env.IMAGE_NAME }}:${{ env.INFERENCE_TAG }}-${{ env.EMBEDDING_TAG }}
          load: true # load the image into the local Docker daemon for the smoke tests
          build-args: |
            INFERENCE_MODEL=${{ env.INFERENCE_MODEL }}
            EMBEDDING_MODEL=${{ env.EMBEDDING_MODEL }}
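
      # Smoke tests: run the freshly built image in inference and embedding mode
      # via the repo's setup-vllm composite action. Skipped on workflow_dispatch
      # runs (see the if: conditions below).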
      - name: Setup vllm for inference test
        if: github.event_name != 'workflow_dispatch'
        id: vllm-inference
        uses: ./.github/actions/setup-vllm
        env:
          VLLM_IMAGE: ${{ env.IMAGE_NAME }}:${{ env.INFERENCE_TAG }}-${{ env.EMBEDDING_TAG }}
          VLLM_MODE: 'inference'

      - name: Setup vllm for embedding test
        if: github.event_name != 'workflow_dispatch'
        id: vllm-embedding
        uses: ./.github/actions/setup-vllm
        env:
          VLLM_IMAGE: ${{ env.IMAGE_NAME }}:${{ env.INFERENCE_TAG }}-${{ env.EMBEDDING_TAG }}
          VLLM_MODE: 'embedding'
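
      # Diagnostics and cleanup run with if: always() so they execute even when
      # the build or smoke tests fail. The container names vllm-inference and
      # vllm-embedding are assumed to be the ones created by the setup-vllm action.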
      - name: Gather logs and debugging information
        if: always()
        shell: bash
        run: |
          # Create logs directory
          mkdir -p logs
          docker logs vllm-inference > logs/vllm-inference.log 2>&1 || echo "Failed to get vllm-inference logs" > logs/vllm-inference.log
          docker logs vllm-embedding > logs/vllm-embedding.log 2>&1 || echo "Failed to get vllm-embedding logs" > logs/vllm-embedding.log

          # Gather system information
          echo "=== System information ==="
          {
            echo "Disk usage:"
            df -h
            echo "Memory usage:"
            free -h
            echo "Docker images:"
            docker images
            echo "Docker containers:"
            docker ps -a
          } > logs/system-info.log 2>&1

      - name: Upload logs as artifacts
        if: always()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
        with:
          name: ci-logs-${{ github.sha }}
          path: logs/
          retention-days: 7

      - name: Cleanup vllm containers
        if: always()
        shell: bash
        run: |
          docker rm -f vllm-inference vllm-embedding >/dev/null 2>&1 || true
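
      # Login and publish only run for push and workflow_dispatch events, so
      # pull_request builds never push to Quay.io.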
      - name: Log in to Quay.io
        id: login
        if: contains(fromJSON('["push", "workflow_dispatch"]'), github.event_name)
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_PASSWORD }}

      - name: Publish image to Quay.io
        id: publish
        if: contains(fromJSON('["push", "workflow_dispatch"]'), github.event_name)
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          context: .
          file: vllm/Containerfile
          platforms: ${{ matrix.platform }}
          push: true
          tags: ${{ env.IMAGE_NAME }}:${{ env.INFERENCE_TAG }}-${{ env.EMBEDDING_TAG }}
          build-args: |
            INFERENCE_MODEL=${{ env.INFERENCE_MODEL }}
            EMBEDDING_MODEL=${{ env.EMBEDDING_MODEL }}
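
      # Note: this second build-push-action invocation normally reuses the buildx
      # cache populated by the earlier "Build image" step on the same runner, so
      # publishing is mostly a re-tag and push rather than a full rebuild.

# Example: a manual run with custom models can be triggered via the GitHub CLI, e.g.:
#   gh workflow run vllm-cpu-container.yml \
#     -f inference_model=Qwen/Qwen3-0.6B \
#     -f embedding_model=ibm-granite/granite-embedding-125m-english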