forked from sgl-project/sglang
-
Notifications
You must be signed in to change notification settings - Fork 0
182 lines (149 loc) · 6.03 KB
/
release-docker-amd-nightly.yml
File metadata and controls
182 lines (149 loc) · 6.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
name: Release Docker Images Nightly (AMD)
on:
workflow_dispatch:
schedule:
- cron: '0 12 * * *'
concurrency:
# A PR number if a pull request and otherwise the commit hash. This cancels
# queued and in-progress runs for the same PR (presubmit) or commit
# (postsubmit). The workflow name is prepended to avoid conflicts between
# different workflows.
group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
cancel-in-progress: true
jobs:
publish:
if: github.repository == 'sgl-project/sglang'
runs-on: amd-docker-scale
environment: 'prod'
strategy:
fail-fast: false
matrix:
gpu_arch: ['gfx942', 'gfx950']
build_type: ['all']
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required for git describe to find tags
- name: "Set Date"
run: |
echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV
- name: Get version from latest tag
id: version
run: |
# Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')
if [ -z "$VERSION" ]; then
echo "::error::Could not determine version from git tags"
exit 1
fi
# Get short commit hash of current HEAD
COMMIT_HASH=$(git rev-parse --short HEAD)
# Compose pretend version for setuptools_scm: e.g., 0.5.8.dev20260129+g1a2b3c4
PRETEND_VERSION="${VERSION}.dev${{ env.DATE }}+g${COMMIT_HASH}"
echo "version=${VERSION}" >> $GITHUB_OUTPUT
echo "pretend_version=${PRETEND_VERSION}" >> $GITHUB_OUTPUT
echo "Detected version: ${VERSION}"
echo "Pretend version for pip: ${PRETEND_VERSION}"
- name: Login to Docker Hub (AMD)
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}
- name: Build and Push to rocm/sgl-dev
run: |
version=${{ steps.version.outputs.version }}
pretend_version=${{ steps.version.outputs.pretend_version }}
echo "Version: ${version}"
echo "Pretend version: ${pretend_version}"
if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
rocm_tag="rocm700-mi30x"
elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
rocm_tag="rocm700-mi35x"
else
echo "Unsupported gfx arch"
exit 1
fi
tag=v${version}-${rocm_tag}
echo "IMAGE_TAG=${tag}-${{ env.DATE }}" >> $GITHUB_ENV
docker build . -f docker/rocm.Dockerfile --build-arg SGL_BRANCH=${{ github.ref_name }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }} --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic --build-arg SETUPTOOLS_SCM_PRETEND_VERSION=${pretend_version} -t rocm/sgl-dev:${tag}-${{ env.DATE }} --no-cache
docker push rocm/sgl-dev:${tag}-${{ env.DATE }}
- name: Login to Docker Hub (lmsys)
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Push to lmsysorg/sglang-rocm
run: |
docker tag rocm/sgl-dev:${{ env.IMAGE_TAG }} lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }}
docker push lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }}
# Temporarily disable docker cache seeding until performant storage is in place
cache:
if: false
# if: always() && github.repository == 'sgl-project/sglang'
runs-on: linux-mi300-gpu-1
environment: 'prod'
needs: publish
strategy:
fail-fast: false
matrix:
gpu_arch: ['gfx942']
build_type: ['all']
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required for git describe to find tags
- name: "Set Date"
run: |
echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV
- name: Get version from latest tag
id: version
run: |
# Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')
if [ -z "$VERSION" ]; then
echo "::error::Could not determine version from git tags"
exit 1
fi
echo "version=${VERSION}" >> $GITHUB_OUTPUT
echo "Detected version: ${VERSION}"
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}
- name: Pull and Save Docker Image to Cache
run: |
set -euxo pipefail
version=${{ steps.version.outputs.version }}
echo "Version: ${version}"
if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
rocm_tag="rocm700-mi30x"
else
echo "Unsupported gfx arch"
exit 1
fi
tag=v${version}-${rocm_tag}
if [ "${{ matrix.build_type }}" = "all" ]; then
tag_suffix=""
else
echo "Unsupported build type"
exit 1
fi
image="rocm/sgl-dev:${tag}-${{ env.DATE }}${tag_suffix}"
# Determine target cache file name based on ROCm variant
if [[ "${rocm_tag}" == rocm700* ]]; then
final_path="/home/runner/sgl-data/docker/image-700.tar"
else
echo "Unexpected ROCm tag: ${rocm_tag}"
exit 1
fi
tmp_path="${final_path}.tmp"
echo "Pulling image: ${image}"
docker pull "${image}"
echo "Saving to temp file: ${tmp_path}"
docker save "${image}" -o "${tmp_path}"
echo "Moving to final path: ${final_path}"
mv -f "${tmp_path}" "${final_path}"
echo "Cache populated successfully at ${final_path}"