Commit b008acd

Merge branch 'main' into wan22_online
Signed-off-by: Samit <285365963@qq.com>
2 parents ff8b89d + 285d71b commit b008acd

File tree

220 files changed (+10619, -3644 lines)


.buildkite/pipeline.yml

Lines changed: 110 additions & 29 deletions
@@ -20,10 +20,19 @@ steps:
   - label: "Simple Unit Test"
     depends_on: image-build
     commands:
-      - pytest -v -s tests/entrypoints/
-      - pytest -v -s tests/diffusion/cache/
-      - pytest -v -s tests/model_executor/models/qwen2_5_omni/test_audio_length.py
-      - pytest -v -s tests/worker/
+      - |
+        pytest -v -s \
+          tests/entrypoints/ \
+          tests/diffusion/cache/ \
+          tests/diffusion/lora/ \
+          tests/model_executor/models/qwen2_5_omni/test_audio_length.py \
+          tests/worker/ \
+          tests/distributed/omni_connectors/test_kv_flow.py \
+          --cov=vllm_omni \
+          --cov-branch \
+          --cov-report=term-missing \
+          --cov-report=html \
+          --cov-report=xml
     agents:
       queue: "gpu_1_queue"
     plugins:
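
The consolidated step replaces four separate pytest invocations with a single run and adds coverage collection via pytest-cov. As a rough local equivalent (a minimal sketch, assuming pytest and pytest-cov are installed and the command runs from the repository root):

    # Install the test runner and coverage plugin (assumed; versions not pinned by this diff).
    pip install pytest pytest-cov

    # One pytest process over the suites; --cov=vllm_omni measures the vllm_omni
    # package, --cov-branch adds branch coverage, and the three --cov-report flags
    # emit terminal, HTML (htmlcov/ by default), and XML (coverage.xml) reports.
    pytest -v -s \
      tests/entrypoints/ \
      tests/diffusion/cache/ \
      --cov=vllm_omni \
      --cov-branch \
      --cov-report=term-missing \
      --cov-report=html \
      --cov-report=xml

Running everything in one process also means one coverage database, so results from all the suites land in a single report instead of the last run overwriting earlier ones.
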
@@ -75,6 +84,7 @@ steps:
     depends_on: image-build
     commands:
       - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
+      - pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
     agents:
       queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
     plugins:
@@ -175,34 +185,13 @@ steps:
           volumes:
             - "/fsx/hf_cache:/fsx/hf_cache"

-  - label: "Omni Model Test"
-    timeout_in_minutes: 15
-    depends_on: image-build
-    commands:
-      - export VLLM_LOGGING_LEVEL=DEBUG
-      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-      - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
-    agents:
-      queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
-    plugins:
-      - docker#v5.2.0:
-          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
-          always-pull: true
-          propagate-environment: true
-          environment:
-            - "HF_HOME=/fsx/hf_cache"
-          volumes:
-            - "/fsx/hf_cache:/fsx/hf_cache"

-  - label: "Omni Model Test with H100"
-    timeout_in_minutes: 30
+  - label: "Benchmark Test"
+    timeout_in_minutes: 15
     depends_on: image-build
     commands:
       - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-      - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
-      - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
-      - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
-      - pytest -s -v tests/e2e/online_serving/test_async_omni.py
+      - pytest -s -v tests/benchmarks/test_serve_cli.py
     agents:
       queue: "mithril-h100-pool"
     plugins:
@@ -232,12 +221,69 @@ steps:
             path: /mnt/hf-cache
             type: DirectoryOrCreate

+  - label: "Omni Model Test"
+    timeout_in_minutes: 15
+    depends_on: image-build
+    commands:
+      - export VLLM_LOGGING_LEVEL=DEBUG
+      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
+    agents:
+      queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
+    plugins:
+      - docker#v5.2.0:
+          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+          always-pull: true
+          propagate-environment: true
+          environment:
+            - "HF_HOME=/fsx/hf_cache"
+          volumes:
+            - "/fsx/hf_cache:/fsx/hf_cache"
+
+  # - label: "Omni Model Test with H100"
+  #   timeout_in_minutes: 30
+  #   depends_on: image-build
+  #   commands:
+  #     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  #     - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
+  #     - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
+  #     - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
+  #     - pytest -s -v tests/e2e/online_serving/test_async_omni.py
+  #   agents:
+  #     queue: "mithril-h100-pool"
+  #   plugins:
+  #     - kubernetes:
+  #         podSpec:
+  #           containers:
+  #             - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+  #               resources:
+  #                 limits:
+  #                   nvidia.com/gpu: 2
+  #               volumeMounts:
+  #                 - name: devshm
+  #                   mountPath: /dev/shm
+  #                 - name: hf-cache
+  #                   mountPath: /root/.cache/huggingface
+  #               env:
+  #                 - name: HF_HOME
+  #                   value: /root/.cache/huggingface
+  #           nodeSelector:
+  #             node.kubernetes.io/instance-type: gpu-h100-sxm
+  #           volumes:
+  #             - name: devshm
+  #               emptyDir:
+  #                 medium: Memory
+  #             - name: hf-cache
+  #               hostPath:
+  #                 path: /mnt/hf-cache
+  #                 type: DirectoryOrCreate
+
   - label: "Diffusion Image Edit Test with H100 (1 GPU)"
     timeout_in_minutes: 20
     depends_on: image-build
     commands:
       - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-      - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py
+      - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
     agents:
       queue: "mithril-h100-pool"
     plugins:
@@ -266,3 +312,38 @@ steps:
         hostPath:
           path: /mnt/hf-cache
           type: DirectoryOrCreate
+
+  # - label: "Bagel Text2Img Model Test with H100"
+  #   timeout_in_minutes: 30
+  #   depends_on: image-build
+  #   commands:
+  #     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  #     - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
+  #   agents:
+  #     queue: "mithril-h100-pool"
+  #   plugins:
+  #     - kubernetes:
+  #         podSpec:
+  #           containers:
+  #             - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+  #               resources:
+  #                 limits:
+  #                   nvidia.com/gpu: 1
+  #               volumeMounts:
+  #                 - name: devshm
+  #                   mountPath: /dev/shm
+  #                 - name: hf-cache
+  #                   mountPath: /root/.cache/huggingface
+  #               env:
+  #                 - name: HF_HOME
+  #                   value: /root/.cache/huggingface
+  #           nodeSelector:
+  #             node.kubernetes.io/instance-type: gpu-h100-sxm
+  #           volumes:
+  #             - name: devshm
+  #               emptyDir:
+  #                 medium: Memory
+  #             - name: hf-cache
+  #               hostPath:
+  #                 path: /mnt/hf-cache
+  #                 type: DirectoryOrCreate

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 6 additions & 0 deletions
@@ -116,6 +116,9 @@ if [[ $commands == *"--shard-id="* ]]; then
       --shm-size=16gb \
       --group-add "$render_gid" \
       --rm \
+      -e MIOPEN_DEBUG_CONV_DIRECT=0 \
+      -e MIOPEN_DEBUG_CONV_GEMM=0 \
+      -e VLLM_ROCM_USE_AITER=1 \
       -e HIP_VISIBLE_DEVICES="${GPU}" \
      -e HF_TOKEN \
       -e AWS_ACCESS_KEY_ID \
@@ -148,6 +151,9 @@ else
       --shm-size=16gb \
       --group-add "$render_gid" \
       --rm \
+      -e MIOPEN_DEBUG_CONV_DIRECT=0 \
+      -e MIOPEN_DEBUG_CONV_GEMM=0 \
+      -e VLLM_ROCM_USE_AITER=1 \
       -e HF_TOKEN \
       -e AWS_ACCESS_KEY_ID \
       -e AWS_SECRET_ACCESS_KEY \
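
Injecting the variables with `-e` at container launch replaces the per-step export lines removed from test-amd.yaml below, so every command run inside the CI container inherits them. A minimal sketch of the pattern (the image name and test path are illustrative stand-ins, not the script's actual values):

    # Hypothetical image/test values; the real script assembles these dynamically.
    # "-e NAME=value" sets the variable in the container; a bare "-e HF_TOKEN"
    # passes the host's value of HF_TOKEN through without echoing it in the command.
    docker run --rm \
      --shm-size=16gb \
      -e MIOPEN_DEBUG_CONV_DIRECT=0 \
      -e MIOPEN_DEBUG_CONV_GEMM=0 \
      -e VLLM_ROCM_USE_AITER=1 \
      -e HF_TOKEN \
      some-rocm-ci-image:latest \
      pytest -s -v tests/e2e/offline_inference/test_t2i_model.py
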

.buildkite/scripts/simple_test.sh

Lines changed: 0 additions & 58 deletions
This file was deleted.

.buildkite/test-amd.yaml

Lines changed: 44 additions & 34 deletions
@@ -8,14 +8,32 @@ steps:
     grade: Blocking
     commands:
       - export GPU_ARCHS=gfx942
-      - export MIOPEN_DEBUG_CONV_DIRECT=0
-      - export MIOPEN_DEBUG_CONV_GEMM=0
-      - export VLLM_ROCM_USE_AITER=1
-      - export VLLM_ROCM_USE_AITER_MHA=1
-      - export VLLM_ROCM_USE_AITER_LINEAR=0
-      - export VLLM_ROCM_USE_AITER_RMSNORM=0
       - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py

+  - label: "Diffusion Images API LoRA E2E"
+    timeout_in_minutes: 20
+    agent_pool: mi325_1
+    depends_on: amd-build
+    mirror_hardwares: [amdproduction]
+    grade: Blocking
+    commands:
+      - export GPU_ARCHS=gfx942
+      - export VLLM_LOGGING_LEVEL=DEBUG
+      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py
+
+  - label: "Diffusion Model CPU offloading Test"
+    timeout_in_minutes: 20
+    agent_pool: mi325_1
+    depends_on: amd-build
+    mirror_hardwares: [amdproduction]
+    grade: Blocking
+    commands:
+      - export GPU_ARCHS=gfx942
+      - export VLLM_LOGGING_LEVEL=DEBUG
+      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
+
   - label: "Diffusion Cache Backend Test"
     timeout_in_minutes: 15
     agent_pool: mi325_1
@@ -26,34 +44,37 @@ steps:
       - export GPU_ARCHS=gfx942
       - export VLLM_LOGGING_LEVEL=DEBUG
       - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-      - export MIOPEN_DEBUG_CONV_DIRECT=0
-      - export MIOPEN_DEBUG_CONV_GEMM=0
-      - export VLLM_ROCM_USE_AITER=1
-      - export VLLM_ROCM_USE_AITER_MHA=1
-      - export VLLM_ROCM_USE_AITER_LINEAR=0
-      - export VLLM_ROCM_USE_AITER_RMSNORM=0
       - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py

-  - label: "Diffusion Parallelism Test"
-    timeout_in_minutes: 15
+  - label: "Diffusion Sequence Parallelism Test"
+    timeout_in_minutes: 20
     agent_pool: mi325_2
     depends_on: amd-build
     mirror_hardwares: [amdproduction]
     grade: Blocking
     commands:
-      - export MIOPEN_DEBUG_CONV_DIRECT=0
-      - export MIOPEN_DEBUG_CONV_GEMM=0
+      - export GPU_ARCHS=gfx942
+      - export VLLM_LOGGING_LEVEL=DEBUG
+      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
       - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py

+  - label: "Diffusion Tensor Parallelism Test"
+    timeout_in_minutes: 20
+    agent_pool: mi325_2
+    depends_on: amd-build
+    commands:
+      - export GPU_ARCHS=gfx942
+      - export VLLM_LOGGING_LEVEL=DEBUG
+      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py
+
   - label: "Diffusion GPU Worker Test"
     timeout_in_minutes: 20
     agent_pool: mi325_2
     depends_on: amd-build
     mirror_hardwares: [amdproduction]
     grade: Blocking
     commands:
-      - export MIOPEN_DEBUG_CONV_DIRECT=0
-      - export MIOPEN_DEBUG_CONV_GEMM=0
       - pytest -s -v tests/diffusion/test_diffusion_worker.py

   - label: "Omni Model Test Qwen2-5-Omni"
@@ -66,12 +87,6 @@ steps:
       - export GPU_ARCHS=gfx942
       - export VLLM_LOGGING_LEVEL=DEBUG
       - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-      - export MIOPEN_DEBUG_CONV_DIRECT=0
-      - export MIOPEN_DEBUG_CONV_GEMM=0
-      - export VLLM_ROCM_USE_AITER=1
-      - export VLLM_ROCM_USE_AITER_MHA=1
-      - export VLLM_ROCM_USE_AITER_LINEAR=0
-      - export VLLM_ROCM_USE_AITER_RMSNORM=0
       - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py

   - label: "Omni Model Test Qwen3-Omni"
@@ -83,9 +98,10 @@ steps:
     commands:
       - export VLLM_LOGGING_LEVEL=DEBUG
       - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-      - export MIOPEN_DEBUG_CONV_DIRECT=0
-      - export MIOPEN_DEBUG_CONV_GEMM=0
-      - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py
+      - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
+      - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
+      - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
+      - pytest -s -v tests/e2e/online_serving/test_async_omni.py

   - label: "Diffusion Image Edit Test"
     timeout_in_minutes: 15
@@ -97,10 +113,4 @@ steps:
       - export GPU_ARCHS=gfx942
       - export VLLM_LOGGING_LEVEL=DEBUG
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-      - export MIOPEN_DEBUG_CONV_DIRECT=0
-      - export MIOPEN_DEBUG_CONV_GEMM=0
-      - export VLLM_ROCM_USE_AITER=1
-      - export VLLM_ROCM_USE_AITER_MHA=1
-      - export VLLM_ROCM_USE_AITER_LINEAR=0
-      - export VLLM_ROCM_USE_AITER_RMSNORM=0
-      - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py
+      - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
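
With the MIOPEN_* and VLLM_ROCM_USE_AITER* exports stripped from each step, the remaining per-step environment is just logging and multiprocessing settings; the container-level flags added to run-amd-test.sh supply the rest. To reproduce one of these steps by hand on a ROCm machine, a sketch (assuming the same checkout and a working ROCm vLLM install):

    # Per-step settings kept in test-amd.yaml:
    export GPU_ARCHS=gfx942
    export VLLM_LOGGING_LEVEL=DEBUG
    export VLLM_WORKER_MULTIPROC_METHOD=spawn
    # Formerly exported per step; now injected by run-amd-test.sh at docker run time:
    export MIOPEN_DEBUG_CONV_DIRECT=0
    export MIOPEN_DEBUG_CONV_GEMM=0
    export VLLM_ROCM_USE_AITER=1
    pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py
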
