
Commit f201d79

rebase to 0.6.1
2 parents 581c529 + 9ba0817 commit f201d79

200 files changed, +9167 -3138 lines changed


.buildkite/run-amd-test.sh

+24 -1

@@ -71,13 +71,36 @@ mkdir -p ${HF_CACHE}
 HF_MOUNT="/root/.cache/huggingface"
 
 commands=$@
+echo "Commands:$commands"
+#ignore certain kernels tests
+if [[ $commands == *" kernels "* ]]; then
+  commands="${commands} \
+  --ignore=kernels/test_attention.py \
+  --ignore=kernels/test_attention_selector.py \
+  --ignore=kernels/test_blocksparse_attention.py \
+  --ignore=kernels/test_causal_conv1d.py \
+  --ignore=kernels/test_cutlass.py \
+  --ignore=kernels/test_encoder_decoder_attn.py \
+  --ignore=kernels/test_flash_attn.py \
+  --ignore=kernels/test_flashinfer.py \
+  --ignore=kernels/test_int8_quant.py \
+  --ignore=kernels/test_machete_gemm.py \
+  --ignore=kernels/test_mamba_ssm.py \
+  --ignore=kernels/test_marlin_gemm.py \
+  --ignore=kernels/test_moe.py \
+  --ignore=kernels/test_prefix_prefill.py \
+  --ignore=kernels/test_rand.py \
+  --ignore=kernels/test_sampler.py"
+fi
+
 PARALLEL_JOB_COUNT=8
 # check if the command contains shard flag, we will run all shards in parallel because the host have 8 GPUs.
 if [[ $commands == *"--shard-id="* ]]; then
   for GPU in $(seq 0 $(($PARALLEL_JOB_COUNT-1))); do
     #replace shard arguments
-    commands=${@//"--shard-id= "/"--shard-id=${GPU} "}
+    commands=${commands//"--shard-id= "/"--shard-id=${GPU} "}
     commands=${commands//"--num-shards= "/"--num-shards=${PARALLEL_JOB_COUNT} "}
+    echo "Shard ${GPU} commands:$commands"
     docker run \
         --device /dev/kfd --device /dev/dri \
         --network host \

.buildkite/run-cpu-test-ppc64le.sh

+2 -1

@@ -11,8 +11,9 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image, setting --shm-size=4g for tensor parallel.
+source /etc/environment
 #docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
-docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --name cpu-test cpu-test
+docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN=$HF_TOKEN --name cpu-test cpu-test
 
 # Run basic model test
 docker exec cpu-test bash -c "

.buildkite/run-cpu-test.sh

+10 -6

@@ -23,12 +23,16 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 # Run basic model test
 docker exec cpu-test bash -c "
   pip install pytest matplotlib einops transformers_stream_generator
-  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py \
-    --ignore=tests/models/test_oot_registration.py \
-    --ignore=tests/models/test_registry.py \
-    --ignore=tests/models/test_fp8.py \
-    --ignore=tests/models/test_jamba.py \
-    --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
+  pytest -v -s tests/models/decoder_only/language \
+    --ignore=tests/models/test_fp8.py \
+    --ignore=tests/models/decoder_only/language/test_jamba.py \
+    --ignore=tests/models/decoder_only/language/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
+
+# Run compressed-tensor test
+docker exec cpu-test bash -c "
+  pytest -s -v \
+  tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
+  tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynanmic_per_token"
 
 # online inference
 docker exec cpu-test bash -c "

.buildkite/test-pipeline.yaml

+50 -26

@@ -50,6 +50,7 @@ steps:
   - tests/worker
   commands:
   - pytest -v -s async_engine # Async Engine
+  - NUM_SCHEDULER_STEPS=4 pytest -v -s async_engine/test_async_llm_engine.py
   - pytest -v -s test_inputs.py
   - pytest -v -s multimodal
   - pytest -v -s test_utils.py # Utils

@@ -91,7 +92,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
   - pytest -v -s entrypoints/openai
   - pytest -v -s entrypoints/test_chat_utils.py
-
+  - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
 - label: Distributed Tests (4 GPUs) # 10min
   working_dir: "/vllm-workspace/tests"

@@ -162,30 +163,13 @@ steps:
   - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
   - python3 offline_inference_encoder_decoder.py
 
-- label: Models Test # 1hr10min
-  source_file_dependencies:
-  - vllm/
-  - tests/models
-  commands:
-  - pip install -e ./plugins/vllm_add_dummy_model
-  - pytest -v -s models/test_oot_registration.py # it needs a clean process
-  - pytest -v -s models -m \"not vlm\" --ignore=models/test_oot_registration.py
-
 - label: torch compile integration test
   source_file_dependencies:
   - vllm/
   commands:
   - pytest -v -s ./compile/test_full_graph.py
   - pytest -v -s ./compile/test_wrapper.py
 
-
-- label: Vision Language Models Test # 42min
-  #mirror_hardwares: [amd]
-  source_file_dependencies:
-  - vllm/
-  commands:
-  - pytest -v -s models -m vlm
-
 - label: Prefix Caching Test # 7min
   #mirror_hardwares: [amd]
   source_file_dependencies:

@@ -217,7 +201,8 @@ steps:
   commands:
   # See https://github.com/vllm-project/vllm/issues/5152
   - export VLLM_ATTENTION_BACKEND=XFORMERS
-  - pytest -v -s spec_decode
+  - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
+  - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py
 
 - label: LoRA Test %N # 30min each
   mirror_hardwares: [amd]

@@ -228,6 +213,7 @@ steps:
   parallelism: 4
 
 - label: Kernels Test %N # 30min each
+  mirror_hardwares: [amd]
   source_file_dependencies:
   - csrc/
   - vllm/attention

@@ -282,6 +268,45 @@ steps:
   commands:
   - pytest -v -s tool_use
 
+##### models test #####
+
+- label: Basic Models Test # 3min
+  source_file_dependencies:
+  - vllm/
+  - tests/models
+  commands:
+  - pip install -e ./plugins/vllm_add_dummy_model
+  - pytest -v -s models/test_oot_registration.py # it needs a clean process
+  - pytest -v -s models/*.py --ignore=models/test_oot_registration.py
+
+- label: Decoder-only Language Models Test # 1h3min
+  #mirror_hardwares: [amd]
+  source_file_dependencies:
+  - vllm/
+  - tests/models/decoder_only/language
+  commands:
+  - pytest -v -s models/decoder_only/language
+
+- label: Decoder-only Multi-Modal Models Test # 56min
+  #mirror_hardwares: [amd]
+  source_file_dependencies:
+  - vllm/
+  - tests/models/decoder_only/audio_language
+  - tests/models/decoder_only/vision_language
+  commands:
+  - pytest -v -s models/decoder_only/audio_language
+  - pytest -v -s models/decoder_only/vision_language
+
+- label: Other Models Test # 5min
+  #mirror_hardwares: [amd]
+  source_file_dependencies:
+  - vllm/
+  - tests/models/embedding/language
+  - tests/models/encoder_decoder/language
+  commands:
+  - pytest -v -s models/embedding/language
+  - pytest -v -s models/encoder_decoder/language
+
 ##### 1 GPU test #####
 ##### multi gpus test #####
 

@@ -307,11 +332,11 @@ steps:
   - tests/distributed/
   commands:
   - # the following commands are for the first node, with ip 192.168.10.10 (ray environment already set up)
-  - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py
+  - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep -q 'Same node test passed'
   - VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py
   - VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py
   - # the following commands are for the second node, with ip 192.168.10.11 (ray environment already set up)
-  - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py
+  - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep -q 'Same node test passed'
 
 - label: Distributed Tests (2 GPUs) # 28min
   #mirror_hardwares: [amd]

@@ -324,11 +349,10 @@ steps:
   - vllm/model_executor/models/
   - tests/distributed/
   commands:
-  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py
-  - TARGET_TEST_SUITE=L4 pytest -v -s distributed/test_basic_distributed_correctness.py
-  - pytest -v -s distributed/test_basic_distributed_correctness_enc_dec.py
-  - pytest -v -s distributed/test_chunked_prefill_distributed.py
-  - pytest -v -s distributed/test_multimodal_broadcast.py
+  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
+  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m distributed_2_gpus
+  # Avoid importing model tests that cause CUDA reinitialization error
+  - pytest models/encoder_decoder/language/test_bart.py models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
   - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py

.github/ISSUE_TEMPLATE/400-bug report.yml

+9

@@ -30,6 +30,15 @@ body:
       </details>
   validations:
     required: true
+- type: textarea
+  attributes:
+    label: Model Input Dumps
+    description: |
+      If you are facing crashing due to illegal memory access or other issues with model execution, vLLM may dump the problematic input of the model. In this case, you will see the message `Error in model execution (input dumped to /tmp/err_xxx.pkl)`. If you see this message, please zip the file (because GitHub doesn't support .pkl file format) and upload it here. This will help us to reproduce the issue and facilitate the debugging process.
+    placeholder: |
+      Upload the dumped input file.
+  validations:
+    required: false
 - type: textarea
   attributes:
     label: 🐛 Describe the bug
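A minimal sketch of packaging such a dump for upload, using only Python's standard zipfile module (the /tmp/err_xxx.pkl path below is the placeholder from the error message, not a real file):

import zipfile

# Placeholder from the log message; substitute the actual dumped file name.
dump_path = "/tmp/err_xxx.pkl"

# GitHub attachments reject .pkl, so wrap the dump in a zip archive first.
with zipfile.ZipFile("err_input.zip", "w", compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(dump_path)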

.github/PULL_REQUEST_TEMPLATE.md

+10

@@ -39,6 +39,16 @@ FIX #xxxx (*link existing issues this PR will resolve*)
 <li>Please add documentation to <code>docs/source/</code> if the PR modifies the user-facing behaviors of vLLM. It helps vLLM user understand and utilize the new features or changes.</li>
 </ul>
 
+<h3>Adding or changing kernels</h3>
+<p>Each custom kernel needs a schema and one or more implementations to be registered with PyTorch.</p>
+<ul>
+<li>Make sure custom ops are registered following PyTorch guidelines: <a href="https://pytorch.org/tutorials/advanced/cpp_custom_ops.html#cpp-custom-ops-tutorial">Custom C++ and CUDA Operators</a> and <a href="https://docs.google.com/document/d/1_W62p8WJOQQUzPsJYa7s701JXt0qf2OfLub2sbkHOaU">The Custom Operators Manual</a></li>
+<li>Custom operations that return <code>Tensors</code> require meta-functions. Meta-functions should be implemented and registered in python so that dynamic dims can be handled automatically. See above documents for a description of meta-functions.</li>
+<li>Use <a href="https://pytorch.org/docs/stable/library.html#torch.library.opcheck"><code>torch.libary.opcheck()</code></a> to test the function registration and meta-function for any registered ops. See <code>tests/kernels</code> for examples.</li>
+<li>When changing the C++ signature of an existing op, the schema must be updated to reflect the changes.</li>
+<li>If a new custom type is needed, see the following document: <a href="https://docs.google.com/document/d/18fBMPuOJ0fY5ZQ6YyrHUppw9FA332CpNtgB6SOIgyuA">Custom Class Support in PT2</a>.
+</ul>
+
 <h3>Notes for Large Changes</h3>
 <p>Please keep the changes as concise as possible. For major architectural changes (>500 LOC excluding kernel/data/config/test), we would expect a GitHub issue (RFC) discussing the technical design and justification. Otherwise, we will tag it with <code>rfc-required</code> and might not go through the PR.</p>
 
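As a rough sketch of the workflow this new checklist describes (not code from vLLM; the op name mylib::scale, its schema, and its behavior are invented for illustration), one could define a schema, register an implementation plus a Python meta-function, and then validate the registration with torch.library.opcheck like so:

import torch

# Declare the schema of a hypothetical custom op.
torch.library.define("mylib::scale", "(Tensor x, float factor) -> Tensor")

# One concrete implementation; a real custom kernel would typically also
# register a CUDA variant.
@torch.library.impl("mylib::scale", "CPU")
def scale_cpu(x: torch.Tensor, factor: float) -> torch.Tensor:
    return x * factor

# Meta-function ("fake" implementation) registered in Python so dynamic dims
# can be handled automatically; it only describes the output shape and dtype.
@torch.library.register_fake("mylib::scale")
def scale_fake(x: torch.Tensor, factor: float) -> torch.Tensor:
    return torch.empty_like(x)

# Exercise the schema, the registration, and the meta-function.
torch.library.opcheck(torch.ops.mylib.scale.default, (torch.randn(4, 8), 2.0))

For the real examples the template points to, see tests/kernels in the repository.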

CMakeLists.txt

+13 -2

@@ -194,9 +194,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   FetchContent_Declare(
         cutlass
         GIT_REPOSITORY https://github.com/nvidia/cutlass.git
-        # CUTLASS 3.5.1
-        GIT_TAG 06b21349bcf6ddf6a1686a47a137ad1446579db9
+        GIT_TAG v3.5.1
         GIT_PROGRESS TRUE
+
+        # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
+        # Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags.
+        # So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE
+        GIT_SHALLOW TRUE
   )
   FetchContent_MakeAvailable(cutlass)
 

@@ -230,6 +234,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
       "-gencode arch=compute_90a,code=sm_90a")
   endif()
 
+
   #
   # Machete kernels
 

@@ -288,6 +293,12 @@ define_gpu_extension_target(
   USE_SABI 3
   WITH_SOABI)
 
+# If CUTLASS is compiled on NVCC >= 12.5, it by default uses
+# cudaGetDriverEntryPointByVersion as a wrapper to avoid directly calling the
+# driver API. This causes problems when linking with earlier versions of CUDA.
+# Setting this variable sidesteps the issue by calling the driver directly.
+target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)
+
 #
 # _moe_C extension
 #

Dockerfile

+1

@@ -145,6 +145,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
     && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
     && apt-get update -y \
     && apt-get install -y ccache software-properties-common git curl sudo vim python3-pip \
+    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
     && add-apt-repository ppa:deadsnakes/ppa \
     && apt-get update -y \
     && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \

Dockerfile.cpu

+18 -1

@@ -2,9 +2,14 @@
 
 FROM ubuntu:22.04 AS cpu-test-1
 
+ENV CCACHE_DIR=/root/.cache/ccache
+
+ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
+
 RUN --mount=type=cache,target=/var/cache/apt \
     apt-get update -y \
     && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
+    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
     && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
 
 # https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html

@@ -25,6 +30,19 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     pip install --upgrade pip && \
     pip install -r requirements-build.txt
 
+# install oneDNN
+RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git
+
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    cmake -B ./oneDNN/build -S ./oneDNN -G Ninja -DONEDNN_LIBRARY_TYPE=STATIC \
+    -DONEDNN_BUILD_DOC=OFF \
+    -DONEDNN_BUILD_EXAMPLES=OFF \
+    -DONEDNN_BUILD_TESTS=OFF \
+    -DONEDNN_BUILD_GRAPH=OFF \
+    -DONEDNN_ENABLE_WORKLOAD=INFERENCE \
+    -DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
+    cmake --build ./oneDNN/build --target install --config Release
+
 FROM cpu-test-1 AS build
 
 WORKDIR /workspace/vllm

@@ -40,7 +58,6 @@ COPY ./ ./
 ARG VLLM_CPU_DISABLE_AVX512
 ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
 
-ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=cache,target=/root/.cache/ccache \
     VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \

Dockerfile.neuron

+3 -1

@@ -6,7 +6,9 @@ FROM $BASE_IMAGE
 RUN echo "Base image is $BASE_IMAGE"
 
 # Install some basic utilities
-RUN apt-get update && apt-get install python3 python3-pip -y
+RUN apt-get update \
+    && apt-get install python3 python3-pip -y \
+    && apt-get install -y ffmpeg libsm6 libxext6 libgl1
 
 ### Mount Point ###
 # When launching the container, mount the code directory to /app

Dockerfile.openvino

+2 -1

@@ -4,7 +4,8 @@
 FROM ubuntu:22.04 AS dev
 
 RUN apt-get update -y && \
-    apt-get install -y python3-pip git
+    apt-get install -y python3-pip git && \
+    apt-get install -y ffmpeg libsm6 libxext6 libgl1
 WORKDIR /workspace
 
 # copy requirements

Dockerfile.ppc64le

+2 -3

@@ -4,7 +4,7 @@ USER root
 
 ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/"
 
-RUN apt-get update -y && apt-get install -y git wget vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential
+RUN apt-get update -y && apt-get install -y git wget curl vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential ffmpeg libsm6 libxext6 libgl1
 
 # Some packages in requirements-cpu are installed here
 # IBM provides optimized packages for ppc64le processors in the open-ce project for mamba

@@ -16,7 +16,7 @@ COPY ./ /workspace/vllm
 WORKDIR /workspace/vllm
 
 # These packages will be in rocketce eventually
-RUN pip install -v cmake torch==2.3.1 uvloop==0.20.0 -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing
+RUN pip install -v cmake xformers torch==2.3.1 uvloop==0.20.0 -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing
 
 RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install
 

@@ -25,4 +25,3 @@ WORKDIR /workspace/
 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
 
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
-

Dockerfile.tpu

+3

@@ -4,6 +4,9 @@ ARG BASE_IMAGE="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:night
 FROM $BASE_IMAGE
 WORKDIR /workspace
 
+# Install some basic utilities
+RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 libgl1
+
 # Install the TPU and Pallas dependencies.
 RUN python3 -m pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html
 RUN python3 -m pip install torch_xla[pallas] -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html

Dockerfile.xpu

+1 -2

@@ -9,8 +9,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     chmod 644 /usr/share/keyrings/intel-graphics.gpg
 
 RUN apt-get update -y \
-    && apt-get install -y curl libicu70 lsb-release git wget vim numactl python3 python3-pip
-
+    && apt-get install -y curl libicu70 lsb-release git wget vim numactl python3 python3-pip ffmpeg libsm6 libxext6 libgl1
 COPY ./ /workspace/vllm
 
 WORKDIR /workspace/vllm
