Skip to content

Commit 2e31674

Browse files
committed
fix: update gpt-oss image
1 parent bbaa173 commit 2e31674

2 files changed

Lines changed: 9 additions & 4 deletions

File tree

gpt-oss-single.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ services:
2626
- --gpu-memory-utilization=0.95
2727
- --max-num-seqs=32
2828
- --max-num-batched-tokens=8192
29-
- --max-model-len=32768
29+
- --max-model-len=131072
3030
- --no-enable-prefix-caching
3131
- --speculative-config={"method":"eagle3","model":"nvidia/gpt-oss-120b-Eagle3-v2","num_speculative_tokens":3}
3232
- --async-scheduling

small-models.yaml

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ x-sglang-env:
6262

6363
x-gpt-oss-common: &gpt-oss-common
6464
<<: *vllm-common
65-
image: vllm/vllm-openai@sha256:014a95f21c9edf6abe0aea6b07353f96baa4ec291c427bb1176dc7c93a85845c
65+
image: vllm/vllm-openai@sha256:6766ce0c459e24b76f3e9ba14ffc0442131ef4248c904efdcbf0d89e38be01fe
6666
command: >
6767
openai/gpt-oss-120b
6868
--tensor-parallel-size 1
@@ -73,14 +73,19 @@ x-gpt-oss-common: &gpt-oss-common
7373
--tool-call-parser openai
7474
--enable-auto-tool-choice
7575
--max-model-len 128K
76-
--max-num-batched-tokens 16K
76+
--max-num-batched-tokens 8192
7777
--speculative-config '{"model":"nvidia/gpt-oss-120b-Eagle3-v2","num_speculative_tokens":3,"method":"eagle3","draft_tensor_parallel_size":1}'
7878
--load-format runai_streamer
7979
--model-loader-extra-config '{"distributed":true, "concurrency":48}'
8080
volumes:
8181
- hugginface_cache:/root/.cache/huggingface
8282
- vllm_cache:/root/.cache/vllm
83-
environment: *vllm-env
83+
environment:
84+
- HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
85+
- VLLM_LOGGING_LEVEL=INFO
86+
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
87+
- VLLM_MXFP4_USE_MARLIN=1
88+
- VLLM_CACHE_ROOT=/root/.cache/vllm
8489

8590
x-flux-common: &flux-common
8691
<<: *vllm-common

0 commit comments

Comments (0)