File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed
Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -28,7 +28,7 @@ RUN export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
2828RUN python use_existing_torch.py && \
2929 pip install -r requirements/build.txt && \
3030 pip install --no-build-isolation -e .
31-
31+ RUN pip install "fastsafetensors>=0.1.10"
3232# Expose port 8000 for vLLM API server
3333EXPOSE 8000
3434
Original file line number Diff line number Diff line change @@ -10,4 +10,4 @@ docker run --runtime nvidia --gpus all \
1010 --env "HUGGING_FACE_HUB_TOKEN=<YOUR_HUGGINGFACE_TOKEN>" \
1111 --env "TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas" \
1212 vllm:25.10 \
13- vllm serve nvidia/Nemotron-Nano-VL-12B-V2-FP4-QAD --trust-remote-code --quantization modelopt_fp4 --max-model-len 24000 --gpu-memory-utilization 0.3
13+ vllm serve nvidia/Nemotron-Nano-VL-12B-V2-FP4-QAD --load-format fastsafetensors --trust-remote-code --quantization modelopt_fp4 --max-model-len 24000 --gpu-memory-utilization 0.3
You can’t perform that action at this time.
0 commit comments