We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b709f53 commit 274213b
test/vllm/ec2/utils/run_vllm_on_arm64.sh
@@ -63,7 +63,7 @@ docker run --rm \
63
$DLC_IMAGE \
64
-c "python3 /vllm/examples/offline_inference/basic/generate.py \
65
--model ${MODEL_NAME} \
66
- --dtype half \
+ --dtype float32 \
67
--tensor-parallel-size 1 \
68
--max-model-len 2048"
69
@@ -79,7 +79,7 @@ docker run -d \
79
--gpus=all \
80
81
-c "vllm serve ${MODEL_NAME} \
82
- --dtype half
83
--tensor-parallel-size 1"
84
85
wait_for_api
0 commit comments