@@ -58,6 +58,7 @@ VLLM_EXTRA="${BFCL_VLLM_EXTRA:-}"
# Per-arm listen ports, each overridable through the matching BFCL_* environment variable.
# No fixed port number is chosen here; arm b later fills any arm-b port that is still empty via free_port.
5858ARM_A_PORT=" ${BFCL_ARM_A_PORT:- } " # pure-vLLM OpenAI port
5959ARM_B_GRPC_PORT=" ${BFCL_ARM_B_GRPC_PORT:- } " # vLLM gRPC worker port
6060ARM_B_GW_PORT=" ${BFCL_ARM_B_GW_PORT:- } " # SMG OpenAI gateway port
61+ ARM_B_METRICS_PORT=" ${BFCL_ARM_B_METRICS_PORT:- } " # SMG Prometheus port; set explicitly because SMG's own fixed default (29000) collides when arms/legs share a host
6162
6263# Executables (override for venv / box paths).
6364VLLM_BIN=" ${VLLM_BIN:- vllm} " # `vllm serve` console script
@@ -123,6 +124,7 @@ case "$ARM" in
123124 b)
124125 ARM_B_GRPC_PORT=" ${ARM_B_GRPC_PORT:- $(free_port)} "
125126 ARM_B_GW_PORT=" ${ARM_B_GW_PORT:- $(free_port)} "
127+ ARM_B_METRICS_PORT=" ${ARM_B_METRICS_PORT:- $(free_port)} "
126128 # 1) vLLM gRPC worker (raw-token; SMG will own template+parsing).
127129 declare -a wcmd=(
128130 CUDA_VISIBLE_DEVICES=" $GPU " " $VLLM_PYTHON " -m vllm.entrypoints.grpc_server
@@ -144,6 +146,9 @@ case "$ARM" in
144146 --model-path " $MODEL_SRC "
145147 --worker-urls " grpc://127.0.0.1:$ARM_B_GRPC_PORT "
146148 --host 0.0.0.0 --port " $ARM_B_GW_PORT "
149+ # Free port, not the fixed 29000 default — else a second SMG on the same
150+ # host (concurrent arm/leg) panics with "metrics server bind failed".
151+ --prometheus-port " $ARM_B_METRICS_PORT "
147152 )
148153 # Empty => omit, so SMG auto-detects (e.g. gpt-oss → harmony pipeline).
149154 [ -n " $SMG_TOOL_PARSER " ] && smg_cmd+=(--tool-call-parser " $SMG_TOOL_PARSER " )
0 commit comments