use open port at runtime

wuhang2014 · wuhang2014 · commit 0e1105fe2ce5 · 2026-02-03T18:47:13.000+08:00
Signed-off-by: wuhang <wuhang6@huawei.com>
diff --git a/vllm_omni/entrypoints/cli/serve.py b/vllm_omni/entrypoints/cli/serve.py
@@ -31,7 +31,6 @@
     load_stage_configs_from_yaml,
     resolve_model_config_path,
 )
-from vllm_omni.entrypoints.zmq_utils import ZmqQueueSpec
 
 logger = init_logger(__name__)
 
@@ -356,13 +355,40 @@ def run_headless(args: argparse.Namespace) -> None:
 
     omni_master_address = getattr(args, "omni_master_address", None) or "127.0.0.1"
     omni_master_port = int(getattr(args, "omni_master_port", 5555) or 5555)
-    base_port = omni_master_port + 1
-    in_endpoint = f"tcp://{omni_master_address}:{base_port + single_stage_id * 2}"
-    out_endpoint = f"tcp://{omni_master_address}:{base_port + single_stage_id * 2 + 1}"
 
-    in_q_spec = ZmqQueueSpec(endpoint=in_endpoint, socket_type=zmq.PULL, bind=False)
-    out_q_spec = ZmqQueueSpec(endpoint=out_endpoint, socket_type=zmq.PUSH, bind=False)
+    # Perform handshake with orchestrator to get dynamically allocated endpoints
     zmq_ctx = zmq.Context()
+    handshake_socket = zmq_ctx.socket(zmq.REQ)
+    handshake_socket.linger = 0
+    handshake_endpoint = f"tcp://{omni_master_address}:{omni_master_port}"
+
+    try:
+        handshake_socket.connect(handshake_endpoint)
+        handshake_msg = {"type": "handshake", "stage_id": single_stage_id}
+        handshake_socket.send_pyobj(handshake_msg)
+
+        # Wait for response with timeout
+        if handshake_socket.poll(timeout=10000):  # 10 second timeout
+            response = handshake_socket.recv_pyobj()
+            if not response.get("ok", False):
+                error_msg = response.get("error", "unknown error")
+                raise RuntimeError(f"Handshake failed for stage-{single_stage_id}: {error_msg}")
+
+            in_q_spec = response.get("in_spec")
+            out_q_spec = response.get("out_spec")
+
+            if in_q_spec is None or out_q_spec is None:
+                raise RuntimeError(f"Handshake response missing specs for stage-{single_stage_id}")
+
+            logger.info(
+                f"[Headless] Stage-{single_stage_id} received endpoints via handshake: "
+                f"in={in_q_spec.endpoint}, out={out_q_spec.endpoint}"
+            )
+        else:
+            raise TimeoutError(f"Handshake timeout for stage-{single_stage_id} at {handshake_endpoint}")
+
+    finally:
+        handshake_socket.close(0)
     in_q = None
     out_q = None
 
diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py
@@ -18,6 +18,7 @@
 from tqdm.auto import tqdm
 from vllm import SamplingParams
 from vllm.logger import init_logger
+from vllm.utils.network_utils import get_open_port
 
 from vllm_omni.distributed.omni_connectors import (
     get_stage_connector_config,
@@ -163,7 +164,7 @@ def __init__(self, model: str, **kwargs: Any) -> None:
         self._zmq_handshake_socket: zmq.Socket | None = None
         self._zmq_handshake_thread: threading.Thread | None = None
         self._zmq_handshake_stop: threading.Event | None = None
-        self._zmq_handshake_specs: dict[int, ZmqQueueSpec] = {}
+        self._zmq_handshake_specs: dict[int, tuple[ZmqQueueSpec, ZmqQueueSpec]] = {}
         self._zmq_handshake_seen: set[int] = set()
         self._total_stage_count: int = 0
         self._single_stage_id: int | None = None
@@ -264,6 +265,9 @@ def _initialize_stages(self, model: str, kwargs: dict[str, Any]) -> None:
 
         base_engine_args = {"tokenizer": tokenizer} if tokenizer is not None else None
 
+        # TODO(wuhang):
+        # Remove kwargs as parameters in the future.
+        # Use dataclass directly.
         parallel_keys = [
             "tensor_parallel_size",
             "pipeline_parallel_size",
@@ -329,8 +333,6 @@ def _build_stage(idx_cfg: tuple[int, Any]) -> tuple[int, OmniStage]:
             idx, cfg = idx_cfg
             return idx, OmniStage(cfg, stage_init_timeout=stage_init_timeout)
 
-        logger.info(f"====== stage configs:\n{pformat(OmegaConf.to_container(self.stage_configs))}")
-
         with ThreadPoolExecutor(max_workers=min(len(self.stage_configs), max(1, os.cpu_count() or 1))) as executor:
             futures = [executor.submit(_build_stage, (idx, cfg)) for idx, cfg in enumerate(self.stage_configs)]
             results: list[tuple[int, OmniStage]] = []
@@ -373,17 +375,33 @@ def _start_stages(self, model: str) -> None:
         if self.worker_backend != "ray":
             self._ensure_zmq_handshake_server()
 
-        base_port = int(self._zmq_master_port or 5555) + 1
+        # Pre-allocate ports for all stages using dynamic port allocation
+        stage_ports: dict[int, tuple[int, int]] = {}
         if self.worker_backend != "ray":
-            self._zmq_handshake_specs = {}
             total_stages = self._total_stage_count or len(self.stage_list)
             for sid in range(total_stages):
-                out_endpoint = f"tcp://{self._zmq_master_address}:{base_port + sid * 2 + 1}"
-                self._zmq_handshake_specs[sid] = ZmqQueueSpec(
+                in_port = get_open_port()
+                out_port = get_open_port()
+                stage_ports[sid] = (in_port, out_port)
+                logger.debug(f"[{self._name}] Allocated ports for stage-{sid}: in={in_port}, out={out_port}")
+
+            # Build handshake specs with allocated ports
+            self._zmq_handshake_specs = {}
+            for sid in range(total_stages):
+                in_port, out_port = stage_ports[sid]
+                in_endpoint = f"tcp://{self._zmq_master_address}:{in_port}"
+                out_endpoint = f"tcp://{self._zmq_master_address}:{out_port}"
+                in_spec = ZmqQueueSpec(
+                    endpoint=in_endpoint,
+                    socket_type=zmq.PULL,
+                    bind=False,
+                )
+                out_spec = ZmqQueueSpec(
                     endpoint=out_endpoint,
                     socket_type=zmq.PUSH,
                     bind=False,
                 )
+                self._zmq_handshake_specs[sid] = (in_spec, out_spec)
 
         for stage_id, stage in enumerate[OmniStage](self.stage_list):
             if self.worker_backend == "ray":
@@ -392,8 +410,9 @@ def _start_stages(self, model: str) -> None:
                 in_spec = None
                 out_spec = None
             else:
-                in_endpoint = f"tcp://{self._zmq_master_address}:{base_port + stage_id * 2}"
-                out_endpoint = f"tcp://{self._zmq_master_address}:{base_port + stage_id * 2 + 1}"
+                in_port, out_port = stage_ports[stage_id]
+                in_endpoint = f"tcp://{self._zmq_master_address}:{in_port}"
+                out_endpoint = f"tcp://{self._zmq_master_address}:{out_port}"
                 in_q = ZmqQueue(self._zmq_ctx, zmq.PUSH, bind=in_endpoint)
                 out_q = ZmqQueue(self._zmq_ctx, zmq.PULL, bind=out_endpoint)
                 in_spec = ZmqQueueSpec(endpoint=in_endpoint, socket_type=zmq.PULL, bind=False)
@@ -635,12 +654,17 @@ def _serve() -> None:
                 try:
                     if isinstance(msg, dict) and msg.get("type") == "handshake":
                         stage_id = int(msg.get("stage_id"))
-                        out_spec = self._zmq_handshake_specs.get(stage_id)
-                        if out_spec is None:
+                        specs = self._zmq_handshake_specs.get(stage_id)
+                        if specs is None:
                             resp = {"ok": False, "error": f"unknown stage_id: {stage_id}"}
                         else:
                             self._zmq_handshake_seen.add(stage_id)
-                            resp = {"ok": True, "out_spec": out_spec}
+                            in_spec, out_spec = specs
+                            resp = {
+                                "ok": True,
+                                "in_spec": in_spec,
+                                "out_spec": out_spec,
+                            }
                             logger.info(
                                 "[%s] Handshake received from stage-%s",
                                 self._name,
diff --git a/vllm_omni/entrypoints/omni_stage.py b/vllm_omni/entrypoints/omni_stage.py
@@ -53,7 +53,6 @@
     ZmqQueue,
     ZmqQueueSpec,
     create_zmq_queue,
-    request_zmq_out_spec,
 )
 from vllm_omni.inputs.data import OmniDiffusionSamplingParams, OmniPromptType, OmniSamplingParams, OmniTokensPrompt
 from vllm_omni.outputs import OmniRequestOutput
@@ -326,9 +325,6 @@ def init_stage_worker(
             "engine_input_source": self.engine_input_source,
             "final_output": self.final_output,
             "final_output_type": self.final_output_type,
-            "zmq_master_address": self._zmq_master_address,
-            "zmq_master_port": self._zmq_master_port,
-            "zmq_use_handshake": self._zmq_use_handshake,
         }
         try:
             old_env = os.environ.get("VLLM_LOGGING_PREFIX")
@@ -576,21 +572,6 @@ def _stage_worker(
     if stage_type != "diffusion":
         _resolve_worker_cls(engine_args)
 
-    zmq_master_address = stage_payload.get("zmq_master_address")
-    zmq_master_port = stage_payload.get("zmq_master_port")
-    use_zmq_handshake = bool(stage_payload.get("zmq_use_handshake", False))
-
-    if use_zmq_handshake and zmq_master_address and zmq_master_port:
-        try:
-            master_endpoint = f"tcp://{zmq_master_address}:{int(zmq_master_port)}"
-            out_q = request_zmq_out_spec(master_endpoint, stage_id)
-        except Exception as e:
-            logger.warning(
-                "[Stage-%s] ZMQ handshake failed, falling back to provided out_q spec: %s",
-                stage_id,
-                e,
-            )
-
     # Resolve ZMQ queue specs if needed
     zmq_ctx = None
     if isinstance(in_q, ZmqQueueSpec) or isinstance(out_q, ZmqQueueSpec):
@@ -1145,24 +1126,10 @@ async def _stage_worker_async(
     stage_type = stage_payload.get("stage_type", "llm")
     final_output = stage_payload.get("final_output", False)
     final_output_type = stage_payload.get("final_output_type", None)
-    zmq_master_address = stage_payload.get("zmq_master_address")
-    zmq_master_port = stage_payload.get("zmq_master_port")
-    use_zmq_handshake = bool(stage_payload.get("zmq_use_handshake", False))
 
     if stage_type != "diffusion":
         _resolve_worker_cls(engine_args)
 
-    if use_zmq_handshake and zmq_master_address and zmq_master_port:
-        try:
-            master_endpoint = f"tcp://{zmq_master_address}:{int(zmq_master_port)}"
-            out_q = request_zmq_out_spec(master_endpoint, stage_id)
-        except Exception as e:
-            logger.warning(
-                "[Stage-%s] ZMQ handshake failed, falling back to provided out_q spec: %s",
-                stage_id,
-                e,
-            )
-
     # Resolve ZMQ queue specs if needed
     zmq_ctx = None
     if isinstance(in_q, ZmqQueueSpec) or isinstance(out_q, ZmqQueueSpec):
diff --git a/vllm_omni/entrypoints/zmq_utils.py b/vllm_omni/entrypoints/zmq_utils.py
@@ -97,7 +97,11 @@ def request_zmq_out_spec(
     *,
     timeout_ms: int = 30000,
 ) -> ZmqQueueSpec:
-    """Request the output queue spec for a stage via the master handshake."""
+    """Request the output queue spec for a stage via the master handshake.
+
+    Note: This function only returns out_spec for backward compatibility.
+    Use request_zmq_specs() to get both in_spec and out_spec.
+    """
 
     ctx = zmq.Context.instance()
     sock = ctx.socket(zmq.REQ)
@@ -124,3 +128,57 @@ def request_zmq_out_spec(
     if isinstance(out_spec, dict):
         return ZmqQueueSpec(**out_spec)
     raise RuntimeError(f"Invalid out_spec type: {type(out_spec)}")
+
+
+def request_zmq_specs(
+    master_endpoint: str,
+    stage_id: int,
+    *,
+    timeout_ms: int = 30000,
+) -> tuple[ZmqQueueSpec, ZmqQueueSpec]:
+    """Request both input and output queue specs for a stage via the master handshake.
+
+    Returns:
+        tuple[ZmqQueueSpec, ZmqQueueSpec]: A tuple of (in_spec, out_spec)
+    """
+
+    ctx = zmq.Context.instance()
+    sock = ctx.socket(zmq.REQ)
+    sock.linger = 0
+    sock.rcvtimeo = int(timeout_ms)
+    sock.sndtimeo = int(timeout_ms)
+    sock.connect(master_endpoint)
+    try:
+        sock.send_pyobj({"type": "handshake", "stage_id": int(stage_id)})
+        resp = sock.recv_pyobj()
+    finally:
+        try:
+            sock.close(0)
+        except Exception:
+            pass
+
+    if not isinstance(resp, dict):
+        raise RuntimeError(f"Invalid handshake response: {type(resp)}")
+
+    in_spec_data = resp.get("in_spec")
+    out_spec_data = resp.get("out_spec")
+
+    if in_spec_data is None or out_spec_data is None:
+        raise RuntimeError(f"Handshake response missing specs: {resp}")
+
+    # Convert to ZmqQueueSpec if needed
+    if isinstance(in_spec_data, ZmqQueueSpec):
+        in_spec = in_spec_data
+    elif isinstance(in_spec_data, dict):
+        in_spec = ZmqQueueSpec(**in_spec_data)
+    else:
+        raise RuntimeError(f"Invalid in_spec type: {type(in_spec_data)}")
+
+    if isinstance(out_spec_data, ZmqQueueSpec):
+        out_spec = out_spec_data
+    elif isinstance(out_spec_data, dict):
+        out_spec = ZmqQueueSpec(**out_spec_data)
+    else:
+        raise RuntimeError(f"Invalid out_spec type: {type(out_spec_data)}")
+
+    return in_spec, out_spec