44from __future__ import annotations
55
66import inspect
7+ import logging
8+ import re
9+ import shutil
710import socket
11+ import tempfile
812from pathlib import Path
913from typing import Any
1014
1418from sglang_omni_v1 .pipeline .stage .input import InputHandler
1519from sglang_omni_v1 .utils import import_string
1620
21+ logger = logging .getLogger (__name__ )
1722
18- def compile_pipeline (config : PipelineConfig ) -> tuple [Coordinator , list [Stage ]]:
19- """Build the coordinator and stage objects from the pipeline configuration."""
20- stages_cfg , name_map , entry_stage = config .apply_fusion ()
21- endpoints = _allocate_endpoints (config , stages = stages_cfg )
2223
23- coordinator = Coordinator (
24- completion_endpoint = endpoints [ "completion" ],
25- abort_endpoint = endpoints [ "abort" ],
26- entry_stage = entry_stage ,
27- terminal_stages = config . terminal_stages or None ,
28- )
class IpcRuntimeDir:
    """Handle on the IPC directory that belongs to a single pipeline instance.

    Usable as a context manager: leaving the ``with`` block calls
    :meth:`close`, which removes the directory tree at ``path``.
    """

    def __init__(self, path: Path):
        self._closed = False
        self.path = path

    def __enter__(self) -> IpcRuntimeDir:
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def close(self) -> None:
        """Remove the directory tree once; later calls are no-ops."""
        if not self._closed:
            # Flag first so a failed removal is never attempted twice.
            self._closed = True
            try:
                shutil.rmtree(self.path)
            except FileNotFoundError:
                # Directory is already gone; nothing left to clean up.
                pass
            except OSError as err:
                # Best effort: report the failure instead of raising.
                logger.warning(
                    "Failed to remove IPC runtime dir %s: %s", self.path, err
                )
47+
48+
def create_ipc_runtime_dir(config: PipelineConfig) -> IpcRuntimeDir | None:
    """Make a fresh, uniquely named IPC directory for one pipeline instance.

    Returns ``None`` when ``config.endpoints.scheme`` is not ``"ipc"``.
    Otherwise ensures ``config.endpoints.base_path`` exists, creates a new
    subdirectory inside it with ``tempfile.mkdtemp`` (prefixed with a slug of
    ``config.name``) and wraps that path in an ``IpcRuntimeDir``.
    """
    endpoint_cfg = config.endpoints
    if endpoint_cfg.scheme != "ipc":
        return None

    parent = Path(endpoint_cfg.base_path)
    parent.mkdir(parents=True, exist_ok=True)

    # Slug: lowercase the name, collapse every run of characters outside
    # [0-9a-z] into one "-", trim edge dashes; fall back to "pipeline" when
    # nothing remains.
    slug = re.sub(r"[^0-9a-z]+", "-", config.name.lower()).strip("-") or "pipeline"
    run_dir = tempfile.mkdtemp(prefix=f"{slug}-", dir=parent)
    return IpcRuntimeDir(Path(run_dir))
62+
63+
def prepare_pipeline_runtime(
    config: PipelineConfig,
    *,
    ipc_runtime_dir: IpcRuntimeDir | None = None,
) -> tuple[
    list[StageConfig], dict[str, str], str, dict[str, str], IpcRuntimeDir | None, bool
]:
    """Run stage fusion and endpoint allocation for a single runtime.

    When the caller supplies ``ipc_runtime_dir`` it is used as-is; otherwise
    ``create_ipc_runtime_dir(config)`` is asked for one (which may yield
    ``None``). A directory created here is closed again if fusion or endpoint
    allocation raises, and the exception is re-raised.

    Returns:
        ``(stages_cfg, name_map, entry_stage, endpoints, runtime_dir,
        owns_runtime_dir)`` where the first three come from
        ``config.apply_fusion()``, ``endpoints`` from ``_allocate_endpoints``,
        and ``owns_runtime_dir`` is true only when this call created the
        directory.
    """
    # Only a directory created by this call is "owned" (and cleaned up on error).
    owned_dir = create_ipc_runtime_dir(config) if ipc_runtime_dir is None else None
    active_dir = owned_dir if ipc_runtime_dir is None else ipc_runtime_dir

    try:
        stages_cfg, name_map, entry_stage = config.apply_fusion()
        ipc_base = active_dir.path if active_dir else None
        endpoints = _allocate_endpoints(
            config,
            stages=stages_cfg,
            ipc_base_dir=ipc_base,
        )
    except Exception:
        if owned_dir is not None:
            owned_dir.close()
        raise

    owns_runtime_dir = owned_dir is not None
    return stages_cfg, name_map, entry_stage, endpoints, active_dir, owns_runtime_dir
92+
93+
def compile_pipeline_core(
    config: PipelineConfig,
    *,
    ipc_runtime_dir: IpcRuntimeDir | None = None,
) -> tuple[Coordinator, list[Stage], IpcRuntimeDir | None]:
    """Construct the coordinator and every stage described by ``config``.

    Delegates fusion and endpoint allocation to ``prepare_pipeline_runtime``,
    then builds the ``Coordinator``, compiles and registers each stage, and
    wires stream targets. If any of that raises and the runtime directory was
    created by this call chain, the directory is closed before re-raising.

    Returns:
        ``(coordinator, stages, runtime_dir)``; ``runtime_dir`` is whatever
        ``prepare_pipeline_runtime`` reported (possibly ``None``).
    """
    (
        stages_cfg,
        name_map,
        entry_stage,
        endpoints,
        runtime_dir,
        owns_runtime_dir,
    ) = prepare_pipeline_runtime(config, ipc_runtime_dir=ipc_runtime_dir)

    try:
        coordinator = Coordinator(
            completion_endpoint=endpoints["completion"],
            abort_endpoint=endpoints["abort"],
            entry_stage=entry_stage,
            terminal_stages=config.terminal_stages or None,
        )

        # Per-stage endpoint lookup keyed by stage name.
        stage_endpoints = {}
        for cfg in stages_cfg:
            stage_endpoints[cfg.name] = endpoints[f"stage_{cfg.name}"]

        stages: list[Stage] = []
        for cfg in stages_cfg:
            built = _compile_stage(
                cfg, config, stage_endpoints, endpoints, name_map=name_map
            )
            coordinator.register_stage(built.name, built.control_plane.recv_endpoint)
            stages.append(built)

        # Wire streaming targets once every stage object exists.
        stage_map = {built.name: built for built in stages}
        cfg_map = {cfg.name: cfg for cfg in stages_cfg}
        for cfg in stages_cfg:
            target = stage_map.get(cfg.name)
            if target is not None:
                _wire_stream_targets(
                    target,
                    cfg,
                    stage_map,
                    gpu_placement=config.gpu_placement,
                    cfg_map=cfg_map,
                )
    except Exception:
        # Only tear down a directory this call chain created.
        if owns_runtime_dir and runtime_dir is not None:
            runtime_dir.close()
        raise

    return coordinator, stages, runtime_dir
144+
145+
def compile_pipeline(config: PipelineConfig) -> tuple[Coordinator, list[Stage]]:
    """Compile a non-IPC pipeline config into its coordinator and stages.

    IPC configs are rejected here: they need an explicitly owned runtime
    directory so that multiple replicas cannot bind the same local sockets.

    Raises:
        ValueError: if ``config.endpoints.scheme`` is ``"ipc"``.
    """
    uses_ipc = config.endpoints.scheme == "ipc"
    if not uses_ipc:
        # The third element (runtime dir) is not part of this function's result.
        coordinator, stages, _runtime_dir = compile_pipeline_core(config)
        return coordinator, stages

    raise ValueError(
        "compile_pipeline() does not manage IPC runtime-dir ownership. "
        "Use compile_pipeline_core(...) or MultiProcessPipelineRunner."
    )
55160
56161
@@ -252,6 +357,7 @@ def _allocate_endpoints(
252357 config : PipelineConfig ,
253358 * ,
254359 stages : list [StageConfig ],
360+ ipc_base_dir : Path | None = None ,
255361) -> dict [str , str ]:
256362 endpoints : dict [str , str ] = {}
257363
@@ -261,8 +367,9 @@ def _allocate_endpoints(
261367 endpoints ["abort" ] = config .abort_endpoint
262368
263369 if config .endpoints .scheme == "ipc" :
264- base_dir = Path (config .endpoints .base_path ) / config .name
265- base_dir .mkdir (parents = True , exist_ok = True )
370+ if ipc_base_dir is None :
371+ raise ValueError ("IPC endpoint allocation requires an IPC runtime dir" )
372+ base_dir = ipc_base_dir
266373 endpoints .setdefault ("completion" , f"ipc://{ base_dir } /completion.sock" )
267374 endpoints .setdefault ("abort" , f"ipc://{ base_dir } /abort.sock" )
268375 for s in stages :
0 commit comments