Skip to content

Commit 5222aa7

Browse files
committed
refactor(multimodal): tighten review comments, dedup SHM probe name
Trim the rationale comments from the prior two review commits down to the non-obvious "why" only, and reuse next_mm_shm_name() for the /dev/shm writability probe instead of duplicating the unix-nanos block. Signed-off-by: Simo Lin <linsimo.mark@gmail.com>
1 parent 5c72d3d commit 5222aa7

4 files changed

Lines changed: 9 additions & 22 deletions

File tree

grpc_servicer/smg_grpc_servicer/mm_shm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,8 @@ def tensor_payload_bytes_from_shm(shm_handle, shm_dir: str = DEFAULT_SHM_DIR) ->
4848
path = os.path.join(shm_dir, name)
4949
fd = None
5050
try:
51-
# O_NOFOLLOW: /dev/shm is world-writable, so a same-host attacker could
52-
# plant a symlink at the (validated) name pointing at an arbitrary file;
53-
# refuse to follow it so a crafted handle can't read/unlink outside SHM.
51+
# O_NOFOLLOW: /dev/shm is world-writable; refuse to follow a symlink planted
52+
# at the validated name (would otherwise read/unlink an arbitrary file).
5453
fd = os.open(path, os.O_RDONLY | os.O_NOFOLLOW)
5554
raw = os.pread(fd, int(shm_handle.nbytes), int(shm_handle.offset))
5655
finally:

model_gateway/src/routers/grpc/common/stages/request_execution.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -569,9 +569,8 @@ impl RequestExecutionStage {
569569
debug!("vLLM PD: prefill completed, sending decode request");
570570

571571
// Decode reuses proto_request as-is; same request_id as the prefill leg is
572-
// load-bearing for NIXL P/D correlation on vLLM < 0.13. Multimodal stays
573-
// inline for PD (see resolve_mm_shm_enabled), so the decode leg can still
574-
// recompute the prompt locally when KV relay is skipped (e.g. n>1 NIXL).
572+
// load-bearing for NIXL P/D correlation on vLLM < 0.13. Multimodal is inline
573+
// for PD, so decode keeps its inputs for the recompute path.
575574
let mut decode_request = proto_request;
576575
if let Some(rank) = workers.decode_worker().and_then(|w| w.dp_rank()) {
577576
decode_request.set_data_parallel_rank(rank as i32);

model_gateway/src/routers/grpc/multimodal.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,20 +1750,15 @@ fn resolve_mm_shm_enabled(
17501750
transport: Option<&MmTransportConfig>,
17511751
) -> bool {
17521752
log_mm_transport_config_once(transport);
1753-
// SHM is single-consumer (the servicer unlinks each segment after reading),
1754-
// but a PD request has two legs: prefill always reads, and decode re-reads
1755-
// whenever it recomputes the prompt instead of resuming from transferred KV.
1756-
// Force inline for disaggregated requests to avoid both the double-read and
1757-
// dropping pixels the decode leg still needs.
1753+
// SHM segments are single-consumer (unlinked after one read); a PD decode leg
1754+
// may re-read or recompute, so force inline for disaggregated requests.
17581755
if matches!(workers, Some(WorkerSelection::Dual { .. })) {
17591756
return false;
17601757
}
17611758
let global_mode = transport
17621759
.map(|transport| transport.mode.as_str())
17631760
.unwrap_or(DEFAULT_MM_TENSOR_TRANSPORT);
1764-
// A per-worker override wins, but only when it's a recognized mode; an
1765-
// invalid override is logged and ignored so a typo can't silently disable a
1766-
// valid global shm/auto.
1761+
// Ignore an unrecognized per-worker override so a typo can't disable a valid global.
17671762
let mode = match worker_transport_mode_override(workers) {
17681763
Some(mode) if matches!(mode.as_str(), "inline" | "shm" | "auto") => mode,
17691764
Some(invalid) => {

model_gateway/src/routers/grpc/proto_wrapper.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -503,14 +503,8 @@ fn write_mm_shm(data: &[u8]) -> std::io::Result<common::ShmHandle> {
503503
pub fn mm_shm_dev_writable() -> bool {
504504
static WRITABLE: OnceLock<bool> = OnceLock::new();
505505
*WRITABLE.get_or_init(|| {
506-
// pid alone collides across separate PID namespaces that share /dev/shm
507-
// (each can be PID 1); add a nanosecond stamp so the `create_new` probe
508-
// doesn't spuriously fail and cache `false` for a writable /dev/shm.
509-
let nanos = SystemTime::now()
510-
.duration_since(UNIX_EPOCH)
511-
.map(|duration| duration.as_nanos())
512-
.unwrap_or_default();
513-
let name = format!("{}probe-{}-{}", MM_SHM_NAME_PREFIX, process::id(), nanos);
506+
// Unique name; pid alone collides across PID namespaces sharing /dev/shm.
507+
let name = next_mm_shm_name();
514508
let path = mm_shm_path(&name);
515509
// `create_new` (no clobber) + owner-only mode: /dev/shm is world-writable,
516510
// so plain create(truncate) is open to symlink/clobber attacks and the

0 commit comments

Comments
 (0)