Skip to content

Commit c8d20c6

Browse files
rebel-jinhwanclaude
authored andcommitted
refactor(worker): drop local get_allowed_cpu_core_node_list, use vLLM 0.22 helpers
vLLM 0.22 provides get_visible_memory_node()/get_allowed_cpu_list() in vllm.utils.cpu_resource_utils, so the local lscpu-based reimplementation is no longer needed. Update tests to patch the new injection points. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent b607c6e commit c8d20c6

2 files changed

Lines changed: 47 additions & 74 deletions

File tree

tests/torch_compile/unit/v1/worker/test_utils.py

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -406,10 +406,12 @@ def _simple_cpu_list(self):
406406
_make_cpu(7, 3, 1), # NUMA 1, core 3
407407
]
408408

409-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
410-
def test_basic_single_rank(self, mock_get_allowed_cpus):
409+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
410+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
411+
def test_basic_single_rank(self, mock_nodes, mock_cpus):
411412
cpus = self._simple_cpu_list()
412-
mock_get_allowed_cpus.return_value = ([0, 1], cpus)
413+
mock_nodes.return_value = [0, 1]
414+
mock_cpus.return_value = cpus
413415

414416
parallel_cfg = _make_parallel_config(tp_size=1)
415417
result = get_autobind_cpu_ids(
@@ -425,10 +427,12 @@ def test_basic_single_rank(self, mock_get_allowed_cpus):
425427
any(c.id == cid and c.numa_node == 0 for c in cpus) for cid in cpu_ids
426428
)
427429

428-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
429-
def test_rank_round_robins_numa_nodes(self, mock_get_allowed_cpus):
430+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
431+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
432+
def test_rank_round_robins_numa_nodes(self, mock_nodes, mock_cpus):
430433
cpus = self._simple_cpu_list()
431-
mock_get_allowed_cpus.return_value = ([0, 1], cpus)
434+
mock_nodes.return_value = [0, 1]
435+
mock_cpus.return_value = cpus
432436
parallel_cfg = _make_parallel_config(tp_size=2)
433437

434438
r0 = get_autobind_cpu_ids(0, 0, parallel_cfg, lambda cpus: cpus)
@@ -439,20 +443,24 @@ def test_rank_round_robins_numa_nodes(self, mock_get_allowed_cpus):
439443
r1_ids = set(int(x) for x in r1.split(","))
440444
assert r0_ids.isdisjoint(r1_ids), "Ranks should not share CPUs"
441445

442-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
443-
def test_no_available_numa_returns_all(self, mock_get_allowed_cpus):
446+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
447+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
448+
def test_no_available_numa_returns_all(self, mock_nodes, mock_cpus):
444449
"""If allowed NUMA nodes don't have CPUs, return 'all'."""
445-
mock_get_allowed_cpus.return_value = ([], [])
450+
mock_nodes.return_value = []
451+
mock_cpus.return_value = []
446452

447453
parallel_cfg = _make_parallel_config()
448454
result = get_autobind_cpu_ids(0, 0, parallel_cfg, lambda cpus: cpus)
449455
assert result == "all"
450456

451-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
452-
def test_cpu_selector_filters_threads(self, mock_get_allowed_cpus):
457+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
458+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
459+
def test_cpu_selector_filters_threads(self, mock_nodes, mock_cpus):
453460
"""cpu_selector=lambda cpus: cpus[:1] should pick one thread per core."""
454461
cpus = self._simple_cpu_list()
455-
mock_get_allowed_cpus.return_value = ([0, 1], cpus)
462+
mock_nodes.return_value = [0, 1]
463+
mock_cpus.return_value = cpus
456464

457465
parallel_cfg = _make_parallel_config(tp_size=1)
458466
result = get_autobind_cpu_ids(
@@ -465,12 +473,14 @@ def test_cpu_selector_filters_threads(self, mock_get_allowed_cpus):
465473
# NUMA 0 has 2 cores, should get 2 CPUs (one per core)
466474
assert len(cpu_ids) == 2
467475

468-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
469-
def test_multiple_ranks_same_numa_exclusive_allocation(self, mock_get_allowed_cpus):
476+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
477+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
478+
def test_multiple_ranks_same_numa_exclusive_allocation(self, mock_nodes, mock_cpus):
470479
"""When 2 ranks map to the same NUMA node, CPUs are split."""
471480
# Single NUMA node with 4 cores, 1 thread each
472481
cpus = [_make_cpu(i, i, 0) for i in range(4)]
473-
mock_get_allowed_cpus.return_value = ([0], cpus)
482+
mock_nodes.return_value = [0]
483+
mock_cpus.return_value = cpus
474484

475485
parallel_cfg = _make_parallel_config(tp_size=2)
476486

@@ -482,11 +492,13 @@ def test_multiple_ranks_same_numa_exclusive_allocation(self, mock_get_allowed_cp
482492
assert r0_ids.isdisjoint(r1_ids)
483493
assert len(r0_ids) + len(r1_ids) == 4
484494

485-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
486-
def test_uneven_cpu_split(self, mock_get_allowed_cpus):
495+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
496+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
497+
def test_uneven_cpu_split(self, mock_nodes, mock_cpus):
487498
"""3 CPUs split between 2 ranks: one gets 2, other gets 1."""
488499
cpus = [_make_cpu(i, i, 0) for i in range(3)]
489-
mock_get_allowed_cpus.return_value = ([0], cpus)
500+
mock_nodes.return_value = [0]
501+
mock_cpus.return_value = cpus
490502

491503
parallel_cfg = _make_parallel_config(tp_size=2)
492504

@@ -497,11 +509,13 @@ def test_uneven_cpu_split(self, mock_get_allowed_cpus):
497509
r1_count = len(r1.split(","))
498510
assert {r0_count, r1_count} == {1, 2}
499511

500-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
501-
def test_dp_rank_affects_binding(self, mock_get_allowed_cpus):
512+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
513+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
514+
def test_dp_rank_affects_binding(self, mock_nodes, mock_cpus):
502515
"""Data parallelism changes rank_across_dp calculation."""
503516
cpus = [_make_cpu(i, i, 0) for i in range(8)]
504-
mock_get_allowed_cpus.return_value = ([0], cpus)
517+
mock_nodes.return_value = [0]
518+
mock_cpus.return_value = cpus
505519

506520
dp_cfg = SimpleNamespace(
507521
tensor_parallel_size=1,
@@ -517,11 +531,13 @@ def test_dp_rank_affects_binding(self, mock_get_allowed_cpus):
517531
# With single NUMA node, both ranks share, so rank 1 gets second half
518532
assert len(cpu_ids) == 4
519533

520-
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
521-
def test_empty_allocation_returns_all(self, mock_get_allowed_cpus):
534+
@patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
535+
@patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
536+
def test_empty_allocation_returns_all(self, mock_nodes, mock_cpus):
522537
"""If cpu_selector returns empty lists, should fallback to 'all'."""
523538
cpus = [_make_cpu(0, 0, 0)]
524-
mock_get_allowed_cpus.return_value = ([0], cpus)
539+
mock_nodes.return_value = [0]
540+
mock_cpus.return_value = cpus
525541

526542
# 2 ranks but only 1 CPU in the only NUMA node
527543
parallel_cfg = _make_parallel_config(tp_size=2)

vllm_rbln/v1/worker/utils.py

Lines changed: 7 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@
2424
from vllm.config import ModelConfig, ParallelConfig
2525
from vllm.model_executor.models.utils import extract_layer_index
2626
from vllm.platforms import CpuArchEnum, current_platform
27-
from vllm.platforms.cpu import CpuPlatform
28-
from vllm.utils.cpu_resource_utils import LogicalCPUInfo
27+
from vllm.utils.cpu_resource_utils import (
28+
LogicalCPUInfo,
29+
get_allowed_cpu_list,
30+
get_visible_memory_node,
31+
)
2932
from vllm.v1.worker.block_table import MultiGroupBlockTable
3033

3134
import vllm_rbln.rbln_envs as envs
@@ -268,53 +271,6 @@ def check_oom(available_dram_bytes: int) -> None:
268271
return available_dram_bytes
269272

270273

271-
# NOTE(RBLN): CpuPlatform.get_allowed_cpu_core_node_list() was removed in
272-
# https://github.com/vllm-project/vllm/pull/36487. Reimplemented here so
273-
# get_autobind_cpu_ids() can still resolve allowed NUMA nodes and logical CPUs.
274-
def get_allowed_cpu_core_node_list() -> tuple[list[int], list[LogicalCPUInfo]]:
275-
import json
276-
import re
277-
import subprocess
278-
279-
assert platform.system() == "Linux"
280-
281-
# Init LogicalCPUInfo from lscpu
282-
lscpu_output = subprocess.check_output(
283-
"lscpu -J -e=CPU,CORE,NODE", shell=True, text=True
284-
)
285-
lscpu_output = re.sub(r'"node":\s*-\s*(,|\n)', r'"node": 0\1', lscpu_output)
286-
logical_cpu_list: list[LogicalCPUInfo] = json.loads(
287-
lscpu_output, object_hook=LogicalCPUInfo.json_decoder
288-
)["cpus"]
289-
290-
# Filter CPUs with invalid attributes
291-
logical_cpu_list = [
292-
x for x in logical_cpu_list if -1 not in (x.id, x.physical_core, x.numa_node)
293-
]
294-
295-
# Filter allowed CPUs
296-
if hasattr(os, "sched_getaffinity"):
297-
allowed_cpu_id_list = os.sched_getaffinity(0)
298-
else:
299-
raise NotImplementedError("Unsupported OS")
300-
logical_cpu_list = [x for x in logical_cpu_list if x.id in allowed_cpu_id_list]
301-
302-
# Get allowed NUMA nodes
303-
allowed_numa_nodes = set()
304-
for x in logical_cpu_list:
305-
allowed_numa_nodes.add(x.numa_node) # type: ignore
306-
allowed_numa_nodes_list = sorted(allowed_numa_nodes)
307-
308-
env_key = CpuPlatform.device_control_env_var
309-
if env_key in os.environ and os.environ[env_key] != "":
310-
visible_nodes = [int(s) for s in os.environ[env_key].split(",")]
311-
allowed_numa_nodes_list = [
312-
x for x in sorted(list(set(visible_nodes))) if x in allowed_numa_nodes
313-
]
314-
315-
return allowed_numa_nodes_list, logical_cpu_list
316-
317-
318274
def get_autobind_cpu_ids(
319275
rank: int,
320276
local_rank: int,
@@ -332,7 +288,8 @@ def get_autobind_cpu_ids(
332288
Returns:
333289
Comma-separated string of CPU IDs, or "all" or "nobind".
334290
"""
335-
allowed_numa_nodes, logical_cpu_list = get_allowed_cpu_core_node_list()
291+
allowed_numa_nodes = get_visible_memory_node()
292+
logical_cpu_list = get_allowed_cpu_list()
336293

337294
# Calculate rank_across_dp for CPU binding
338295
# This ensures different DP groups get different CPU allocations

0 commit comments

Comments
 (0)