refactor(worker): drop local get_allowed_cpu_core_node_list, use vLLM 0.22 helpers

rebel-jinhwan · claude · rebel-eunji · commit c8d20c6a862c · 2026-06-11T10:22:23.000+09:00
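vLLM 0.22 provides get_visible_memory_node() and get_allowed_cpu_list() in
vllm.utils.cpu_resource_utils, so the local lscpu-based reimplementation
of get_allowed_cpu_core_node_list() (and its CpuPlatform import) is no
longer needed. get_autobind_cpu_ids() now calls the two helpers directly,
and the tests patch both of them in vllm_rbln.v1.worker.utils instead of
the removed function.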

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
diff --git a/tests/torch_compile/unit/v1/worker/test_utils.py b/tests/torch_compile/unit/v1/worker/test_utils.py
@@ -406,10 +406,12 @@ def _simple_cpu_list(self):
             _make_cpu(7, 3, 1),  # NUMA 1, core 3
         ]
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_basic_single_rank(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_basic_single_rank(self, mock_nodes, mock_cpus):
         cpus = self._simple_cpu_list()
-        mock_get_allowed_cpus.return_value = ([0, 1], cpus)
+        mock_nodes.return_value = [0, 1]
+        mock_cpus.return_value = cpus
 
         parallel_cfg = _make_parallel_config(tp_size=1)
         result = get_autobind_cpu_ids(
@@ -425,10 +427,12 @@ def test_basic_single_rank(self, mock_get_allowed_cpus):
             any(c.id == cid and c.numa_node == 0 for c in cpus) for cid in cpu_ids
         )
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_rank_round_robins_numa_nodes(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_rank_round_robins_numa_nodes(self, mock_nodes, mock_cpus):
         cpus = self._simple_cpu_list()
-        mock_get_allowed_cpus.return_value = ([0, 1], cpus)
+        mock_nodes.return_value = [0, 1]
+        mock_cpus.return_value = cpus
         parallel_cfg = _make_parallel_config(tp_size=2)
 
         r0 = get_autobind_cpu_ids(0, 0, parallel_cfg, lambda cpus: cpus)
@@ -439,20 +443,24 @@ def test_rank_round_robins_numa_nodes(self, mock_get_allowed_cpus):
         r1_ids = set(int(x) for x in r1.split(","))
         assert r0_ids.isdisjoint(r1_ids), "Ranks should not share CPUs"
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_no_available_numa_returns_all(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_no_available_numa_returns_all(self, mock_nodes, mock_cpus):
         """If allowed NUMA nodes don't have CPUs, return 'all'."""
-        mock_get_allowed_cpus.return_value = ([], [])
+        mock_nodes.return_value = []
+        mock_cpus.return_value = []
 
         parallel_cfg = _make_parallel_config()
         result = get_autobind_cpu_ids(0, 0, parallel_cfg, lambda cpus: cpus)
         assert result == "all"
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_cpu_selector_filters_threads(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_cpu_selector_filters_threads(self, mock_nodes, mock_cpus):
         """cpu_selector=lambda cpus: cpus[:1] should pick one thread per core."""
         cpus = self._simple_cpu_list()
-        mock_get_allowed_cpus.return_value = ([0, 1], cpus)
+        mock_nodes.return_value = [0, 1]
+        mock_cpus.return_value = cpus
 
         parallel_cfg = _make_parallel_config(tp_size=1)
         result = get_autobind_cpu_ids(
@@ -465,12 +473,14 @@ def test_cpu_selector_filters_threads(self, mock_get_allowed_cpus):
         # NUMA 0 has 2 cores, should get 2 CPUs (one per core)
         assert len(cpu_ids) == 2
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_multiple_ranks_same_numa_exclusive_allocation(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_multiple_ranks_same_numa_exclusive_allocation(self, mock_nodes, mock_cpus):
         """When 2 ranks map to the same NUMA node, CPUs are split."""
         # Single NUMA node with 4 cores, 1 thread each
         cpus = [_make_cpu(i, i, 0) for i in range(4)]
-        mock_get_allowed_cpus.return_value = ([0], cpus)
+        mock_nodes.return_value = [0]
+        mock_cpus.return_value = cpus
 
         parallel_cfg = _make_parallel_config(tp_size=2)
 
@@ -482,11 +492,13 @@ def test_multiple_ranks_same_numa_exclusive_allocation(self, mock_get_allowed_cp
         assert r0_ids.isdisjoint(r1_ids)
         assert len(r0_ids) + len(r1_ids) == 4
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_uneven_cpu_split(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_uneven_cpu_split(self, mock_nodes, mock_cpus):
         """3 CPUs split between 2 ranks: one gets 2, other gets 1."""
         cpus = [_make_cpu(i, i, 0) for i in range(3)]
-        mock_get_allowed_cpus.return_value = ([0], cpus)
+        mock_nodes.return_value = [0]
+        mock_cpus.return_value = cpus
 
         parallel_cfg = _make_parallel_config(tp_size=2)
 
@@ -497,11 +509,13 @@ def test_uneven_cpu_split(self, mock_get_allowed_cpus):
         r1_count = len(r1.split(","))
         assert {r0_count, r1_count} == {1, 2}
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_dp_rank_affects_binding(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_dp_rank_affects_binding(self, mock_nodes, mock_cpus):
         """Data parallelism changes rank_across_dp calculation."""
         cpus = [_make_cpu(i, i, 0) for i in range(8)]
-        mock_get_allowed_cpus.return_value = ([0], cpus)
+        mock_nodes.return_value = [0]
+        mock_cpus.return_value = cpus
 
         dp_cfg = SimpleNamespace(
             tensor_parallel_size=1,
@@ -517,11 +531,13 @@ def test_dp_rank_affects_binding(self, mock_get_allowed_cpus):
         # With single NUMA node, both ranks share, so rank 1 gets second half
         assert len(cpu_ids) == 4
 
-    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_core_node_list")
-    def test_empty_allocation_returns_all(self, mock_get_allowed_cpus):
+    @patch("vllm_rbln.v1.worker.utils.get_allowed_cpu_list")
+    @patch("vllm_rbln.v1.worker.utils.get_visible_memory_node")
+    def test_empty_allocation_returns_all(self, mock_nodes, mock_cpus):
         """If cpu_selector returns empty lists, should fallback to 'all'."""
         cpus = [_make_cpu(0, 0, 0)]
-        mock_get_allowed_cpus.return_value = ([0], cpus)
+        mock_nodes.return_value = [0]
+        mock_cpus.return_value = cpus
 
         # 2 ranks but only 1 CPU in the only NUMA node
         parallel_cfg = _make_parallel_config(tp_size=2)
diff --git a/vllm_rbln/v1/worker/utils.py b/vllm_rbln/v1/worker/utils.py
@@ -24,8 +24,11 @@
 from vllm.config import ModelConfig, ParallelConfig
 from vllm.model_executor.models.utils import extract_layer_index
 from vllm.platforms import CpuArchEnum, current_platform
-from vllm.platforms.cpu import CpuPlatform
-from vllm.utils.cpu_resource_utils import LogicalCPUInfo
+from vllm.utils.cpu_resource_utils import (
+    LogicalCPUInfo,
+    get_allowed_cpu_list,
+    get_visible_memory_node,
+)
 from vllm.v1.worker.block_table import MultiGroupBlockTable
 
 import vllm_rbln.rbln_envs as envs
@@ -268,53 +271,6 @@ def check_oom(available_dram_bytes: int) -> None:
     return available_dram_bytes
 
 
-# NOTE(RBLN): CpuPlatform.get_allowed_cpu_core_node_list() was removed in
-# https://github.com/vllm-project/vllm/pull/36487. Reimplemented here so
-# get_autobind_cpu_ids() can still resolve allowed NUMA nodes and logical CPUs.
-def get_allowed_cpu_core_node_list() -> tuple[list[int], list[LogicalCPUInfo]]:
-    import json
-    import re
-    import subprocess
-
-    assert platform.system() == "Linux"
-
-    # Init LogicalCPUInfo from lscpu
-    lscpu_output = subprocess.check_output(
-        "lscpu -J -e=CPU,CORE,NODE", shell=True, text=True
-    )
-    lscpu_output = re.sub(r'"node":\s*-\s*(,|\n)', r'"node": 0\1', lscpu_output)
-    logical_cpu_list: list[LogicalCPUInfo] = json.loads(
-        lscpu_output, object_hook=LogicalCPUInfo.json_decoder
-    )["cpus"]
-
-    # Filter CPUs with invalid attributes
-    logical_cpu_list = [
-        x for x in logical_cpu_list if -1 not in (x.id, x.physical_core, x.numa_node)
-    ]
-
-    # Filter allowed CPUs
-    if hasattr(os, "sched_getaffinity"):
-        allowed_cpu_id_list = os.sched_getaffinity(0)
-    else:
-        raise NotImplementedError("Unsupported OS")
-    logical_cpu_list = [x for x in logical_cpu_list if x.id in allowed_cpu_id_list]
-
-    # Get allowed NUMA nodes
-    allowed_numa_nodes = set()
-    for x in logical_cpu_list:
-        allowed_numa_nodes.add(x.numa_node)  # type: ignore
-    allowed_numa_nodes_list = sorted(allowed_numa_nodes)
-
-    env_key = CpuPlatform.device_control_env_var
-    if env_key in os.environ and os.environ[env_key] != "":
-        visible_nodes = [int(s) for s in os.environ[env_key].split(",")]
-        allowed_numa_nodes_list = [
-            x for x in sorted(list(set(visible_nodes))) if x in allowed_numa_nodes
-        ]
-
-    return allowed_numa_nodes_list, logical_cpu_list
-
-
 def get_autobind_cpu_ids(
     rank: int,
     local_rank: int,
@@ -332,7 +288,8 @@ def get_autobind_cpu_ids(
     Returns:
         Comma-separated string of CPU IDs, or "all" or "nobind".
     """
-    allowed_numa_nodes, logical_cpu_list = get_allowed_cpu_core_node_list()
+    allowed_numa_nodes = get_visible_memory_node()
+    logical_cpu_list = get_allowed_cpu_list()
 
     # Calculate rank_across_dp for CPU binding
     # This ensures different DP groups get different CPU allocations