Skip to content

Commit 64529b2

Browse files
rapsealkclaude
andcommitted
refactor(agent): Resolve remaining review comments on CpusetMems pinning
- Query libnuma.node_of_cpu() only for allocated cores instead of calling self.list_devices(), which iterates every core on the host on each container start. Drops per-start overhead from O(host_cores) to O(allocated_cores). - Reword the CpusetMems comment to reflect that omission for multi-node allocations is a deliberate policy choice (defer to kernel default NUMA placement) rather than a Docker HostConfig API limitation. - Update the NUMA-locality unit tests to patch libnuma.node_of_cpu directly, matching the new implementation seam. Refs #11217 Refs #11222 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e4ba8ee commit 64529b2

2 files changed

Lines changed: 36 additions & 64 deletions

File tree

src/ai/backend/agent/docker/intrinsic.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -437,17 +437,16 @@ async def generate_docker_args(
437437
"Cpus": len(cores),
438438
"CpusetCpus": ",".join(sorted_core_ids),
439439
}
440-
devices = await self.list_devices()
441-
core_to_node = {int(dev.device_id): dev.numa_node for dev in devices}
442440
allocated_nodes: set[int] = set()
443441
for core in cores:
444-
node = core_to_node.get(core)
445-
if node is None or node < 0:
442+
node = libnuma.node_of_cpu(core)
443+
if node < 0:
446444
allocated_nodes.clear()
447445
break
448446
allocated_nodes.add(node)
449-
# Pin memory to the NUMA node only when the allocation is fully node-local;
450-
# Docker does not expose a multi-node cpuset.mems equivalent via HostConfig.
447+
# Pin memory only when the CPU allocation is fully node-local.
448+
# For multi-node CPU allocations, intentionally leave CpusetMems unset
449+
# so Docker/kernel default NUMA memory placement policy can apply.
451450
if len(allocated_nodes) == 1:
452451
host_config["CpusetMems"] = str(next(iter(allocated_nodes)))
453452
return {

tests/unit/agent/test_docker_intrinsic.py

Lines changed: 31 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,12 @@
1313

1414
from ai.backend.agent.docker.intrinsic import (
1515
ContainerNetStat,
16-
CPUDevice,
1716
CPUPlugin,
1817
MemoryPlugin,
1918
read_proc_net_dev,
2019
)
2120
from ai.backend.agent.stats import StatModes
22-
from ai.backend.common.types import DeviceId, DeviceName, SlotName
21+
from ai.backend.common.types import DeviceId, SlotName
2322

2423

2524
class BaseDockerIntrinsicTest:
@@ -625,36 +624,31 @@ def cpu_plugin(self) -> CPUPlugin:
625624
plugin._docker = AsyncMock()
626625
return plugin
627626

628-
@staticmethod
629-
def _make_device(core_id: int, numa_node: int | None) -> CPUDevice:
630-
return CPUDevice(
631-
device_id=DeviceId(str(core_id)),
632-
hw_location="root",
633-
memory_size=0,
634-
processing_units=1,
635-
numa_node=numa_node,
636-
device_name=DeviceName("cpu"),
637-
)
638-
639627
@staticmethod
640628
def _device_alloc(core_ids: list[int]) -> dict[SlotName, dict[DeviceId, Decimal]]:
641629
return {
642630
SlotName("cpu"): {DeviceId(str(cid)): Decimal("1") for cid in core_ids},
643631
}
644632

633+
@staticmethod
634+
@contextmanager
635+
def _patch_node_of_cpu(core_to_node: dict[int, int]) -> Generator[None, None, None]:
636+
"""Patch libnuma.node_of_cpu; return -1 for any core missing from the map
637+
(matches real libnuma's behavior for unknown cores when NUMA info is
638+
unavailable)."""
639+
with patch(
640+
"ai.backend.agent.docker.intrinsic.libnuma.node_of_cpu",
641+
side_effect=lambda core: core_to_node.get(core, -1),
642+
):
643+
yield
644+
645645
async def test_single_node_allocation_sets_cpuset_mems(
646646
self,
647647
cpu_plugin: CPUPlugin,
648648
) -> None:
649649
"""When all allocated cores are on the same NUMA node, CpusetMems is pinned
650650
to that node as a string."""
651-
devices = [
652-
self._make_device(0, 0),
653-
self._make_device(1, 0),
654-
self._make_device(2, 1),
655-
self._make_device(3, 1),
656-
]
657-
with patch.object(CPUPlugin, "list_devices", AsyncMock(return_value=devices)):
651+
with self._patch_node_of_cpu({0: 0, 1: 0, 2: 1, 3: 1}):
658652
result = await cpu_plugin.generate_docker_args(
659653
AsyncMock(),
660654
self._device_alloc([0, 1]),
@@ -670,15 +664,9 @@ async def test_multi_node_allocation_omits_cpuset_mems(
670664
self,
671665
cpu_plugin: CPUPlugin,
672666
) -> None:
673-
"""When cores span multiple NUMA nodes, CpusetMems must be omitted
674-
because Docker's HostConfig cannot express a multi-node cpuset.mems."""
675-
devices = [
676-
self._make_device(0, 0),
677-
self._make_device(1, 0),
678-
self._make_device(2, 1),
679-
self._make_device(3, 1),
680-
]
681-
with patch.object(CPUPlugin, "list_devices", AsyncMock(return_value=devices)):
667+
"""When cores span multiple NUMA nodes, CpusetMems must be omitted so that
668+
the Docker/kernel default NUMA memory placement policy can apply."""
669+
with self._patch_node_of_cpu({0: 0, 1: 0, 2: 1, 3: 1}):
682670
result = await cpu_plugin.generate_docker_args(
683671
AsyncMock(),
684672
self._device_alloc([0, 2]),
@@ -691,49 +679,34 @@ async def test_multi_node_allocation_omits_cpuset_mems(
691679
assert host_config["CpusetCpus"] == "0,2"
692680

693681
@pytest.mark.parametrize(
694-
("missing_core_numa", "case_id"),
682+
("core_to_node", "case_id"),
695683
[
696-
(None, "unknown_node"),
697-
(-1, "negative_node"),
684+
({0: 0}, "unknown_node"),
685+
({0: 0, 1: -1}, "negative_node"),
698686
],
699687
)
700688
async def test_unknown_or_negative_node_omits_cpuset_mems(
701689
self,
702690
cpu_plugin: CPUPlugin,
703-
missing_core_numa: int | None,
691+
core_to_node: dict[int, int],
704692
case_id: str,
705693
) -> None:
706-
"""When any allocated core maps to an unknown (None) or negative NUMA node,
707-
CpusetMems must be omitted.
694+
"""When any allocated core maps to an unknown (libnuma returns -1) or
695+
explicitly negative NUMA node, CpusetMems must be omitted.
708696
709-
For the `unknown_node` case, we simulate a core missing from the device list
710-
(so `core_to_node.get(core)` returns None). For the `negative_node` case, we
711-
include a device with numa_node = -1.
697+
`unknown_node` covers the case where libnuma cannot resolve a core (the
698+
patched side_effect returns -1 for unmapped cores). `negative_node` covers
699+
the case where libnuma reports -1 for a known core (NUMA info unavailable).
700+
Both collapse to the same `node < 0` branch in the SUT.
712701
"""
713-
if missing_core_numa is None:
714-
# Core 5 is allocated but not present in the device list.
715-
devices = [
716-
self._make_device(0, 0),
717-
self._make_device(1, 0),
718-
]
719-
allocated_cores = [0, 5]
720-
expected_cpuset_cpus = "0,5"
721-
else:
722-
devices = [
723-
self._make_device(0, 0),
724-
self._make_device(1, missing_core_numa),
725-
]
726-
allocated_cores = [0, 1]
727-
expected_cpuset_cpus = "0,1"
728-
729-
with patch.object(CPUPlugin, "list_devices", AsyncMock(return_value=devices)):
702+
with self._patch_node_of_cpu(core_to_node):
730703
result = await cpu_plugin.generate_docker_args(
731704
AsyncMock(),
732-
self._device_alloc(allocated_cores),
705+
self._device_alloc([0, 1]),
733706
)
734707

735708
host_config = result["HostConfig"]
736709
assert "CpusetMems" not in host_config, f"case={case_id}"
737710
# Sanity: core-list plumbing still works.
738-
assert host_config["Cpus"] == len(allocated_cores)
739-
assert host_config["CpusetCpus"] == expected_cpuset_cpus
711+
assert host_config["Cpus"] == 2
712+
assert host_config["CpusetCpus"] == "0,1"

0 commit comments

Comments
 (0)