Skip to content

Commit e4ba8ee

Browse files
rapsealk and claude committed
test(agent): Cover CpusetMems NUMA-locality branching in CPUPlugin
Adds unit coverage for generate_docker_args() setting HostConfig.CpusetMems only when the CPU allocation is fully within a single NUMA node, and omitting the key for multi-node or unknown/negative NUMA mappings. Refs #11217 Refs #11222 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b52b7f5 commit e4ba8ee

1 file changed

Lines changed: 127 additions & 0 deletions

File tree

tests/unit/agent/test_docker_intrinsic.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from collections.abc import Generator
55
from contextlib import contextmanager
66
from dataclasses import dataclass
7+
from decimal import Decimal
78
from pathlib import Path
89
from typing import Any
910
from unittest.mock import AsyncMock, MagicMock, patch
@@ -12,11 +13,13 @@
1213

1314
from ai.backend.agent.docker.intrinsic import (
1415
ContainerNetStat,
16+
CPUDevice,
1517
CPUPlugin,
1618
MemoryPlugin,
1719
read_proc_net_dev,
1820
)
1921
from ai.backend.agent.stats import StatModes
22+
from ai.backend.common.types import DeviceId, DeviceName, SlotName
2023

2124

2225
class BaseDockerIntrinsicTest:
@@ -610,3 +613,127 @@ def test_raises_oserror_for_nonexistent_pid(self) -> None:
610613
"""Raises OSError when /proc/[pid]/net/dev does not exist."""
611614
with pytest.raises(OSError):
612615
read_proc_net_dev(999999999)
616+
617+
618+
class TestCPUPluginGenerateDockerArgsNumaLocality:
    """Tests for CPUPlugin.generate_docker_args() NUMA-locality CpusetMems logic.

    Every test replaces ``CPUPlugin.list_devices`` with a synthetic device
    list and inspects the ``HostConfig`` section of the generated arguments.
    """

    @pytest.fixture
    def cpu_plugin(self) -> CPUPlugin:
        """Return a CPUPlugin created without running ``__init__``.

        Only the attributes assigned here are populated on the instance.
        """
        plugin = CPUPlugin.__new__(CPUPlugin)
        plugin.local_config = {"agent": {"docker-mode": "default"}}
        plugin._docker = AsyncMock()
        return plugin

    @staticmethod
    def _make_device(core_id: int, numa_node: int | None) -> CPUDevice:
        """Build a single-core CPUDevice whose device id is ``str(core_id)``."""
        return CPUDevice(
            device_id=DeviceId(str(core_id)),
            hw_location="root",
            memory_size=0,
            processing_units=1,
            numa_node=numa_node,
            device_name=DeviceName("cpu"),
        )

    @staticmethod
    def _device_alloc(core_ids: list[int]) -> dict[SlotName, dict[DeviceId, Decimal]]:
        """Build a ``cpu`` slot allocation granting one whole unit per listed core."""
        return {
            SlotName("cpu"): {DeviceId(str(cid)): Decimal("1") for cid in core_ids},
        }

    @classmethod
    def _two_node_devices(cls) -> list[CPUDevice]:
        """Return four cores: cores 0 and 1 on NUMA node 0, cores 2 and 3 on node 1."""
        return [
            cls._make_device(0, 0),
            cls._make_device(1, 0),
            cls._make_device(2, 1),
            cls._make_device(3, 1),
        ]

    async def _host_config(
        self,
        plugin: CPUPlugin,
        devices: list[CPUDevice],
        core_ids: list[int],
    ) -> dict[str, Any]:
        """Run generate_docker_args() against ``devices`` and return its HostConfig.

        ``list_devices`` is patched on the class so the plugin sees exactly the
        given topology; the patch is undone before this helper returns.
        """
        with patch.object(CPUPlugin, "list_devices", AsyncMock(return_value=devices)):
            result = await plugin.generate_docker_args(
                AsyncMock(),
                self._device_alloc(core_ids),
            )
        return result["HostConfig"]

    async def test_single_node_allocation_sets_cpuset_mems(
        self,
        cpu_plugin: CPUPlugin,
    ) -> None:
        """When all allocated cores are on the same NUMA node, CpusetMems is pinned
        to that node as a string."""
        host_config = await self._host_config(
            cpu_plugin,
            self._two_node_devices(),
            [0, 1],
        )

        assert host_config["CpusetMems"] == "0"
        # Sanity: core-list plumbing still works.
        assert host_config["Cpus"] == 2
        assert host_config["CpusetCpus"] == "0,1"

    async def test_multi_node_allocation_omits_cpuset_mems(
        self,
        cpu_plugin: CPUPlugin,
    ) -> None:
        """When cores span multiple NUMA nodes, CpusetMems must be omitted.

        The plugin pins memory only when the whole CPU allocation sits on a
        single NUMA node; for a cross-node allocation the key is left out.
        """
        host_config = await self._host_config(
            cpu_plugin,
            self._two_node_devices(),
            [0, 2],
        )

        assert "CpusetMems" not in host_config
        # Sanity: core-list plumbing still works.
        assert host_config["Cpus"] == 2
        assert host_config["CpusetCpus"] == "0,2"

    @pytest.mark.parametrize(
        ("missing_core_numa", "case_id"),
        [
            # Explicit ids keep failure reports readable.
            pytest.param(None, "unknown_node", id="unknown_node"),
            pytest.param(-1, "negative_node", id="negative_node"),
        ],
    )
    async def test_unknown_or_negative_node_omits_cpuset_mems(
        self,
        cpu_plugin: CPUPlugin,
        missing_core_numa: int | None,
        case_id: str,
    ) -> None:
        """When any allocated core maps to an unknown (None) or negative NUMA node,
        CpusetMems must be omitted.

        For the `unknown_node` case, we simulate a core missing from the device list
        (presumably the plugin's core-to-node lookup then yields None -- confirm
        against the implementation). For the `negative_node` case, we include a
        device with numa_node = -1.
        """
        if missing_core_numa is None:
            # Core 5 is allocated but not present in the device list.
            devices = [
                self._make_device(0, 0),
                self._make_device(1, 0),
            ]
            allocated_cores = [0, 5]
            expected_cpuset_cpus = "0,5"
        else:
            devices = [
                self._make_device(0, 0),
                self._make_device(1, missing_core_numa),
            ]
            allocated_cores = [0, 1]
            expected_cpuset_cpus = "0,1"

        host_config = await self._host_config(cpu_plugin, devices, allocated_cores)

        assert "CpusetMems" not in host_config, f"case={case_id}"
        # Sanity: core-list plumbing still works.
        assert host_config["Cpus"] == len(allocated_cores)
        assert host_config["CpusetCpus"] == expected_cpuset_cpus

0 commit comments

Comments
 (0)