2121from ai .backend .common .types import DeviceId , SlotName
2222
2323
24+ @contextmanager
25+ def _patched_libnuma (
26+ core_to_node : dict [int , int ],
27+ num_nodes : int ,
28+ ) -> Generator [None , None , None ]:
29+ """Patch ``libnuma.num_nodes`` and ``libnuma.node_of_cpu`` used by
30+ ``CPUPlugin._resolve_node_local_mem``. ``node_of_cpu`` returns ``-1`` for
31+ cores missing from the map, matching real libnuma's behavior when NUMA
32+ info is unavailable.
33+ """
34+ with (
35+ patch (
36+ "ai.backend.agent.docker.intrinsic.libnuma.num_nodes" ,
37+ return_value = num_nodes ,
38+ ),
39+ patch (
40+ "ai.backend.agent.docker.intrinsic.libnuma.node_of_cpu" ,
41+ side_effect = lambda core : core_to_node .get (core , - 1 ),
42+ ),
43+ ):
44+ yield
45+
46+
2447class BaseDockerIntrinsicTest :
2548 """Shared fixtures for Docker intrinsic plugin tests."""
2649
@@ -614,8 +637,74 @@ def test_raises_oserror_for_nonexistent_pid(self) -> None:
614637 read_proc_net_dev (999999999 )
615638
616639
640+ @dataclass (frozen = True )
641+ class _NumaScenario :
642+ num_nodes : int
643+ core_to_node : dict [int , int ]
644+ cores : list [int ]
645+ expected_cpuset_mems : str | None
646+
647+
648+ _NUMA_SCENARIOS : dict [str , _NumaScenario ] = {
649+ # All allocated cores on node 0 → pin memory to "0".
650+ "node_local_allocation" : _NumaScenario (
651+ num_nodes = 2 ,
652+ core_to_node = {0 : 0 , 1 : 0 , 2 : 1 , 3 : 1 },
653+ cores = [0 , 1 ],
654+ expected_cpuset_mems = "0" ,
655+ ),
656+ # Cores span nodes 0 and 1 → let the kernel default policy apply.
657+ "multi_node_allocation" : _NumaScenario (
658+ num_nodes = 2 ,
659+ core_to_node = {0 : 0 , 1 : 0 , 2 : 1 , 3 : 1 },
660+ cores = [0 , 2 ],
661+ expected_cpuset_mems = None ,
662+ ),
663+ # libnuma can't resolve core 1 → side_effect returns -1 for unmapped cores.
664+ "unknown_core" : _NumaScenario (
665+ num_nodes = 2 ,
666+ core_to_node = {0 : 0 },
667+ cores = [0 , 1 ],
668+ expected_cpuset_mems = None ,
669+ ),
670+ # libnuma reports -1 for a known core (NUMA info unavailable).
671+ "negative_node_id" : _NumaScenario (
672+ num_nodes = 2 ,
673+ core_to_node = {0 : 0 , 1 : - 1 },
674+ cores = [0 , 1 ],
675+ expected_cpuset_mems = None ,
676+ ),
677+ # Non-NUMA host (macOS, Docker Desktop, WSL, Linux w/o libnuma.so):
678+ # num_nodes==1 must short-circuit before inspecting per-core nodes so
679+ # containers are not unconditionally pinned to CpusetMems="0".
680+ "non_numa_host" : _NumaScenario (
681+ num_nodes = 1 ,
682+ core_to_node = {0 : 0 , 1 : 0 },
683+ cores = [0 , 1 ],
684+ expected_cpuset_mems = None ,
685+ ),
686+ }
687+
688+
689+ @pytest .fixture (params = list (_NUMA_SCENARIOS ), ids = list (_NUMA_SCENARIOS ))
690+ def numa_scenario (request : pytest .FixtureRequest ) -> _NumaScenario :
691+ return _NUMA_SCENARIOS [request .param ]
692+
693+
694+ class TestCPUPluginResolveNodeLocalMem :
695+ """Unit tests for ``CPUPlugin._resolve_node_local_mem`` NUMA-locality logic."""
696+
697+ def test_returns_expected_cpuset_mems (self , numa_scenario : _NumaScenario ) -> None :
698+ with _patched_libnuma (numa_scenario .core_to_node , numa_scenario .num_nodes ):
699+ result = CPUPlugin ._resolve_node_local_mem (numa_scenario .cores )
700+
701+ assert result == numa_scenario .expected_cpuset_mems
702+
703+
617704class TestCPUPluginGenerateDockerArgsNumaLocality :
618- """Tests for CPUPlugin.generate_docker_args() NUMA-locality CpusetMems logic."""
705+ """Integration tests for ``CPUPlugin.generate_docker_args`` covering the
706+ end-to-end wiring of ``_resolve_node_local_mem`` into ``HostConfig``.
707+ """
619708
620709 @pytest .fixture
621710 def cpu_plugin (self ) -> CPUPlugin :
@@ -627,122 +716,21 @@ def _device_alloc(core_ids: list[int]) -> dict[SlotName, dict[DeviceId, Decimal]
627716 SlotName ("cpu" ): {DeviceId (str (cid )): Decimal ("1" ) for cid in core_ids },
628717 }
629718
630- @staticmethod
631- @contextmanager
632- def _patch_node_of_cpu (
633- core_to_node : dict [int , int ],
634- * ,
635- num_nodes : int = 2 ,
636- ) -> Generator [None , None , None ]:
637- """Patch libnuma.node_of_cpu; return -1 for any core missing from the map
638- (matches real libnuma's behavior for unknown cores when NUMA info is
639- unavailable).
640-
641- Also patches libnuma.num_nodes to report a multi-node host by default
642- so the NUMA-aware branch is exercised. Tests covering the non-NUMA
643- short-circuit can pass ``num_nodes=1``.
644- """
645- with (
646- patch (
647- "ai.backend.agent.docker.intrinsic.libnuma.num_nodes" ,
648- return_value = num_nodes ,
649- ),
650- patch (
651- "ai.backend.agent.docker.intrinsic.libnuma.node_of_cpu" ,
652- side_effect = lambda core : core_to_node .get (core , - 1 ),
653- ),
654- ):
655- yield
656-
657- async def test_single_node_allocation_sets_cpuset_mems (
658- self ,
659- cpu_plugin : CPUPlugin ,
660- ) -> None :
661- """When all allocated cores are on the same NUMA node, CpusetMems is pinned
662- to that node as a string."""
663- with self ._patch_node_of_cpu ({0 : 0 , 1 : 0 , 2 : 1 , 3 : 1 }):
664- result = await cpu_plugin .generate_docker_args (
665- AsyncMock (),
666- self ._device_alloc ([0 , 1 ]),
667- )
668-
669- host_config = result ["HostConfig" ]
670- assert host_config ["CpusetMems" ] == "0"
671- # Sanity: core-list plumbing still works.
672- assert host_config ["Cpus" ] == 2
673- assert host_config ["CpusetCpus" ] == "0,1"
674-
675- async def test_multi_node_allocation_omits_cpuset_mems (
676- self ,
677- cpu_plugin : CPUPlugin ,
678- ) -> None :
679- """When cores span multiple NUMA nodes, CpusetMems must be omitted so that
680- the Docker/kernel default NUMA memory placement policy can apply."""
681- with self ._patch_node_of_cpu ({0 : 0 , 1 : 0 , 2 : 1 , 3 : 1 }):
682- result = await cpu_plugin .generate_docker_args (
683- AsyncMock (),
684- self ._device_alloc ([0 , 2 ]),
685- )
686-
687- host_config = result ["HostConfig" ]
688- assert "CpusetMems" not in host_config
689- # Sanity: core-list plumbing still works.
690- assert host_config ["Cpus" ] == 2
691- assert host_config ["CpusetCpus" ] == "0,2"
692-
693- @pytest .mark .parametrize (
694- "core_to_node" ,
695- [
696- {0 : 0 },
697- {0 : 0 , 1 : - 1 },
698- ],
699- ids = ["unknown_node" , "negative_node" ],
700- )
701- async def test_unknown_or_negative_node_omits_cpuset_mems (
702- self ,
703- cpu_plugin : CPUPlugin ,
704- core_to_node : dict [int , int ],
705- ) -> None :
706- """When any allocated core maps to an unknown (libnuma returns -1) or
707- explicitly negative NUMA node, CpusetMems must be omitted.
708-
709- `unknown_node` covers the case where libnuma cannot resolve a core (the
710- patched side_effect returns -1 for unmapped cores). `negative_node` covers
711- the case where libnuma reports -1 for a known core (NUMA info unavailable).
712- Both collapse to the same `node < 0` branch in the SUT.
713- """
714- with self ._patch_node_of_cpu (core_to_node ):
715- result = await cpu_plugin .generate_docker_args (
716- AsyncMock (),
717- self ._device_alloc ([0 , 1 ]),
718- )
719-
720- host_config = result ["HostConfig" ]
721- assert "CpusetMems" not in host_config
722- # Sanity: core-list plumbing still works.
723- assert host_config ["Cpus" ] == 2
724- assert host_config ["CpusetCpus" ] == "0,1"
725-
726- async def test_non_numa_host_omits_cpuset_mems (
719+ async def test_host_config_matches_scenario (
727720 self ,
728721 cpu_plugin : CPUPlugin ,
722+ numa_scenario : _NumaScenario ,
729723 ) -> None :
730- """On non-NUMA / non-Linux hosts (macOS, Docker Desktop, WSL, Linux
731- without libnuma.so) libnuma.num_nodes() reports 1 and node_of_cpu()
732- hardcodes 0. The plugin must short-circuit before inspecting per-core
733- nodes so containers are not unconditionally pinned to CpusetMems="0".
734- """
735- # node_of_cpu would return 0 for every core on a non-NUMA host; assert
736- # we never reach that branch by mapping cores to a bogus node that
737- # would otherwise produce a stale CpusetMems assignment.
738- with self ._patch_node_of_cpu ({0 : 0 , 1 : 0 }, num_nodes = 1 ):
724+ with _patched_libnuma (numa_scenario .core_to_node , numa_scenario .num_nodes ):
739725 result = await cpu_plugin .generate_docker_args (
740726 AsyncMock (),
741- self ._device_alloc ([ 0 , 1 ] ),
727+ self ._device_alloc (numa_scenario . cores ),
742728 )
743729
744730 host_config = result ["HostConfig" ]
745- assert "CpusetMems" not in host_config
746- # Sanity: core-list plumbing still works.
747- assert host_config ["Cpus" ] == 2
748- assert host_config ["CpusetCpus" ] == "0,1"
731+ assert host_config ["Cpus" ] == len (numa_scenario .cores )
732+ assert host_config ["CpusetCpus" ] == "," .join (str (c ) for c in sorted (numa_scenario .cores ))
733+ if numa_scenario .expected_cpuset_mems is None :
734+ assert "CpusetMems" not in host_config
735+ else :
736+ assert host_config ["CpusetMems" ] == numa_scenario .expected_cpuset_mems
0 commit comments