|
| 1 | +# Copied from: https://github.com/joblib/loky/blob/master/loky/backend/context.py |
| 2 | + |
| 3 | +############################################################################### |
| 4 | +# Basic context management with LokyContext |
| 5 | +# |
| 6 | +# author: Thomas Moreau and Olivier Grisel |
| 7 | +# |
| 8 | +# adapted from multiprocessing/context.py |
| 9 | +# * Create a context ensuring loky uses only objects that are compatible |
| 10 | +# * Add LokyContext to the list of context of multiprocessing so loky can be |
| 11 | +# used with multiprocessing.set_start_method |
| 12 | +# * Implement a CFS-aware amd physical-core aware cpu_count function. |
| 13 | +# |
| 14 | +import math |
| 15 | +import os |
| 16 | +import subprocess |
| 17 | +import sys |
| 18 | +import traceback |
| 19 | +import warnings |
| 20 | +from concurrent.futures.process import _MAX_WINDOWS_WORKERS |
| 21 | + |
| 22 | +# Cache for the number of physical cores to avoid repeating subprocess calls. |
| 23 | +# It should not change during the lifetime of the program. |
| 24 | +physical_cores_cache = None |
| 25 | + |
| 26 | + |
| 27 | +def cpu_count(only_physical_cores=False): |
| 28 | + """Return the number of CPUs the current process can use. |
| 29 | +
|
| 30 | + The returned number of CPUs accounts for: |
| 31 | + * the number of CPUs in the system, as given by |
| 32 | + ``multiprocessing.cpu_count``; |
| 33 | + * the CPU affinity settings of the current process |
| 34 | + (available on some Unix systems); |
| 35 | + * Cgroup CPU bandwidth limit (available on Linux only, typically |
| 36 | + set by docker and similar container orchestration systems); |
| 37 | + * the value of the LOKY_MAX_CPU_COUNT environment variable if defined. |
| 38 | + and is given as the minimum of these constraints. |
| 39 | +
|
| 40 | + If ``only_physical_cores`` is True, return the number of physical cores |
| 41 | + instead of the number of logical cores (hyperthreading / SMT). Note that |
| 42 | + this option is not enforced if the number of usable cores is controlled in |
| 43 | + any other way such as: process affinity, Cgroup restricted CPU bandwidth |
| 44 | + or the LOKY_MAX_CPU_COUNT environment variable. If the number of physical |
| 45 | + cores is not found, return the number of logical cores. |
| 46 | +
|
| 47 | + Note that on Windows, the returned number of CPUs cannot exceed 61 (or 60 for |
| 48 | + Python < 3.10), see: |
| 49 | + https://bugs.python.org/issue26903. |
| 50 | +
|
| 51 | + It is also always larger or equal to 1. |
| 52 | + """ |
| 53 | + # Note: os.cpu_count() is allowed to return None in its docstring |
| 54 | + os_cpu_count = os.cpu_count() or 1 |
| 55 | + if sys.platform == "win32": |
| 56 | + # On Windows, attempting to use more than 61 CPUs would result in a |
| 57 | + # OS-level error. See https://bugs.python.org/issue26903. According to |
| 58 | + # https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups |
| 59 | + # it might be possible to go beyond with a lot of extra work but this |
| 60 | + # does not look easy. |
| 61 | + os_cpu_count = min(os_cpu_count, _MAX_WINDOWS_WORKERS) |
| 62 | + |
| 63 | + cpu_count_user = _cpu_count_user(os_cpu_count) |
| 64 | + aggregate_cpu_count = max(min(os_cpu_count, cpu_count_user), 1) |
| 65 | + |
| 66 | + if not only_physical_cores: |
| 67 | + return aggregate_cpu_count |
| 68 | + |
| 69 | + if cpu_count_user < os_cpu_count: |
| 70 | + # Respect user setting |
| 71 | + return max(cpu_count_user, 1) |
| 72 | + |
| 73 | + cpu_count_physical, exception = _count_physical_cores() |
| 74 | + if cpu_count_physical != "not found": |
| 75 | + return cpu_count_physical |
| 76 | + |
| 77 | + # Fallback to default behavior |
| 78 | + if exception is not None: |
| 79 | + # warns only the first time |
| 80 | + warnings.warn( |
| 81 | + "Could not find the number of physical cores for the " |
| 82 | + f"following reason:\n{exception}\n" |
| 83 | + "Returning the number of logical cores instead. You can " |
| 84 | + "silence this warning by setting LOKY_MAX_CPU_COUNT to " |
| 85 | + "the number of cores you want to use.", |
| 86 | + stacklevel=2, |
| 87 | + ) |
| 88 | + traceback.print_tb(exception.__traceback__) |
| 89 | + |
| 90 | + return aggregate_cpu_count |
| 91 | + |
| 92 | + |
| 93 | +def _cpu_count_cgroup(os_cpu_count): |
| 94 | + # Cgroup CPU bandwidth limit available in Linux since 2.6 kernel |
| 95 | + cpu_max_fname = "/sys/fs/cgroup/cpu.max" |
| 96 | + cfs_quota_fname = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us" |
| 97 | + cfs_period_fname = "/sys/fs/cgroup/cpu/cpu.cfs_period_us" |
| 98 | + if os.path.exists(cpu_max_fname): |
| 99 | + # cgroup v2 |
| 100 | + # https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html |
| 101 | + with open(cpu_max_fname) as fh: |
| 102 | + cpu_quota_us, cpu_period_us = fh.read().strip().split() |
| 103 | + elif os.path.exists(cfs_quota_fname) and os.path.exists(cfs_period_fname): |
| 104 | + # cgroup v1 |
| 105 | + # https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management |
| 106 | + with open(cfs_quota_fname) as fh: |
| 107 | + cpu_quota_us = fh.read().strip() |
| 108 | + with open(cfs_period_fname) as fh: |
| 109 | + cpu_period_us = fh.read().strip() |
| 110 | + else: |
| 111 | + # No Cgroup CPU bandwidth limit (e.g. non-Linux platform) |
| 112 | + cpu_quota_us = "max" |
| 113 | + cpu_period_us = 100_000 # unused, for consistency with default values |
| 114 | + |
| 115 | + if cpu_quota_us == "max": |
| 116 | + # No active Cgroup quota on a Cgroup-capable platform |
| 117 | + return os_cpu_count |
| 118 | + else: |
| 119 | + cpu_quota_us = int(cpu_quota_us) |
| 120 | + cpu_period_us = int(cpu_period_us) |
| 121 | + if cpu_quota_us > 0 and cpu_period_us > 0: |
| 122 | + return math.ceil(cpu_quota_us / cpu_period_us) |
| 123 | + else: # pragma: no cover |
| 124 | + # Setting a negative cpu_quota_us value is a valid way to disable |
| 125 | + # cgroup CPU bandwidth limits |
| 126 | + return os_cpu_count |
| 127 | + |
| 128 | + |
| 129 | +def _cpu_count_affinity(os_cpu_count): |
| 130 | + # Number of available CPUs given affinity settings |
| 131 | + if hasattr(os, "sched_getaffinity"): |
| 132 | + try: |
| 133 | + return len(os.sched_getaffinity(0)) |
| 134 | + except NotImplementedError: |
| 135 | + pass |
| 136 | + |
| 137 | + # This can happen for platforms that do not implement any kind of CPU |
| 138 | + # infinity such as macOS-based platforms. |
| 139 | + return os_cpu_count |
| 140 | + |
| 141 | + |
| 142 | +def _cpu_count_user(os_cpu_count): |
| 143 | + """Number of user defined available CPUs""" |
| 144 | + cpu_count_affinity = _cpu_count_affinity(os_cpu_count) |
| 145 | + |
| 146 | + cpu_count_cgroup = _cpu_count_cgroup(os_cpu_count) |
| 147 | + |
| 148 | + # User defined soft-limit passed as a loky specific environment variable. |
| 149 | + cpu_count_loky = int(os.environ.get("LOKY_MAX_CPU_COUNT", os_cpu_count)) |
| 150 | + |
| 151 | + return min(cpu_count_affinity, cpu_count_cgroup, cpu_count_loky) |
| 152 | + |
| 153 | + |
| 154 | +def _count_physical_cores(): |
| 155 | + """Return a tuple (number of physical cores, exception) |
| 156 | +
|
| 157 | + If the number of physical cores is found, exception is set to None. |
| 158 | + If it has not been found, return ("not found", exception). |
| 159 | +
|
| 160 | + The number of physical cores is cached to avoid repeating subprocess calls. |
| 161 | + """ |
| 162 | + exception = None |
| 163 | + |
| 164 | + # First check if the value is cached |
| 165 | + global physical_cores_cache |
| 166 | + if physical_cores_cache is not None: |
| 167 | + return physical_cores_cache, exception |
| 168 | + |
| 169 | + # Not cached yet, find it |
| 170 | + try: |
| 171 | + if sys.platform == "linux": |
| 172 | + cpu_info = subprocess.run( |
| 173 | + "lscpu --parse=core".split(), capture_output=True, text=True |
| 174 | + ) |
| 175 | + cpu_info = cpu_info.stdout.splitlines() |
| 176 | + cpu_info = {line for line in cpu_info if not line.startswith("#")} |
| 177 | + cpu_count_physical = len(cpu_info) |
| 178 | + elif sys.platform == "win32": |
| 179 | + cpu_info = subprocess.run( |
| 180 | + "wmic CPU Get NumberOfCores /Format:csv".split(), |
| 181 | + capture_output=True, |
| 182 | + text=True, |
| 183 | + ) |
| 184 | + cpu_info = cpu_info.stdout.splitlines() |
| 185 | + cpu_info = [ |
| 186 | + l.split(",")[1] for l in cpu_info if (l and l != "Node,NumberOfCores") |
| 187 | + ] |
| 188 | + cpu_count_physical = sum(map(int, cpu_info)) |
| 189 | + elif sys.platform == "darwin": |
| 190 | + cpu_info = subprocess.run( |
| 191 | + "sysctl -n hw.physicalcpu".split(), |
| 192 | + capture_output=True, |
| 193 | + text=True, |
| 194 | + ) |
| 195 | + cpu_info = cpu_info.stdout |
| 196 | + cpu_count_physical = int(cpu_info) |
| 197 | + else: |
| 198 | + raise NotImplementedError(f"unsupported platform: {sys.platform}") |
| 199 | + |
| 200 | + # if cpu_count_physical < 1, we did not find a valid value |
| 201 | + if cpu_count_physical < 1: |
| 202 | + raise ValueError(f"found {cpu_count_physical} physical cores < 1") |
| 203 | + |
| 204 | + except Exception as e: |
| 205 | + exception = e |
| 206 | + cpu_count_physical = "not found" |
| 207 | + |
| 208 | + # Put the result in cache |
| 209 | + physical_cores_cache = cpu_count_physical |
| 210 | + |
| 211 | + return cpu_count_physical, exception |
0 commit comments