Skip to content

Commit 9bfae94

Browse files
r-barnes, meta-codesync[bot]
authored and committed
Set per-process memory limits on getdeps build subprocesses
Summary: getdeps computes build parallelism from available RAM but never enforces the per-job memory budget. If a compiler or linker exceeds its expected usage, the Linux OOM killer can terminate arbitrary processes, including the user's terminal. Add RLIMIT_AS enforcement via preexec_fn on build subprocesses. The limit is set to 10x job_weight_mib to account for virtual address space being larger than resident memory. This causes runaway processes to get std::bad_alloc/ENOMEM instead of triggering the OOM killer. The limit is inherited by all child processes (ninja, compiler invocations). Reviewed By: jbower-fb, bigfootjon Differential Revision: D94983478 fbshipit-source-id: 44ac784351f55e1b1a67ef43c8beb1ddf805d596
1 parent 7e16705 commit 9bfae94

File tree

2 files changed

+93
-13
lines changed

2 files changed

+93
-13
lines changed

build/fbcode_builder/getdeps/builder.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from .dyndeps import create_dyn_dep_munger
2525
from .envfuncs import add_path_entry, Env, path_search
2626
from .fetcher import copy_if_different, is_public_commit
27-
from .runcmd import run_cmd
27+
from .runcmd import make_memory_limit_preexec_fn, run_cmd
2828

2929
if typing.TYPE_CHECKING:
3030
from .buildopts import BuildOptions
@@ -100,6 +100,7 @@ def _run_cmd(
100100
env=None,
101101
use_cmd_prefix: bool = True,
102102
allow_fail: bool = False,
103+
preexec_fn=None,
103104
) -> int:
104105
if env:
105106
e = self.env.copy()
@@ -120,6 +121,7 @@ def _run_cmd(
120121
cwd=cwd or self.build_dir,
121122
log_file=log_file,
122123
allow_fail=allow_fail,
124+
preexec_fn=preexec_fn,
123125
)
124126

125127
def _reconfigure(self, reconfigure: bool) -> bool:
@@ -229,7 +231,7 @@ def build(self, reconfigure: bool) -> None:
229231
dep_munger.emit_dev_run_script(script_path, dep_dirs)
230232

231233
@property
232-
def num_jobs(self) -> int:
234+
def _job_weight_mib(self) -> int:
233235
# This is a hack, but we don't have a "defaults manifest" that we can
234236
# customize per platform.
235237
# TODO: Introduce some sort of defaults config that can select by
@@ -241,14 +243,25 @@ def num_jobs(self) -> int:
241243
# 1.5 GiB is a lot to assume, but it's typical of Facebook-style C++.
242244
# Some manifests are even heavier and should override.
243245
default_job_weight = 1536
244-
return self.build_opts.get_num_jobs(
245-
int(
246-
self.manifest.get(
247-
"build", "job_weight_mib", default_job_weight, ctx=self.ctx
248-
)
246+
return int(
247+
self.manifest.get(
248+
"build", "job_weight_mib", default_job_weight, ctx=self.ctx
249249
)
250250
)
251251

252+
@property
def num_jobs(self) -> int:
    """Return the number of parallel build jobs for this builder.

    The count is obtained by handing this builder's per-job memory weight
    (``_job_weight_mib``) to ``build_opts.get_num_jobs``.
    """
    weight_mib = self._job_weight_mib
    return self.build_opts.get_num_jobs(weight_mib)
255+
256+
@property
def memory_limit_preexec_fn(self):
    """Build the preexec_fn that caps virtual memory of build subprocesses.

    The cap is derived from ``_job_weight_mib``, the same value that
    ``num_jobs`` feeds into the parallelism calculation, so the enforced
    limit and the parallelism budget come from one source.
    """
    weight_mib = self._job_weight_mib
    return make_memory_limit_preexec_fn(weight_mib)
264+
252265
def run_tests(
253266
self,
254267
schedule_type,
@@ -933,6 +946,7 @@ def _build(self, reconfigure: bool) -> None:
933946
str(self.num_jobs),
934947
],
935948
env=env,
949+
preexec_fn=self.memory_limit_preexec_fn,
936950
)
937951

938952
def _build_targets(self, targets: typing.Sequence[str]) -> None:
@@ -969,7 +983,7 @@ def _build_targets(self, targets: typing.Sequence[str]) -> None:
969983
]
970984
)
971985

972-
self._check_cmd(cmd, env=env)
986+
self._check_cmd(cmd, env=env, preexec_fn=self.memory_limit_preexec_fn)
973987

974988
def _get_missing_test_executables(
975989
self, test_filter: Optional[str], env: Env, ctest: Optional[str]

build/fbcode_builder/getdeps/runcmd.py

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
# pyre-unsafe
77

8+
from __future__ import annotations
9+
810
import os
911
import select
1012
import subprocess
@@ -19,6 +21,53 @@ class RunCommandError(Exception):
1921
pass
2022

2123

24+
def make_memory_limit_preexec_fn(
    job_weight_mib: int,
) -> object | None:
    """Create a preexec_fn that sets a per-process virtual memory limit.

    When getdeps spawns build commands (cmake -> ninja -> N compiler processes),
    the parallelism is computed from available RAM divided by job_weight_mib.
    However, there is no enforcement of that budget: if a compiler or linker
    process exceeds its expected memory usage, the system can run out of RAM
    and the Linux OOM killer may terminate arbitrary processes, including the
    user's shell or terminal.

    This function returns a callable suitable for subprocess.Popen's preexec_fn
    parameter. It runs in the child process after fork() but before exec(),
    setting RLIMIT_AS (virtual address space limit) so that a runaway process
    gets a failed allocation (std::bad_alloc / ENOMEM) instead of triggering
    the OOM killer. The limit is inherited by all descendant processes (ninja,
    compiler invocations, etc.).

    The per-process limit is job_weight_mib * 10 MiB. RLIMIT_AS caps virtual
    address space, which is typically 2-4x larger than resident (physical)
    memory for C++ compilers due to memory-mapped files, shared libraries, and
    address space reservations that don't consume physical RAM. The 10x
    multiplier is intentionally generous: the goal is a safety net that
    catches genuine runaways, not a tight per-job budget.

    If the calling process already runs under a hard RLIMIT_AS lower than the
    computed value, the existing hard limit is used instead, because an
    unprivileged process cannot raise its hard limit and setrlimit() would
    fail in the child.

    Args:
        job_weight_mib: Physical-memory budget of one build job, in MiB.

    Returns:
        A zero-argument callable to pass as ``preexec_fn``, or ``None`` when
        no limit should be applied: on non-Linux platforms, or when
        ``job_weight_mib`` is not positive (a zero or negative limit would
        make every child fail rather than protect the build).
    """
    if sys.platform != "linux" or job_weight_mib <= 0:
        return None

    # Import here, in the parent, so the post-fork hook only has to make the
    # setrlimit() call. Running an import between fork() and exec() is best
    # avoided when the parent may have other threads.
    import resource

    # Each job is budgeted job_weight_mib of physical RAM. Virtual address
    # space is typically 2-4x RSS. Use 10x as a generous safety net: tight
    # enough to stop a runaway process before the OOM killer fires, but loose
    # enough to avoid false positives from normal virtual memory overhead.
    limit_bytes = job_weight_mib * 10 * 1024 * 1024

    # The child inherits the parent's limits, so the parent's hard limit is
    # the ceiling for what the child is allowed to set. Clamp to it instead of
    # letting setrlimit() raise ValueError inside preexec_fn.
    _soft, hard = resource.getrlimit(resource.RLIMIT_AS)
    if hard != resource.RLIM_INFINITY and limit_bytes > hard:
        limit_bytes = hard

    def _set_memory_limit():
        resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))

    return _set_memory_limit
69+
70+
2271
def _print_env_diff(env, log_fn) -> None:
2372
current_keys = set(os.environ.keys())
2473
wanted_env = set(env.keys())
@@ -48,7 +97,9 @@ def check_cmd(cmd, **kwargs) -> None:
4897
raise RuntimeError(f"Failure exit code {rc} for command {cmd}")
4998

5099

51-
def run_cmd(cmd, env=None, cwd=None, allow_fail: bool = False, log_file=None) -> int:
100+
def run_cmd(
101+
cmd, env=None, cwd=None, allow_fail: bool = False, log_file=None, preexec_fn=None
102+
) -> int:
52103
def log_to_stdout(msg):
53104
sys.stdout.buffer.write(msg.encode(errors="surrogateescape"))
54105

@@ -60,15 +111,25 @@ def log_function(msg):
60111
log_to_stdout(msg)
61112

62113
return _run_cmd(
63-
cmd, env=env, cwd=cwd, allow_fail=allow_fail, log_fn=log_function
114+
cmd,
115+
env=env,
116+
cwd=cwd,
117+
allow_fail=allow_fail,
118+
log_fn=log_function,
119+
preexec_fn=preexec_fn,
64120
)
65121
else:
66122
return _run_cmd(
67-
cmd, env=env, cwd=cwd, allow_fail=allow_fail, log_fn=log_to_stdout
123+
cmd,
124+
env=env,
125+
cwd=cwd,
126+
allow_fail=allow_fail,
127+
log_fn=log_to_stdout,
128+
preexec_fn=preexec_fn,
68129
)
69130

70131

71-
def _run_cmd(cmd, env, cwd, allow_fail, log_fn) -> int:
132+
def _run_cmd(cmd, env, cwd, allow_fail, log_fn, preexec_fn=None) -> int:
72133
log_fn("---\n")
73134
try:
74135
cmd_str = " \\\n+ ".join(shellquote(arg) for arg in cmd)
@@ -106,7 +167,12 @@ def _run_cmd(cmd, env, cwd, allow_fail, log_fn) -> int:
106167

107168
try:
108169
p = subprocess.Popen(
109-
cmd, env=env, cwd=cwd, stdout=stdout, stderr=subprocess.STDOUT
170+
cmd,
171+
env=env,
172+
cwd=cwd,
173+
stdout=stdout,
174+
stderr=subprocess.STDOUT,
175+
preexec_fn=preexec_fn,
110176
)
111177
except (TypeError, ValueError, OSError) as exc:
112178
log_fn("error running `%s`: %s" % (cmd_str, exc))

0 commit comments

Comments
 (0)