Skip to content

Commit e27b6eb

Browse files
andrasbacsaiclaude
andcommitted
feat(builder): tighten sandbox + reap orphan transient units
Two improvements on top of the live-deploy fixes from earlier today: 1. Revert ProtectSystem=full → strict with an explicit ReadWritePaths allowlist. Under `full`, /etc and /var were writable to a root builder subprocess, which let a buildah/git CVE in a malicious repo persist via /etc/cron.d, sudoers, systemd drop-ins, etc. Under `strict`, the only writable paths are the per-build work dir, /var/lib/containers (buildah image store), /run/containers, /run/netavark, and /run/lock (netavark lock). /tmp and /var/tmp stay ephemeral via PrivateTmp; /home and /root via ProtectHome. Also adds a defense-in-depth set that doesn't require path enumeration: NoNewPrivileges, RestrictSUIDSGID, LockPersonality, RestrictRealtime, RestrictNamespaces, SystemCallArchitectures=native. CapabilityBoundingSet and SystemCallFilter are deferred until the ReadWritePaths set is confirmed stable in the field. 2. Add BuilderCtx::reap_orphan_units() called on coold startup. systemd-run --pipe --wait tears the transient unit down on SIGTERM (graceful coold stop), but SIGKILL / OOM / host crash leaves the unit running under PID 1 until RuntimeMaxSec hits. The sweeper enumerates coolify-build-*.service on startup, stops any that are still active, and resets failed state so subsequent dispatches can reuse names cleanly. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent b6db897 commit e27b6eb

2 files changed

Lines changed: 77 additions & 7 deletions

File tree

coold/src/builder/mod.rs

Lines changed: 76 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,53 @@ impl BuilderCtx {
7272
tokio::fs::create_dir_all(&self.work_root).await
7373
}
7474

75+
/// Stop any `coolify-build-*.service` transient units left behind by a
76+
/// prior coold run. A clean coold shutdown lets `systemd-run --pipe
77+
/// --wait` tear its unit down via SIGTERM propagation, but SIGKILL / OOM
78+
/// / host crash leaves the unit running under PID 1 until RuntimeMaxSec
79+
/// hits. This sweeper reclaims them on startup.
80+
pub async fn reap_orphan_units() {
81+
let out = match Command::new("systemctl")
82+
.args([
83+
"list-units",
84+
"--all",
85+
"--no-legend",
86+
"--plain",
87+
"--type=service",
88+
"coolify-build-*.service",
89+
])
90+
.output()
91+
.await
92+
{
93+
Ok(o) => o,
94+
Err(e) => {
95+
warn!(error = %e, "systemctl list-units failed; skipping orphan sweep");
96+
return;
97+
}
98+
};
99+
let stdout = String::from_utf8_lossy(&out.stdout);
100+
let names: Vec<String> = stdout
101+
.lines()
102+
.filter_map(|l| l.split_whitespace().next().map(str::to_owned))
103+
.filter(|n| n.starts_with("coolify-build-") && n.ends_with(".service"))
104+
.collect();
105+
if names.is_empty() {
106+
return;
107+
}
108+
warn!(count = names.len(), units = ?names,
109+
"reaping orphaned builder units from a prior coold run");
110+
let _ = Command::new("systemctl")
111+
.arg("stop")
112+
.args(&names)
113+
.status()
114+
.await;
115+
let _ = Command::new("systemctl")
116+
.arg("reset-failed")
117+
.args(&names)
118+
.status()
119+
.await;
120+
}
121+
75122
/// Spawn a build. Returns immediately after the subprocess is started;
76123
/// the reader task stays alive in a detached tokio task and sends the
77124
/// final `Response` frame on `tx` when the subprocess exits.
@@ -156,23 +203,45 @@ impl BuilderCtx {
156203
.arg(format!("CPUQuota={}", self.cpu_quota))
157204
.arg("-p")
158205
.arg("PrivateTmp=yes")
159-
// ProtectSystem=full keeps /usr, /boot, /efi read-only but leaves
160-
// /var, /run, /etc writable — buildah needs to touch
161-
// /var/lib/containers/storage and /run/containers (netavark locks,
162-
// image overlay mountpoints), so "strict" breaks the build with
163-
// "open /run/lock/netavark.lock: read-only file system".
206+
// ProtectSystem=strict mounts / read-only except /dev, /proc, /sys
207+
// and the explicit ReadWritePaths below. Full allowlist:
208+
// * `work_dir` — git clone + generated Containerfile
209+
// * /var/lib/containers — buildah image store + overlays
210+
// * /run/containers — buildah/libpod runtime state
211+
// * /run/netavark — network-plugin state
212+
// * /run/lock — netavark.lock
213+
// /tmp + /var/tmp are ephemeral via PrivateTmp; /home and /root
214+
// are hidden via ProtectHome.
164215
.arg("-p")
165-
.arg("ProtectSystem=full")
216+
.arg("ProtectSystem=strict")
166217
.arg("-p")
167218
.arg("ProtectHome=yes")
168219
.arg("-p")
169220
.arg(format!("ReadWritePaths={}", work_dir.display()))
170221
.arg("-p")
171-
.arg("ReadWritePaths=/var/lib/containers/storage")
222+
.arg("ReadWritePaths=/var/lib/containers")
172223
.arg("-p")
173224
.arg("ReadWritePaths=/run/containers")
174225
.arg("-p")
226+
.arg("ReadWritePaths=/run/netavark")
227+
.arg("-p")
175228
.arg("ReadWritePaths=/run/lock")
229+
// Defense-in-depth. Builder runs as root for buildah's benefit,
230+
// so lock down everything not strictly required. CAP_* trim and
231+
// SystemCallFilter are deferred until the ReadWritePaths set is
232+
// stable (easier to iterate on one dimension at a time).
233+
.arg("-p")
234+
.arg("NoNewPrivileges=yes")
235+
.arg("-p")
236+
.arg("RestrictSUIDSGID=yes")
237+
.arg("-p")
238+
.arg("LockPersonality=yes")
239+
.arg("-p")
240+
.arg("RestrictRealtime=yes")
241+
.arg("-p")
242+
.arg("RestrictNamespaces=yes")
243+
.arg("-p")
244+
.arg("SystemCallArchitectures=native")
176245
.arg("--")
177246
.arg(&self.builder_bin)
178247
.arg(&req_path)

coold/src/grpc/client.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ pub async fn run(config: Config, podman: PodmanClient) -> Result<()> {
4949
ctx.ensure_work_root()
5050
.await
5151
.with_context(|| format!("mkdir -p {}", config.builder_work_dir.display()))?;
52+
BuilderCtx::reap_orphan_units().await;
5253
info!(
5354
work_dir = %config.builder_work_dir.display(),
5455
capacity = config.builder_capacity,

0 commit comments

Comments
 (0)