Skip to content

Commit 30d9113

Browse files
andrasbacsaiclaude
andcommitted
test(e2e): live-server harness for broker routing + builder lifecycle
Adds a Rust integration-test crate `e2e-tests` that drives Redis via ssh + redis-cli on the central host and asserts on-box state via `buildah images` and `systemctl is-active`. No broker/coold code is linked — tests exercise the black-box contract.

Six scenarios, all `#[ignore]`:
- pin_to_builder_host - image lands on pinned host only
- pin_to_coold_only_host_returns_503 - caps=[coold] → 503
- unknown_host_id_returns_503 - host not connected → 503
- load_balance_picks_builder_host - host_id empty → broker picks builder-capable host
- build_cancel_emits_stage_cancel - cancel via build:cmd → code=499 stage=cancel, unit gone
- coold_restart_adopts_in_flight_build - systemctl restart coold mid-build; unit survives; new coold adopts; cancel routes; workdir cleaned

Excluded from default `cargo build --workspace` via `default-members` so nightly stays fast. Run with:

    cargo test -p e2e-tests -- --ignored --test-threads=1

`--test-threads=1` is mandatory: concurrent dispatches overwhelm builder_capacity and race on shared host state.

Live run: 6/6 PASS in ~28s against two throw-away VMs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 8ac89a1 commit 30d9113

5 files changed

Lines changed: 435 additions & 1 deletion

File tree

Cargo.lock

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
[workspace]
2-
members = ["coold", "broker", "builder", "builder-core", "proto"]
2+
members = ["coold", "broker", "builder", "builder-core", "e2e-tests", "proto"]
3+
# e2e-tests is a live-server test harness; exclude it from default builds
4+
# so nightly + local `cargo build --workspace` stay fast. Run explicitly
5+
# with `cargo test -p e2e-tests -- --ignored --test-threads=1`.
6+
default-members = ["coold", "broker", "builder", "builder-core", "proto"]
37
resolver = "2"
48

59
[workspace.package]

e2e-tests/Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "e2e-tests"
3+
version = "0.0.0"
4+
publish = false
5+
edition.workspace = true
6+
rust-version.workspace = true
7+
description = "Live-server black-box tests for coold/broker/builder. Disabled by default (#[ignore]); run with `cargo test -p e2e-tests -- --ignored --test-threads=1`."
8+
license.workspace = true
9+
10+
[dependencies]
11+
serde = { workspace = true }
12+
serde_json = { workspace = true }
13+
14+
[[test]]
15+
name = "builder"
16+
path = "tests/builder.rs"
17+
18+
[lib]
19+
path = "src/lib.rs"

e2e-tests/src/lib.rs

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
//! Live-server test harness for the coold/broker/builder stack.
2+
//!
3+
//! Tests are written as Rust integration tests under `tests/`, marked
4+
//! `#[ignore]` so default `cargo test` skips them. Run with:
5+
//!
6+
//! ```text
7+
//! BUILDER_HOST=<host-a> \
8+
//! COOLD_ONLY_HOST=<host-b> \
9+
//! BUILDER_MGMT=<wg0-ip-of-host-a> \
10+
//! COOLD_ONLY_MGMT=<wg0-ip-of-host-b> \
11+
//! CENTRAL_HOST=<central-host> \
12+
//! SSH_KEY=~/.ssh/<key> \
13+
//! cargo test -p e2e-tests -- --ignored --test-threads=1
14+
//! ```
15+
//!
16+
//! `--test-threads=1` is mandatory: the tests dispatch real builds against
17+
//! a shared cluster, and running them in parallel overwhelms the
18+
//! `COOLD_BUILDER_CAPACITY` semaphore and races on `buildah images` state
19+
//! shared across hosts.
20+
//!
21+
//! The harness drives Redis via `ssh + redis-cli` on the central host and
22+
//! asserts remote state via `buildah images` and `systemctl is-active`.
23+
//! No broker/coold code is linked — tests exercise the black-box contract.
24+
25+
use std::process::Command;
26+
use std::thread;
27+
use std::time::{Duration, Instant};
28+
29+
use serde::Deserialize;
30+
31+
pub struct Env {
32+
pub builder_host: String,
33+
pub cool_only_host: String,
34+
pub builder_mgmt: String,
35+
pub cool_only_mgmt: String,
36+
pub central_host: String,
37+
pub ssh_key: String,
38+
pub ssh_user: String,
39+
}
40+
41+
impl Env {
42+
pub fn from_env() -> Self {
43+
Self {
44+
builder_host: must("BUILDER_HOST"),
45+
cool_only_host: must("COOLD_ONLY_HOST"),
46+
builder_mgmt: must("BUILDER_MGMT"),
47+
cool_only_mgmt: must("COOLD_ONLY_MGMT"),
48+
central_host: must("CENTRAL_HOST"),
49+
ssh_key: must("SSH_KEY"),
50+
ssh_user: std::env::var("SSH_USER").unwrap_or_else(|_| "root".into()),
51+
}
52+
}
53+
54+
pub fn ssh(&self, host: &str, cmd: &str) -> Result<String, String> {
55+
let out = Command::new("ssh")
56+
.args([
57+
"-i",
58+
&self.ssh_key,
59+
"-o",
60+
"StrictHostKeyChecking=accept-new",
61+
"-o",
62+
"BatchMode=yes",
63+
"-o",
64+
"ConnectTimeout=10",
65+
&format!("{}@{}", self.ssh_user, host),
66+
cmd,
67+
])
68+
.output()
69+
.map_err(|e| format!("spawn ssh: {e}"))?;
70+
if !out.status.success() {
71+
return Err(format!(
72+
"ssh {host}: exit {:?}: {}",
73+
out.status.code(),
74+
String::from_utf8_lossy(&out.stderr).trim()
75+
));
76+
}
77+
Ok(String::from_utf8_lossy(&out.stdout).into_owned())
78+
}
79+
80+
pub fn redis_xadd(&self, payload: &str) -> Result<(), String> {
81+
// single-quote the payload in the remote shell; payload is JSON so
82+
// it already uses double quotes and will survive the single-quote
83+
// wrapping.
84+
self.ssh(
85+
&self.central_host,
86+
&format!("redis-cli XADD build:cmd '*' payload '{}'", payload),
87+
)
88+
.map(|_| ())
89+
}
90+
91+
pub fn redis_lpop(&self, request_id: &str) -> Result<Option<String>, String> {
92+
let out = self.ssh(
93+
&self.central_host,
94+
&format!("redis-cli LPOP build:resp:{request_id}"),
95+
)?;
96+
let trimmed = out.trim();
97+
if trimmed.is_empty() || trimmed == "(nil)" {
98+
Ok(None)
99+
} else {
100+
Ok(Some(trimmed.to_owned()))
101+
}
102+
}
103+
104+
pub fn wait_build_resp(&self, request_id: &str, timeout: Duration) -> BuildResponse {
105+
let deadline = Instant::now() + timeout;
106+
while Instant::now() < deadline {
107+
match self.redis_lpop(request_id) {
108+
Ok(Some(line)) => {
109+
return serde_json::from_str(&line)
110+
.unwrap_or_else(|e| panic!("parse response {line:?}: {e}"))
111+
}
112+
Ok(None) => thread::sleep(Duration::from_secs(2)),
113+
Err(e) => panic!("LPOP build:resp:{request_id}: {e}"),
114+
}
115+
}
116+
panic!("no build:resp:{request_id} within {timeout:?}");
117+
}
118+
119+
pub fn has_image(&self, host: &str, tag: &str) -> bool {
120+
let cmd = format!("buildah images 2>/dev/null | grep -q '{tag}' && echo Y || echo N");
121+
self.ssh(host, &cmd).map(|s| s.contains('Y')).unwrap_or(false)
122+
}
123+
124+
pub fn unit_active(&self, host: &str, request_id: &str) -> bool {
125+
let cmd = format!("systemctl is-active coolify-build-{request_id}.service 2>&1");
126+
self.ssh(host, &cmd)
127+
.map(|s| s.trim() == "active")
128+
.unwrap_or(false)
129+
}
130+
131+
pub fn restart_coold(&self, host: &str) -> Result<(), String> {
132+
self.ssh(host, "systemctl restart coold").map(|_| ())
133+
}
134+
135+
pub fn clean_image(&self, host: &str, tag: &str) {
136+
let _ = self.ssh(
137+
host,
138+
&format!("buildah rmi -f localhost/{tag} 2>/dev/null || true"),
139+
);
140+
}
141+
142+
pub fn work_dir_exists(&self, host: &str, request_id: &str) -> bool {
143+
let cmd = format!(
144+
"test -d /var/lib/coolify-builder/work/{request_id} && echo Y || echo N"
145+
);
146+
self.ssh(host, &cmd).map(|s| s.contains('Y')).unwrap_or(false)
147+
}
148+
}
149+
150+
/// Reads the environment variable `key`.
///
/// # Panics
/// Panics when the variable cannot be read. The message names the variable
/// and includes the underlying `VarError`, so "not set" can be told apart
/// from "set but not valid Unicode".
fn must(key: &str) -> String {
    match std::env::var(key) {
        Ok(value) => value,
        Err(err) => panic!("env {} required: {}", key, err),
    }
}
153+
154+
/// Produces a request id of the form `<lowercased prefix>-<nanoseconds since
/// the UNIX epoch>`.
///
/// The prefix is lowercased so the id can double as an OCI image tag (OCI
/// rejects uppercase in repository names).
///
/// # Panics
/// Panics if the system clock reports a time before the UNIX epoch.
pub fn uniq_req_id(prefix: &str) -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let mut id = prefix.to_lowercase();
    id.push('-');
    id.push_str(&since_epoch.as_nanos().to_string());
    id
}
163+
164+
/// Serializes a `static_build` command envelope to a compact JSON string.
///
/// The envelope carries `request_id` and a `command` object with `type`,
/// `repo_url`, `git_ref`, `target_image` (from `target`) and `output_dir`.
/// A top-level `host_id` key is added only when `host_id` is non-empty.
pub fn build_envelope(
    request_id: &str,
    host_id: &str,
    repo_url: &str,
    git_ref: &str,
    target: &str,
    output_dir: &str,
) -> String {
    let command = serde_json::json!({
        "type": "static_build",
        "repo_url": repo_url,
        "git_ref": git_ref,
        "target_image": target,
        "output_dir": output_dir,
    });
    let mut envelope = serde_json::json!({
        "request_id": request_id,
        "command": command,
    });
    if host_id.is_empty() {
        return envelope.to_string();
    }
    // `envelope` was just built as an object, so this always takes the branch.
    if let Some(fields) = envelope.as_object_mut() {
        fields.insert(
            "host_id".to_owned(),
            serde_json::Value::String(host_id.to_owned()),
        );
    }
    envelope.to_string()
}
187+
188+
pub fn cancel_envelope(request_id: &str) -> String {
189+
serde_json::json!({
190+
"request_id": request_id,
191+
"command": { "type": "cancel" },
192+
})
193+
.to_string()
194+
}
195+
196+
#[derive(Debug, Deserialize)]
197+
pub struct BuildResponse {
198+
#[serde(default)]
199+
pub request_id: String,
200+
pub status: String,
201+
#[serde(default)]
202+
pub digest: String,
203+
#[serde(default)]
204+
pub registry_ref: String,
205+
#[serde(default)]
206+
pub duration_ms: u64,
207+
#[serde(default)]
208+
pub code: u32,
209+
#[serde(default)]
210+
pub message: String,
211+
#[serde(default)]
212+
pub stage: String,
213+
}
214+
215+
/// Polls `cond` roughly once per second until it returns `true` or `timeout`
/// elapses.
///
/// `cond` is always evaluated at least once, even for a zero `timeout`. The
/// pause between polls is clamped to the time left, and `cond` is evaluated
/// one final time when the deadline is reached, so a condition that turns
/// true during the last pause is not missed.
///
/// Returns `true` as soon as `cond` holds, `false` if it never held.
pub fn wait_until<F: FnMut() -> bool>(mut cond: F, timeout: Duration) -> bool {
    const POLL_INTERVAL: Duration = Duration::from_secs(1);

    let deadline = Instant::now() + timeout;
    loop {
        if cond() {
            return true;
        }
        let remaining = deadline.saturating_duration_since(Instant::now());
        if remaining.is_zero() {
            return false;
        }
        // Never sleep past the deadline; the next iteration is the final poll.
        thread::sleep(remaining.min(POLL_INTERVAL));
    }
}

0 commit comments

Comments
 (0)