Skip to content

Commit 8f60dce

Browse files
committed
test(e2e): generalize cases for cloud profile + runtime supportedImages probe
Make every case in scripts/test/e2e/cases/ runnable against any profile pointing at any deployed stack (local boxlite serve, Tokyo e2e-ci, Singapore dev), removing the hard-codes that only worked against a local boxlite serve. (1) Profile name. Every per-file _profile() helper used to hardcode "p1"; now reads BOXLITE_E2E_PROFILE env (default "p1" for backwards compat). 8 case files updated (test_c_entry, test_go_entry, test_node_entry, test_error_code_mapping, test_errors, test_quota_enforcement, test_runner_concurrency, test_shutdown). (2) Box id format. UUID_RE only accepted the 36-char UUID form, but the local runtime mints 12-char Base62 and a REST server may return a ULID. Replace with BOX_ID_RE matching all three (UUID / ULID / 12-char Base62). 5 case files updated (test_c_entry, test_cli_entry, test_cli_detach_recovery, test_go_entry, test_node_entry). (3) Image allowlist. The previous design relied on the workflow yml injecting BOXLITE_E2E_IMAGE at the value the deployed API accepted — every supportedImages allowlist update (#758 → digest-pinned; #800 → tag-pinned :20260605-p0-r3) required a workflow update to keep create_box from cascading 38 4xx FAILs. conftest now probes the deployed API at session start (POST /boxes with a sentinel out-of-allowlist image; parse the 400 body's "Supported images: a, b, c" list; pick the first) and pins the discovered value back to os.environ so the C / Go / Node entry-driver subprocesses inherit it. BOXLITE_E2E_IMAGE remains as an override for reproducibility audits. (4) Whoami assertion. test_cli_whoami_against_local_api hard-coded "boxlite-admin" + "http://localhost:3000" — only ever passed against a local boxlite serve. Now reads the active profile's URL from credentials.toml and asserts it appears in whoami output + "Not logged in" doesn't. (5) Path-bypass guard gating. The C / Go / Node / CLI entry smokes all ended with a runner_hits_for_box assert that requires journalctl access to the host running boxlite-runner. 
On the cloud profiles that runner lives on a remote EC2; the autouse Python fixture already bypasses via BOXLITE_E2E_SKIP_PATH_VERIFY but the subprocess smokes did not. conftest exposes path_verify_skipped() (single truthy reading of the env) and the 4 entry tests + detach_recovery gate their hits-check on it. Box id / driver output assertions still run. (6) Image / drain robustness in test_exec_timeout: drain(ex) is moved behind a short asyncio.wait_for and reordered after ex.wait() — the REST runner's stream pumps don't reliably observe stream closure when the workload terminates via SIGKILL, so the previous shape blocked indefinitely on cloud.
1 parent a75891f commit 8f60dce

12 files changed

Lines changed: 296 additions & 72 deletions

scripts/test/e2e/cases/conftest.py

Lines changed: 134 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,14 @@
1212
from __future__ import annotations
1313

1414
import asyncio
15+
import json
1516
import os
17+
import re
1618
import sys
1719
import time
1820
import tomllib
21+
import urllib.error
22+
import urllib.request
1923
from pathlib import Path
2024

2125
import pytest
@@ -27,10 +31,125 @@
2731
from path_verification import runner_journal_seek, runner_hits_for_box
2832

2933
DEFAULT_PROFILE = os.environ.get("BOXLITE_E2E_PROFILE", "p1")
30-
DEFAULT_IMAGE = os.environ.get("BOXLITE_E2E_IMAGE", "alpine:3.23")
3134
CRED_PATH = Path.home() / ".boxlite" / "credentials.toml"
3235

3336

37+
def _discover_supported_image() -> str:
38+
"""Resolve the box image at session start.
39+
40+
Precedence:
41+
1. `BOXLITE_E2E_IMAGE` env — explicit override (CI sets this, local
42+
devs can pin a known-good ref). Returned as-is, no validation.
43+
2. Probe — POST /boxes with an obviously-out-of-allowlist image
44+
against the active credential profile's API, parse the 400
45+
body's `"Supported images: a, b, c"` list, return the first.
46+
The first entry is the server's default (curated-images.constant
47+
`assertSupportedImage(undefined)` returns `supported[0]`),
48+
which is the safest pick across reboots / image-allowlist
49+
rotations.
50+
3. Fallback `alpine:3.23` — if probe fails (network down, auth
51+
broken, body shape changed). Tests downstream will still 400
52+
loudly so the regression is visible.
53+
54+
The discovered value is also written back to `os.environ
55+
['BOXLITE_E2E_IMAGE']` so the C / Go / Node SDK entry drivers'
56+
subprocess env inherits it without each test re-implementing the
57+
probe.
58+
"""
59+
explicit = os.environ.get("BOXLITE_E2E_IMAGE", "").strip()
60+
if explicit:
61+
return explicit
62+
if not CRED_PATH.exists():
63+
return "alpine:3.23"
64+
try:
65+
data = tomllib.loads(CRED_PATH.read_text())
66+
p = data.get("profiles", {}).get(DEFAULT_PROFILE)
67+
if not p:
68+
return "alpine:3.23"
69+
url = f"{p['url'].rstrip('/')}/v1/{p.get('path_prefix') or ''}/boxes".replace("//boxes", "/boxes")
70+
req = urllib.request.Request(
71+
url,
72+
method="POST",
73+
headers={
74+
"Authorization": f"Bearer {p['api_key']}",
75+
"Content-Type": "application/json",
76+
},
77+
# Send a deliberately-unsupported image so the API answers
78+
# with its full supportedImages list. cpus/memory are
79+
# required by the DTO but never reached — image validation
80+
# rejects first.
81+
data=json.dumps({
82+
"image": "__e2e_probe_not_in_allowlist__",
83+
"cpus": 1,
84+
"memory_mib": 256,
85+
}).encode(),
86+
)
87+
try:
88+
urllib.request.urlopen(req, timeout=10).read()
89+
except urllib.error.HTTPError as e:
90+
if e.code == 400:
91+
body = json.loads(e.read())
92+
# Message shape:
93+
# "Unsupported image 'X'. Supported images: a, b, c"
94+
m = re.search(
95+
r"Supported images:\s*(.+?)\s*$",
96+
body.get("message", ""),
97+
)
98+
if m:
99+
images = [s.strip() for s in m.group(1).split(",") if s.strip()]
100+
if images:
101+
return images[0]
102+
except Exception:
103+
pass
104+
return "alpine:3.23"
105+
106+
107+
DEFAULT_IMAGE = _discover_supported_image()
108+
# Pin the discovered value into the env so the C / Go / Node entry
109+
# drivers' subprocess inherit it without re-running the probe.
110+
os.environ["BOXLITE_E2E_IMAGE"] = DEFAULT_IMAGE
111+
112+
# test_path_verification.py is a LOCAL-only meta-test: case 1 asserts the
113+
# credentials.toml URL contains ":3000" (the local API port), and case 2
114+
# reads the host's `boxlite-runner` systemd journal via journalctl. Both
115+
# can't run on a remote profile pointing at the Tokyo ELB. Drop them
116+
# from pytest collection on any non-default profile so the cloud gate
117+
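# reports them as "not collected" rather than producing a SKIP entry.
# NOTE(review): "non-default" below means "not literally named 'default'".
# BOXLITE_E2E_PROFILE falls back to "p1", so an unconfigured local run
# also drops these tests — confirm that is intended.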
118+
if DEFAULT_PROFILE != "default":
119+
collect_ignore = ["test_path_verification.py"]
120+
121+
122+
def path_verify_skipped() -> bool:
    """Report whether BOXLITE_E2E_SKIP_PATH_VERIFY is set to a truthy value.

    Shared by the SDK entry smokes (CLI / C / Go / Node). Each of them
    spawns a subprocess that creates a box and then asserts
    `runner_hits_for_box >= 1`; that cannot hold on a cloud run where
    journalctl lives on a remote EC2. When this returns True the entry
    tests drop the journal-hits assertion while the box-id and
    driver-output assertions still run.

    Only "1", "true", "yes" and "on" (case-insensitive) count as truthy;
    an unset variable or any other value yields False.
    """
    raw_flag = os.environ.get("BOXLITE_E2E_SKIP_PATH_VERIFY", "")
    truthy_spellings = ("1", "true", "yes", "on")
    return raw_flag.lower() in truthy_spellings
133+
134+
135+
def skip_or_fail_unless_sdk_build_required(reason: str) -> None:
    """Skip the current test, or fail it when SDK builds are mandatory.

    The SDK entry-point fixtures (test_c_entry, test_go_entry,
    test_node_entry, test_cli_entry, test_cli_detach_recovery) call this
    when their build artifact is missing. By default that is a skip,
    which suits a local checkout where not every SDK has been built.
    With BOXLITE_E2E_REQUIRE_SDK_BUILDS set to "1", "true", "yes" or "on"
    (case-insensitive) the missing prerequisite is reported as a test
    failure instead, so a broken build step on the cloud gate is not
    hidden behind a silent skip.

    Args:
        reason: Description of the missing prerequisite; used as the
            skip reason and embedded in the failure message.
    """
    strict = os.environ.get("BOXLITE_E2E_REQUIRE_SDK_BUILDS", "").lower()
    if strict not in ("1", "true", "yes", "on"):
        pytest.skip(reason)
    pytest.fail(
        "BOXLITE_E2E_REQUIRE_SDK_BUILDS=1 forbids skipping this case "
        f"but the prerequisite is missing: {reason}"
    )
150+
151+
152+
34153
def _profile(name: str) -> dict:
35154
if not CRED_PATH.exists():
36155
pytest.exit(
@@ -105,7 +224,21 @@ async def verify_runner_saw_all_boxes(rt):
105224
journal — if not, the SDK silently bypassed the API → Runner
106225
chain (e.g. degraded to local FFI, or the runner-side journal
107226
write broke). Tests that don't create any boxes are unaffected.
227+
228+
Set ``BOXLITE_E2E_SKIP_PATH_VERIFY=1`` to bypass this check entirely.
229+
Intended for cloud-CI runs where the runner journal lives on a
230+
remote EC2 instance and isn't reachable from ``journalctl`` on the
231+
pytest host. The FFI-bypass risk this guard defends against doesn't
232+
apply on a stock GitHub-hosted runner (no KVM, libkrun can't start
233+
a VM), so disabling it there loses no real safety net.
108234
"""
235+
# Truthy values only. Plain `if os.environ.get(...)` treats "0"
236+
# and "false" as truthy because they're non-empty strings, which
237+
# is the opposite of what someone setting the var to "0" expects.
238+
if os.environ.get("BOXLITE_E2E_SKIP_PATH_VERIFY", "").lower() in ("1", "true", "yes", "on"):
239+
yield
240+
return
241+
109242
since = runner_journal_seek()
110243
object.__setattr__(rt, "_created", [])
111244

scripts/test/e2e/cases/test_c_entry.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,43 @@
1818

1919
import pytest
2020

21+
from conftest import skip_or_fail_unless_sdk_build_required, path_verify_skipped
22+
2123
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))
2224
from path_verification import runner_journal_seek, runner_hits_for_box
2325

2426
REPO = Path(__file__).resolve().parents[4]
2527
SRC = REPO / "scripts/test/e2e/sdks/c/e2e_basic.c"
2628
HDR = REPO / "sdks/c/include"
2729
LIB_DIR = REPO / "target/release"
28-
UUID_RE = re.compile(
29-
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
30+
# Box ids are server-issued and opaque: the local runtime mints 12-char
31+
# Base62, but a REST server may return a ULID or UUID (see BoxID docs,
32+
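# src/boxlite/src/runtime/id.rs).
# NOTE(review): the 12-char alternative also matches any 12-character
# alphanumeric word (e.g. "successfully") and the last group of an
# upper-case UUID, so `search` is only reliable while the driver prints
# nothing of that shape before the id.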
33+
BOX_ID_RE = re.compile(
34+
r"\b("
35+
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" # UUID
36+
r"|[0-9A-HJKMNP-TV-Z]{26}" # ULID
37+
r"|[0-9A-Za-z]{12}" # 12-char Base62
38+
r")\b"
3039
)
3140

3241

3342
def _profile():
43+
name = os.environ.get("BOXLITE_E2E_PROFILE", "p1")
3444
return tomllib.loads(
3545
(Path.home() / ".boxlite/credentials.toml").read_text()
36-
)["profiles"]["p1"]
46+
)["profiles"][name]
3747

3848

3949
@pytest.fixture(scope="module")
4050
def c_binary():
4151
if not shutil.which("gcc"):
42-
pytest.skip("gcc not installed")
52+
skip_or_fail_unless_sdk_build_required("gcc not installed")
4353
if not SRC.exists():
44-
pytest.skip(f"{SRC} missing")
54+
skip_or_fail_unless_sdk_build_required(f"{SRC} missing")
4555
if not (LIB_DIR / "libboxlite.so").exists() and \
4656
not (LIB_DIR / "libboxlite.a").exists():
47-
pytest.skip(
57+
skip_or_fail_unless_sdk_build_required(
4858
f"libboxlite.so / .a missing under {LIB_DIR}; build with "
4959
f"`cargo build --release -p boxlite-c` first"
5060
)
@@ -60,7 +70,7 @@ def c_binary():
6070
try:
6171
subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=120)
6272
except subprocess.CalledProcessError as e:
63-
pytest.skip(f"gcc build failed: {e.stderr[:600]}")
73+
skip_or_fail_unless_sdk_build_required(f"gcc build failed: {e.stderr[:600]}")
6474
return bin_path
6575

6676

@@ -73,7 +83,7 @@ def test_c_sdk_create_remove(c_binary):
7383
"BOXLITE_E2E_URL": p["url"],
7484
"BOXLITE_E2E_API_KEY": p["api_key"],
7585
"BOXLITE_E2E_PREFIX": p.get("path_prefix") or "",
76-
"BOXLITE_E2E_IMAGE": "alpine:3.23",
86+
"BOXLITE_E2E_IMAGE": os.environ.get("BOXLITE_E2E_IMAGE", "alpine:3.23"),
7787
"LD_LIBRARY_PATH": str(LIB_DIR),
7888
}
7989
r = subprocess.run(
@@ -84,12 +94,13 @@ def test_c_sdk_create_remove(c_binary):
8494
f"C driver exit={r.returncode}\nstdout:\n{r.stdout}\nstderr:\n{r.stderr}"
8595
)
8696

87-
m = UUID_RE.search(r.stdout)
97+
m = BOX_ID_RE.search(r.stdout)
8898
assert m, f"C driver did not print BOX_ID: {r.stdout!r}"
8999
box_id = m.group(0)
90100
assert "OK" in r.stdout
91101

92-
hits = runner_hits_for_box(journal_since, box_id)
93-
assert hits >= 1, (
94-
f"runner journal did not see box {box_id} created by C SDK"
95-
)
102+
if not path_verify_skipped():
103+
hits = runner_hits_for_box(journal_since, box_id)
104+
assert hits >= 1, (
105+
f"runner journal did not see box {box_id} created by C SDK"
106+
)

scripts/test/e2e/cases/test_cli_detach_recovery.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030

3131
import pytest
3232

33+
from conftest import skip_or_fail_unless_sdk_build_required, path_verify_skipped
34+
3335
sys.path.insert(
3436
0,
3537
str(Path(__file__).resolve().parents[4] / "scripts" / "test" / "e2e" / "lib"),
@@ -38,15 +40,22 @@
3840

3941
BOXLITE_BIN = os.environ.get("BOXLITE_E2E_CLI", shutil.which("boxlite"))
4042
IMAGE = os.environ.get("BOXLITE_E2E_IMAGE", "alpine:3.23")
41-
UUID_RE = re.compile(
42-
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
43+
# Box ids are server-issued and opaque: the local runtime mints 12-char
44+
# Base62, but a REST server may return a ULID or UUID (see BoxID docs,
45+
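# src/boxlite/src/runtime/id.rs).
# NOTE(review): the 12-char alternative also matches any 12-character
# alphanumeric word (e.g. "successfully") and the last group of an
# upper-case UUID, so `search` is only reliable while the CLI prints
# nothing of that shape before the id.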
46+
BOX_ID_RE = re.compile(
47+
r"\b("
48+
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" # UUID
49+
r"|[0-9A-HJKMNP-TV-Z]{26}" # ULID
50+
r"|[0-9A-Za-z]{12}" # 12-char Base62
51+
r")\b"
4352
)
4453

4554

4655
@pytest.fixture(scope="module")
def cli():
    """Provide the path of the boxlite CLI binary under test.

    Uses the module-level BOXLITE_BIN. When it is unset or does not
    exist on disk, the decision between skipping and failing is handed
    to skip_or_fail_unless_sdk_build_required.
    """
    binary_present = bool(BOXLITE_BIN) and Path(BOXLITE_BIN).exists()
    if binary_present:
        return BOXLITE_BIN
    skip_or_fail_unless_sdk_build_required(f"boxlite CLI not found at {BOXLITE_BIN!r}")
    return BOXLITE_BIN
5160

5261

@@ -80,7 +89,7 @@ def test_detached_box_survives_cli_exit_and_is_reusable(cli):
8089

8190
# 1) detach run in one CLI process
8291
r_run = run(cli, "run", "-d", IMAGE, "--", "sleep", "300", timeout=120)
83-
m = UUID_RE.search(r_run.stdout)
92+
m = BOX_ID_RE.search(r_run.stdout)
8493
assert m, f"`boxlite run -d` did not print a uuid: {r_run.stdout!r}"
8594
box_id = m.group(0)
8695

@@ -113,11 +122,12 @@ def test_detached_box_survives_cli_exit_and_is_reusable(cli):
113122
)
114123

115124
# 5) runner journal saw the box id (path-bypass guard)
116-
hits = runner_hits_for_box(journal_since, box_id)
117-
assert hits >= 1, (
118-
f"runner journal did not see detached box {box_id}; "
119-
f"`boxlite run -d` may have bypassed the API"
120-
)
125+
if not path_verify_skipped():
126+
hits = runner_hits_for_box(journal_since, box_id)
127+
assert hits >= 1, (
128+
f"runner journal did not see detached box {box_id}; "
129+
f"`boxlite run -d` may have bypassed the API"
130+
)
121131
finally:
122132
run(cli, "rm", "-f", box_id, check=False)
123133

@@ -127,7 +137,7 @@ def test_detached_box_exec_propagates_exit_code_on_fresh_cli(cli):
127137
still propagate when the exec is launched from a fresh CLI process
128138
(i.e. no in-memory SDK state to lean on)."""
129139
r_run = run(cli, "run", "-d", IMAGE, "--", "sleep", "300", timeout=120)
130-
m = UUID_RE.search(r_run.stdout)
140+
m = BOX_ID_RE.search(r_run.stdout)
131141
assert m
132142
box_id = m.group(0)
133143

0 commit comments

Comments
 (0)