Skip to content

Commit 9ae024f

Browse files
committed
feat(fhevm-cli): cross-version compat check and post-boot health gate
Detect relayer v1/v2 API mismatch at validate time (fail fast instead of runtime 500s) and catch containers that crash shortly after boot by inspecting exit codes after a delay.
1 parent 26f5187 commit 9ae024f

File tree

3 files changed

+175
-4
lines changed

3 files changed

+175
-4
lines changed

test-suite/fhevm/src/cli.test.ts

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ import {
1212
} from "./artifacts";
1313
import { REPO_ROOT, STATE_DIR, TEST_GREP, composePath, resolveServiceOverrides } from "./layout";
1414
import { STEP_NAMES } from "./types";
15-
import { main, overrideWarnings, probeBootstrap, resolveUpgradePlan } from "./runtime";
16-
import { compatPolicyForState, requiresMultichainAclAddress } from "./compat";
15+
import { main, overrideWarnings, postBootHealthGate, probeBootstrap, resolveUpgradePlan } from "./runtime";
16+
import { compatPolicyForState, requiresMultichainAclAddress, validateBundleCompatibility } from "./compat";
1717
import { predictedCrsId, predictedKeyId } from "./utils";
1818
import { applyVersionEnvOverrides, createGitHubClient, resolveTarget } from "./versions";
1919
import {
@@ -1353,3 +1353,84 @@ describe("version resolution edge cases", () => {
13531353
expect(result).toBe(original);
13541354
});
13551355
});
1356+
1357+
// Version matrix for validateBundleCompatibility: the only expected issue is a
// relayer below v0.10.0 paired with a test-suite that is not below v0.11.0.
describe("validateBundleCompatibility", () => {
  // Builds a stub state whose env overrides carry the two versions under test.
  const stateWithVersions = (relayer: string, testSuite: string) =>
    stubState({ envOverrides: { RELAYER_VERSION: relayer, TEST_SUITE_VERSION: testSuite } });

  test("detects relayer v1 vs test-suite v2 mismatch", () => {
    const issues = validateBundleCompatibility(stateWithVersions("v0.9.0", "v0.11.0"));
    expect(issues).toHaveLength(1);
    expect(issues[0].code).toBe("relayer-v1-vs-test-suite-v2");
  });

  test("modern relayer is OK", () => {
    // Deliberately above the v0.10.0 threshold so this case is distinct from
    // the exact-boundary case at the end of this suite.
    expect(validateBundleCompatibility(stateWithVersions("v0.11.0", "v0.11.0"))).toEqual([]);
  });

  test("legacy test-suite is OK", () => {
    expect(validateBundleCompatibility(stateWithVersions("v0.9.0", "v0.10.0"))).toEqual([]);
  });

  test("both modern is OK", () => {
    expect(validateBundleCompatibility(stateWithVersions("v0.10.0", "v0.12.0"))).toEqual([]);
  });

  test("SHA relayer treated as modern", () => {
    expect(validateBundleCompatibility(stateWithVersions("abc1234", "v0.11.0"))).toEqual([]);
  });

  test("SHA test-suite treated as modern triggers mismatch", () => {
    const issues = validateBundleCompatibility(stateWithVersions("v0.9.0", "abc1234"));
    expect(issues).toHaveLength(1);
    expect(issues[0].code).toBe("relayer-v1-vs-test-suite-v2");
  });

  test("empty versions treated as modern", () => {
    expect(validateBundleCompatibility(stateWithVersions("", ""))).toEqual([]);
  });

  test("boundary v0.10.0 relayer is OK", () => {
    // Exactly v0.10.0 is the first relayer version that must NOT be flagged.
    expect(validateBundleCompatibility(stateWithVersions("v0.10.0", "v0.11.0"))).toEqual([]);
  });
});
1397+
1398+
// Exercises postBootHealthGate against a fake runner keyed by the exact docker
// command line; delayMs is always 0 so the tests never sleep.
describe("postBootHealthGate", () => {
  // JSON payload returned by the fake runner for `docker inspect <name>`.
  const inspectResult = (status: string, exitCode: number) =>
    JSON.stringify([
      { Name: "test", State: { Status: status, ExitCode: exitCode }, NetworkSettings: { Networks: {} } },
    ]);

  test("resolves when all containers are running", async () => {
    const runner = fakeRunner({
      "docker inspect container-a": inspectResult("running", 0),
      "docker inspect container-b": inspectResult("running", 0),
    });
    await postBootHealthGate({ runner }, ["container-a", "container-b"], 0);
  });

  test("throws when a container crashed", async () => {
    const runner = fakeRunner({
      "docker inspect container-a": inspectResult("running", 0),
      "docker inspect container-b": inspectResult("exited", 1),
      "docker logs --tail 30 container-b": "Error: missing API key",
    });
    const gate = postBootHealthGate({ runner }, ["container-a", "container-b"], 0);
    await expect(gate).rejects.toThrow(/container-b.*exit 1/);
    // The stubbed log tail must be surfaced in the error, otherwise the gate
    // reports a crash without the diagnostic that explains it.
    await expect(gate).rejects.toThrow(/missing API key/);
  });

  test("throws when container not found", async () => {
    const runner = fakeRunner({
      "docker inspect container-a": { stdout: "", stderr: "", code: 1 },
    });
    await expect(
      postBootHealthGate({ runner }, ["container-a"], 0),
    ).rejects.toThrow(/container not found/);
  });

  test("ignores containers that exited with code 0 (migrations)", async () => {
    const runner = fakeRunner({
      "docker inspect container-a": inspectResult("exited", 0),
    });
    await postBootHealthGate({ runner }, ["container-a"], 0);
  });
});

test-suite/fhevm/src/compat.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,33 @@ export const requiresLegacyRelayerReadinessConfig = (state: Pick<State, "version
108108
/**
 * Reports whether the configured TEST_SUITE_VERSION compares below 0.11.0
 * according to `versionLt`. An unset version is compared as the empty string.
 */
export const requiresLegacyRelayerUrl = (state: Pick<State, "versions">) => {
  const testSuiteVersion = state.versions.env.TEST_SUITE_VERSION ?? "";
  return versionLt(testSuiteVersion, [0, 11, 0]);
};
110110

111+
/** A single cross-component version conflict found in a bundle. */
export type BundleIncompatibility = {
  severity: "error";
  code: string;
  message: string;
};

/**
 * Checks the bundle's component versions for combinations that would fail at runtime.
 *
 * The single rule implemented here flags a RELAYER_VERSION that compares below
 * 0.10.0 together with a TEST_SUITE_VERSION that does not compare below 0.11.0.
 * Unset versions are compared as the empty string.
 *
 * @param state - State slice providing `versions.env`.
 * @returns The detected incompatibilities; an empty array when the bundle is consistent.
 */
export const validateBundleCompatibility = (
  state: Pick<State, "versions">,
): BundleIncompatibility[] => {
  const { env } = state.versions;
  const relayer = env.RELAYER_VERSION ?? "";
  const testSuite = env.TEST_SUITE_VERSION ?? "";

  // Nothing to report when the relayer is new enough or the test-suite is still legacy.
  if (!versionLt(relayer, [0, 10, 0]) || versionLt(testSuite, [0, 11, 0])) {
    return [];
  }

  const message =
    `RELAYER_VERSION ${relayer} only serves /v1 API, but TEST_SUITE_VERSION ${testSuite} expects /v2. ` +
    `Upgrade the relayer to >= v0.10.0 or pin TEST_SUITE_VERSION below v0.11.0.`;
  return [{ severity: "error", code: "relayer-v1-vs-test-suite-v2", message }];
};
137+
111138
export const compatPolicyForState = (state: State): CompatPolicy => {
112139
const policy: CompatPolicy = { coprocessorArgs: {}, connectorEnv: {} };
113140
for (const rule of COMPAT_RULES.coprocessor) {

test-suite/fhevm/src/runtime.ts

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { requiresMultichainAclAddress } from "./compat";
1+
import { requiresMultichainAclAddress, validateBundleCompatibility } from "./compat";
22
import path from "node:path";
33
import { parseArgs } from "node:util";
44

@@ -117,6 +117,59 @@ const dockerInspect = async (runner: Runner, name: string) => {
117117
}>;
118118
};
119119

120+
/** Default delay, in milliseconds, used by the post-boot health gate before it inspects containers. */
const POST_BOOT_HEALTH_GATE_DELAY_MS = 5 * 1_000;

/** Container names handed to the post-boot health gate after the kms-connector step. */
const KMS_CONNECTOR_HEALTH_CONTAINERS: string[] = [
  "kms-connector-gw-listener",
  "kms-connector-kms-worker",
  "kms-connector-tx-sender",
];
127+
128+
/**
 * Lists the coprocessor container names to health-check: every coprocessor
 * service suffix that does not contain "migration", expanded through
 * `toServiceName` for each instance index in `[0, state.topology.count)`.
 * Names are grouped by instance index, suffixes in their declared order.
 */
const coprocessorHealthContainers = (state: Pick<State, "topology">): string[] => {
  const checkedSuffixes = GROUP_SERVICE_SUFFIXES["coprocessor"].filter(
    (suffix) => !suffix.includes("migration"),
  );
  const containers: string[] = [];
  for (let instance = 0; instance < state.topology.count; instance += 1) {
    containers.push(...checkedSuffixes.map((suffix) => toServiceName(suffix, instance)));
  }
  return containers;
};
138+
139+
/**
 * Inspects the given containers after an optional delay and fails if any of
 * them is missing or has exited with a non-zero code.
 *
 * Containers are inspected one at a time, in the order given. A container
 * whose status is "exited" with exit code 0 is not treated as a failure. For
 * each non-zero exit, the last 30 log lines are fetched (log retrieval is
 * allowed to fail) and included in the error.
 *
 * @param deps - Provides the command `runner`.
 * @param containerNames - Names of the containers to inspect.
 * @param delayMs - Time to wait before inspecting; no wait when not positive.
 * @throws Error listing every missing or crashed container with its exit code and logs.
 */
export const postBootHealthGate = async (
  deps: Pick<RuntimeDeps, "runner">,
  containerNames: string[],
  delayMs = POST_BOOT_HEALTH_GATE_DELAY_MS,
) => {
  if (delayMs > 0) {
    await sleep(delayMs);
  }

  const failures: { name: string; exitCode: number; logs: string }[] = [];
  for (const name of containerNames) {
    const [container] = await dockerInspect(deps.runner, name);
    if (!container) {
      // No inspect entry at all: report it with a sentinel exit code.
      failures.push({ name, exitCode: -1, logs: "(container not found)" });
      continue;
    }

    const { Status: status, ExitCode: exitCode } = container.State;
    if (status !== "exited" || exitCode === 0) {
      continue;
    }

    const { stdout, stderr } = await deps.runner(["docker", "logs", "--tail", "30", name], {
      allowFailure: true,
    });
    failures.push({ name, exitCode, logs: (stdout + stderr).trim() });
  }

  if (failures.length === 0) {
    return;
  }

  const report = failures
    .map(({ name, exitCode, logs }) => {
      const indentedLogs = logs.split("\n").join("\n ");
      return ` ${name} (exit ${exitCode}):\n ${indentedLogs}`;
    })
    .join("\n");
  throw new Error(
    `Post-boot health gate: ${failures.length} container(s) crashed shortly after starting:\n${report}`,
  );
};
172+
120173
// Resolves to the State read from STATE_FILE when `exists(STATE_FILE)` is truthy, otherwise undefined.
const loadState = async () => {
  if (!(await exists(STATE_FILE))) {
    return undefined;
  }
  return readJson<State>(STATE_FILE);
};
121174
// Hands the given state to `writeJson` with STATE_FILE as the target and returns its result.
const saveState = async (state: State) => writeJson(STATE_FILE, state);
122175

@@ -983,16 +1036,26 @@ const runStep = async (state: State, step: StepName, deps: RuntimeDeps) => {
9831036
case "regenerate":
9841037
await regen(state, deps);
9851038
break;
986-
case "validate":
1039+
case "validate": {
9871040
validateDiscovery(state);
1041+
const incompatibilities = validateBundleCompatibility(state);
1042+
if (incompatibilities.length) {
1043+
throw new Error(
1044+
`Bundle version incompatibilities detected:\n` +
1045+
incompatibilities.map((i) => ` - ${i.message}`).join("\n"),
1046+
);
1047+
}
9881048
break;
1049+
}
9891050
case "coprocessor":
9901051
await composeUp("coprocessor", state, deps, saveState, log, serviceNameList(state, "coprocessor"));
9911052
await waitForCoprocessor(state, deps);
1053+
await postBootHealthGate(deps, coprocessorHealthContainers(state));
9921054
break;
9931055
case "kms-connector":
9941056
await composeUp("kms-connector", state, deps, saveState, log);
9951057
await waitForKmsConnector(deps);
1058+
await postBootHealthGate(deps, KMS_CONNECTOR_HEALTH_CONTAINERS);
9961059
break;
9971060
case "bootstrap":
9981061
await composeUp("gateway-sc", state, deps, saveState, log, ["gateway-sc-add-network"], { noDeps: true });

0 commit comments

Comments
 (0)