Skip to content

Commit 021f58a

Browse files
committed
feat: implement ralph loop CI gating
Implement CI-gating spec compliance for the Ralph loop with persisted ciStatus state, prompt gating context, CI fix tracking, lint warning aggregation, and CI-blocked notifications.

Also add CI status summaries to GitHub evaluation comments, apply required tracking issue labels, add unit tests for CI-gating helpers, and update IMPLEMENTATION_PLAN.md with completed task notes and validation outcomes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 002e4f6 commit 021f58a

4 files changed

Lines changed: 449 additions & 11 deletions

File tree

IMPLEMENTATION_PLAN.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,3 +212,18 @@ This plan lists prioritized tasks required to bring the implementation into full
212212
- Added a shared helper to clamp evaluation timeout to a safe 180s–600s window, using loop timeout config as the source of truth.
213213
- Evaluation now retries once when the SDK reports a `session.idle` timeout, reducing transient fallback-score failures.
214214
- Validation run after this change: `typecheck`, `lint` (warnings only), `test`, and `npm audit --production` all pass; audit reports 0 vulnerabilities.
215+
216+
## 17. Ralph Loop CI Gating and Reporting Compliance
217+
- **Task:** Implement CI status persistence, prompt gating, and CI visibility/reporting in the Ralph loop. **[COMPLETE]**
218+
- **Spec:** ci-gating/spec.md (CI Status Tracking, CI Gating Logic, CI Fix Tracking, GitHub Reporting, Lint Warning Accumulation), ralph-loop/spec.md (GitHub issue labels)
219+
- **Files:** ralph-loop.ts, src/ralph/ci-gating.ts (new), test/unit/ralph/ci-gating.test.ts (new)
220+
- **Tests:** test/unit/ralph/ci-gating.test.ts
221+
- **Dependencies:** Task 16
222+
- **Notes:**
223+
- Targets the low-scoring checklist areas (spec compliance and code quality) by implementing the missing `ciStatus` state fields and the CI-gating behavior required by `ci-gating/spec.md`.
224+
- Added full CI check execution per iteration (`build`, `test`, `lint`), persisted result fields (`passed`, status breakdown, errors, timestamps), and CI-broken fix tracking (`ciBrokenSince`, `ciFixAttempts`, `ciLastFixAttempt`).
225+
- Added build-prompt CI context injection (`✅ pass`, `⚠️ lint warnings`, `❌ blocking failures`) so red CI explicitly blocks feature work and partial CI is highlighted.
226+
- Added CI status summaries to GitHub fitness comments and CI-blocked issue notifications (`🚨 CI BLOCKED at Iteration N`) with failure details.
227+
- Added lint warning aggregation (top rules/files) and threshold warning log when warnings exceed 20.
228+
- Tracking issue creation now includes required labels: `ralph-loop`, `automated`.
229+
- Validation run after this change: `npm run typecheck`, `npm run lint`, `npm test`, and `npm audit --production` all pass; audit reports 0 vulnerabilities.

ralph-loop.ts

Lines changed: 153 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@ import {
1212
isSessionIdleTimeoutError,
1313
resolveEvaluationTimeoutMs,
1414
} from "./src/ralph/evaluation.ts";
15+
import {
16+
deriveCiStatus,
17+
generateCiBlockedComment,
18+
generateCiCommentSummary,
19+
generateCiPromptContext,
20+
isCiBroken,
21+
normalizeCiStatus,
22+
type CiStatus,
23+
type CommandCheckResult,
24+
} from "./src/ralph/ci-gating.ts";
1525

1626
// --- Types ---
1727

@@ -59,6 +69,11 @@ interface RalphState {
5969
currentModel: string;
6070
trackingIssueNumber: number | null;
6171
evaluations: Evaluation[];
72+
ciStatus: CiStatus;
73+
ciBrokenSince: number | null;
74+
ciFixAttempts: number;
75+
ciLastFixAttempt: number | null;
76+
ciLastBlockedNotification: number | null;
6277
}
6378

6479
type Mode = "plan" | "build";
@@ -74,19 +89,54 @@ async function loadConfig(): Promise<RalphConfig> {
7489
return JSON.parse(raw) as RalphConfig;
7590
}
7691

77-
async function loadState(): Promise<RalphState> {
78-
if (existsSync(STATE_FILE)) {
79-
const raw = await readFile(STATE_FILE, "utf-8");
80-
return JSON.parse(raw) as RalphState;
81-
}
92+
function defaultState(): RalphState {
8293
return {
8394
currentIteration: 0,
8495
currentModel: "",
8596
trackingIssueNumber: null,
8697
evaluations: [],
98+
ciStatus: normalizeCiStatus(undefined),
99+
ciBrokenSince: null,
100+
ciFixAttempts: 0,
101+
ciLastFixAttempt: null,
102+
ciLastBlockedNotification: null,
87103
};
88104
}
89105

106+
async function loadState(): Promise<RalphState> {
107+
if (existsSync(STATE_FILE)) {
108+
const raw = await readFile(STATE_FILE, "utf-8");
109+
const parsed = JSON.parse(raw) as Partial<RalphState>;
110+
return {
111+
currentIteration:
112+
typeof parsed.currentIteration === "number" ? parsed.currentIteration : 0,
113+
currentModel:
114+
typeof parsed.currentModel === "string" ? parsed.currentModel : "",
115+
trackingIssueNumber:
116+
typeof parsed.trackingIssueNumber === "number"
117+
? parsed.trackingIssueNumber
118+
: null,
119+
evaluations: Array.isArray(parsed.evaluations)
120+
? (parsed.evaluations as Evaluation[])
121+
: [],
122+
ciStatus: normalizeCiStatus(parsed.ciStatus),
123+
ciBrokenSince:
124+
typeof parsed.ciBrokenSince === "number" ? parsed.ciBrokenSince : null,
125+
ciFixAttempts:
126+
typeof parsed.ciFixAttempts === "number" ? parsed.ciFixAttempts : 0,
127+
ciLastFixAttempt:
128+
typeof parsed.ciLastFixAttempt === "number"
129+
? parsed.ciLastFixAttempt
130+
: null,
131+
ciLastBlockedNotification:
132+
typeof parsed.ciLastBlockedNotification === "number"
133+
? parsed.ciLastBlockedNotification
134+
: null,
135+
};
136+
}
137+
return defaultState();
138+
}
139+
90140
async function saveState(state: RalphState): Promise<void> {
91141
await writeFile(STATE_FILE, JSON.stringify(state, null, 2));
92142
}
@@ -150,7 +200,7 @@ function selectModel(
150200

151201
// --- Fitness evaluation ---
152202

153-
function runCommand(cmd: string): { success: boolean; output: string } {
203+
function runCommand(cmd: string): CommandCheckResult {
154204
try {
155205
const output = execSync(cmd, {
156206
encoding: "utf-8",
@@ -167,6 +217,63 @@ function runCommand(cmd: string): { success: boolean; output: string } {
167217
}
168218
}
169219

220+
function runCiCheck(iteration: number, state: RalphState): void {
221+
const buildResult = runCommand("npm run build 2>&1");
222+
const testResult = runCommand("npm test 2>&1");
223+
const lintResult = runCommand("npm run lint 2>&1");
224+
const { status, lintSummary } = deriveCiStatus(
225+
buildResult,
226+
testResult,
227+
lintResult,
228+
);
229+
state.ciStatus = status;
230+
231+
if (status.lintStatus === "warnings") {
232+
log(
233+
`CI warnings: ${status.lintWarningCount ?? 0} warnings (build/test passing)`,
234+
"WARN",
235+
);
236+
if ((status.lintWarningCount ?? 0) > 20) {
237+
log(
238+
`[Lint Warning] Threshold exceeded: ${status.lintWarningCount} > 20`,
239+
"WARN",
240+
);
241+
}
242+
if (lintSummary.topRules.length > 0 || lintSummary.topFiles.length > 0) {
243+
const ruleSummary = lintSummary.topRules.join(", ");
244+
const fileSummary = lintSummary.topFiles.join(", ");
245+
log(
246+
`Lint warning details:\nTop rules: ${ruleSummary || "none"}\nTop files: ${fileSummary || "none"}`,
247+
"WARN",
248+
);
249+
}
250+
}
251+
252+
const wasBroken = state.ciBrokenSince !== null;
253+
const nowBroken = isCiBroken(status);
254+
255+
if (nowBroken) {
256+
if (state.ciBrokenSince === null) {
257+
state.ciBrokenSince = iteration;
258+
}
259+
state.ciFixAttempts += 1;
260+
state.ciLastFixAttempt = iteration;
261+
return;
262+
}
263+
264+
if (wasBroken && state.ciBrokenSince !== null) {
265+
const brokenIterations = iteration - state.ciBrokenSince;
266+
log(
267+
`[CI Recovery] Fixed after ${brokenIterations} iterations and ${state.ciFixAttempts} attempts`,
268+
"INFO",
269+
);
270+
state.ciBrokenSince = null;
271+
state.ciFixAttempts = 0;
272+
state.ciLastFixAttempt = iteration;
273+
state.ciLastBlockedNotification = null;
274+
}
275+
}
276+
170277
async function collectSpecFiles(): Promise<string> {
171278
const specs: string[] = [];
172279
// Scan all subdirectories under openspec/specs/ automatically
@@ -382,6 +489,7 @@ function generateCommentBody(
382489
iteration: number,
383490
model: string,
384491
scores: FitnessScores,
492+
ciStatus: CiStatus,
385493
): string {
386494
// Sort checklist ascending by score so regressions surface first
387495
const sortedChecklist = [...(scores.checklist ?? [])].sort(
@@ -413,6 +521,7 @@ function generateCommentBody(
413521
| **Aggregate** | **${scores.aggregate}/100** |
414522
415523
**Model**: ${model}
524+
**CI**: ${generateCiCommentSummary(ciStatus)}
416525
417526
${accordion}
418527
@@ -478,7 +587,8 @@ async function postToGitHub(
478587
if (!state.trackingIssueNumber) {
479588
const result = execSync(
480589
`gh issue create --repo "${config.trackingRepo}" ` +
481-
`--title "[Ralph Loop] Fitness Tracking"`,
590+
`--title "[Ralph Loop] Fitness Tracking" ` +
591+
`--label "ralph-loop" --label "automated"`,
482592
{ encoding: "utf-8", timeout: 30_000 },
483593
);
484594
const match = result.match(/\/issues\/(\d+)/);
@@ -490,7 +600,7 @@ async function postToGitHub(
490600

491601
if (state.trackingIssueNumber) {
492602
// Post per-evaluation comment (uses --body-file to preserve newlines)
493-
const comment = generateCommentBody(iteration, model, scores);
603+
const comment = generateCommentBody(iteration, model, scores, state.ciStatus);
494604
ghWithBodyFile(
495605
`gh issue comment ${state.trackingIssueNumber} --repo "${config.trackingRepo}"`,
496606
comment,
@@ -512,6 +622,33 @@ async function postToGitHub(
512622
}
513623
}
514624

625+
async function postCiBlockedNotification(
626+
state: RalphState,
627+
config: RalphConfig,
628+
iteration: number,
629+
): Promise<void> {
630+
if (
631+
!config.trackingRepo ||
632+
!state.trackingIssueNumber ||
633+
!isCiBroken(state.ciStatus) ||
634+
state.ciLastBlockedNotification === iteration
635+
) {
636+
return;
637+
}
638+
639+
try {
640+
const body = generateCiBlockedComment(iteration, state.ciStatus);
641+
ghWithBodyFile(
642+
`gh issue comment ${state.trackingIssueNumber} --repo "${config.trackingRepo}"`,
643+
body,
644+
true,
645+
);
646+
state.ciLastBlockedNotification = iteration;
647+
} catch (err) {
648+
log(`Failed to post CI blocked notification: ${err}`, "ERROR");
649+
}
650+
}
651+
515652
// --- Tool event formatting ---
516653

517654
/**
@@ -743,10 +880,11 @@ async function ralphLoop(mode: Mode, maxIterationsOverride?: number) {
743880
for (let i = startIteration; i <= endIteration; i++) {
744881
if (shuttingDown) break;
745882

883+
const ciContext = generateCiPromptContext(state.ciStatus);
746884
const improvementContext = generateImprovementContext(state.evaluations);
747-
const prompt = improvementContext
748-
? `${basePrompt}\n${improvementContext}`
749-
: basePrompt;
885+
const prompt = [basePrompt, ciContext, improvementContext]
886+
.filter((v) => v.trim() !== "")
887+
.join("\n");
750888

751889
const lastEval = state.evaluations[state.evaluations.length - 1];
752890
const scoreHint = lastEval
@@ -757,6 +895,9 @@ async function ralphLoop(mode: Mode, maxIterationsOverride?: number) {
757895
const worstItem = [...lastEval.scores.checklist].sort((a, b) => a.score - b.score)[0]!;
758896
log(`Target this iteration: [${worstItem.score}/100] ${worstItem.requirement}`, "ITER");
759897
}
898+
if (isCiBroken(state.ciStatus)) {
899+
await postCiBlockedNotification(state, config, i);
900+
}
760901

761902
const session = await client.createSession({
762903
model: state.currentModel,
@@ -802,6 +943,7 @@ async function ralphLoop(mode: Mode, maxIterationsOverride?: number) {
802943
log(`Iteration ${i} complete in ${elapsed}s | Tools used: ${toolSummary || "none"}`, "ITER");
803944

804945
state.currentIteration = i;
946+
runCiCheck(i, state);
805947

806948
// Fitness evaluation every N iterations
807949
if (i % config.evaluationInterval === 0) {

0 commit comments

Comments
 (0)