Fetch missing base SHA; ground summary on counts; preflight skip-install

itsderek23 · claude · itsderek23 · commit 1a2710766f0a · 2026-04-24T15:55:13.000-06:00
Three fixes found while running releasebot against PR paperclipai#4432:

- ensureShaReachable now falls back to fetching the SHA directly from origin when the SHA is not reachable via pull/<n>/head. This covers the case where origin/master has advanced past the PR base since the last local fetch.
- The summary prompt now receives a structured Facts block with exact per-side pass/fail counts and visual-review verdict counts, with an explicit instruction not to invert them. This fixes a hallucination where the summary claimed "Playwright failed on the after side" when the after side had zero failures.
- --skip-install now preflights cli/node_modules/tsx/dist/cli.mjs in both worktrees and exits early with a clear message, instead of failing mid-boot with MODULE_NOT_FOUND.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/tools/releasebot/src/cli.ts b/tools/releasebot/src/cli.ts
@@ -84,6 +84,18 @@ async function main(): Promise<void> {
     await pnpmInstall(afterWt);
   } else {
     log("skipping pnpm install (--skip-install)");
+    for (const wt of [beforeWt, afterWt]) {
+      const tsxCli = path.join(wt, "cli/node_modules/tsx/dist/cli.mjs");
+      try {
+        await fs.stat(tsxCli);
+      } catch {
+        console.error(
+          `--skip-install passed but ${wt} is missing node_modules (checked ${path.relative(repoRoot, tsxCli)}).`,
+        );
+        console.error("Re-run without --skip-install to install dependencies in the fresh worktree.");
+        process.exit(2);
+      }
+    }
   }
 
   log("gathering source context around diff hunks...");
diff --git a/tools/releasebot/src/pr.ts b/tools/releasebot/src/pr.ts
@@ -40,7 +40,17 @@ export async function fetchPrDiff(prNumber: number): Promise<string> {
 export async function ensureShaReachable(sha: string, prNumber: number): Promise<void> {
   try {
     await execFile("git", ["cat-file", "-e", sha]);
+    return;
   } catch {
+    // not local yet
+  }
+  try {
     await execFile("git", ["fetch", "origin", `pull/${prNumber}/head`]);
+    await execFile("git", ["cat-file", "-e", sha]);
+    return;
+  } catch {
+    // pull/<n>/head covers the head commit and its ancestors, but not a base SHA
+    // on origin/<baseBranch> that has moved since the last fetch. Fall through.
   }
+  await execFile("git", ["fetch", "origin", sha]);
 }
diff --git a/tools/releasebot/src/review.ts b/tools/releasebot/src/review.ts
@@ -25,7 +25,9 @@ const SUMMARY_SYSTEM = `Write a <=400-character single-paragraph run summary for
 
 The visual review (per-step verdicts: pass/intentional_change/fail) is the source of truth for whether this PR ships cleanly. Anchor the summary on those verdicts.
 
-The Playwright assertion layer is a secondary signal. After-side assertion failures are worth mentioning only if the visual review also flagged the step as fail. Before-side assertion failures are EXPECTED when the test asserts on copy the PR introduced — do not flag them as regressions.
+The Playwright assertion layer is a secondary signal. The user message gives you exact pass/fail counts for each side as "Facts". Treat those numbers as ground truth — do NOT invert, embellish, or restate them inaccurately. If "after_fails" is 0, the after side did not fail. Before-side assertion failures are EXPECTED when the test asserts on copy the PR introduced — do not flag them as regressions.
+
+After-side assertion failures are worth mentioning only if the visual review also flagged the step as fail.
 
 Start with the headline outcome (how many steps passed / were intentional_change / failed per the visual review), then what the visual review actually observed, then any caveat worth a reviewer's attention. Natural user-facing language — no code identifiers, no bulleted lists. End on a complete sentence. Return plain text only.`;
 
@@ -145,13 +147,23 @@ async function summarize(
   before: SideResult,
   after: SideResult,
 ): Promise<string> {
+  const beforeFails = before.steps.filter((s) => s.status === "fail").length;
   const afterFails = after.steps.filter((s) => s.status === "fail").length;
+  const visualCounts = {
+    pass: steps.filter((s) => s.verdict === "pass").length,
+    intentional_change: steps.filter((s) => s.verdict === "intentional_change").length,
+    fail: steps.filter((s) => s.verdict === "fail").length,
+  };
   const content = [
     `PR #${pr.number}: ${pr.title}`,
     `Body: ${pr.body.slice(0, 600)}`,
-    `Step-level visual review (source of truth): ${JSON.stringify(steps)}`,
-    `Playwright assertion failures on after side (secondary signal): ${afterFails}/${after.steps.length}`,
-  ].join("\n\n");
+    "Facts (ground truth — quote these numbers, do not invert):",
+    `  total_steps: ${steps.length}`,
+    `  visual_review: ${visualCounts.pass} pass, ${visualCounts.intentional_change} intentional_change, ${visualCounts.fail} fail`,
+    `  before_fails: ${beforeFails}/${before.steps.length} (expected — PR introduces new copy/routes)`,
+    `  after_fails: ${afterFails}/${after.steps.length}`,
+    `Per-step visual review detail: ${JSON.stringify(steps)}`,
+  ].join("\n");
   const resp = await client.messages.create({
     model: MODEL,
     max_tokens: 400,